1 1.1 christos /* Extended regular expression matching and search library. 2 1.1 christos Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc. 3 1.1 christos This file is part of the GNU C Library. 4 1.1 christos Contributed by Isamu Hasegawa <isamu (at) yamato.ibm.com>. 5 1.1 christos 6 1.1 christos This program is free software; you can redistribute it and/or modify 7 1.1 christos it under the terms of the GNU General Public License as published by 8 1.1 christos the Free Software Foundation; either version 2, or (at your option) 9 1.1 christos any later version. 10 1.1 christos 11 1.1 christos This program is distributed in the hope that it will be useful, 12 1.1 christos but WITHOUT ANY WARRANTY; without even the implied warranty of 13 1.1 christos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 1.1 christos GNU General Public License for more details. 15 1.1 christos 16 1.1 christos You should have received a copy of the GNU General Public License along 17 1.1 christos with this program; if not, write to the Free Software Foundation, 18 1.1 christos Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 19 1.3 christos #include <sys/cdefs.h> 20 1.3 christos __RCSID("$NetBSD: regexec.c,v 1.3 2016/05/17 14:00:09 christos Exp $"); 21 1.3 christos 22 1.1 christos 23 1.1 christos static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, 24 1.1 christos Idx n) internal_function; 25 1.1 christos static void match_ctx_clean (re_match_context_t *mctx) internal_function; 26 1.1 christos static void match_ctx_free (re_match_context_t *cache) internal_function; 27 1.1 christos static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node, 28 1.1 christos Idx str_idx, Idx from, Idx to) 29 1.1 christos internal_function; 30 1.1 christos static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx) 31 1.1 christos internal_function; 32 1.1 christos static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node, 33 1.1 christos Idx str_idx) internal_function; 34 1.1 christos static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop, 35 1.1 christos Idx node, Idx str_idx) 36 1.1 christos internal_function; 37 1.1 christos static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, 38 1.1 christos re_dfastate_t **limited_sts, Idx last_node, 39 1.1 christos Idx last_str_idx) 40 1.1 christos internal_function; 41 1.1 christos static reg_errcode_t re_search_internal (const regex_t *preg, 42 1.1 christos const char *string, Idx length, 43 1.1 christos Idx start, Idx last_start, Idx stop, 44 1.1 christos size_t nmatch, regmatch_t pmatch[], 45 1.1 christos int eflags) internal_function; 46 1.1 christos static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp, 47 1.1 christos const char *string1, Idx length1, 48 1.1 christos const char *string2, Idx length2, 49 1.1 christos Idx start, regoff_t range, 50 1.1 christos struct re_registers *regs, 51 1.1 christos Idx stop, bool ret_len) internal_function; 52 1.1 christos static regoff_t re_search_stub (struct re_pattern_buffer *bufp, 53 1.1 christos const char *string, Idx length, Idx start, 54 1.1 christos regoff_t range, Idx stop, 55 1.1 christos struct re_registers *regs, 56 1.1 christos bool ret_len) internal_function; 57 1.1 christos static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, 58 1.1 christos Idx nregs, int regs_allocated) internal_function; 59 1.1 christos static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx) 60 1.1 christos internal_function; 61 1.1 christos static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match, 62 1.1 christos Idx *p_match_first) 63 1.1 christos internal_function; 64 1.1 christos static Idx check_halt_state_context (const re_match_context_t *mctx, 65 1.1 christos const re_dfastate_t *state, Idx idx) 66 1.1 christos internal_function; 67 1.1 christos static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, 68 1.1 christos regmatch_t *prev_idx_match, Idx cur_node, 69 1.1 christos Idx cur_idx, Idx nmatch) internal_function; 70 1.1 christos static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, 71 1.1 christos Idx str_idx, Idx dest_node, Idx nregs, 72 1.1 christos regmatch_t *regs, 73 1.1 christos re_node_set *eps_via_nodes) internal_function; 74 1.1 christos static reg_errcode_t set_regs (const regex_t *preg, 75 1.1 christos const re_match_context_t *mctx, 76 1.1 christos size_t nmatch, regmatch_t *pmatch, 77 1.1 christos bool fl_backtrack) internal_function; 78 1.1 christos static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) internal_function; 79 1.1 christos 80 1.1 christos #ifdef RE_ENABLE_I18N 81 1.1 christos static int sift_states_iter_mb (const re_match_context_t *mctx, 82 1.1 christos re_sift_context_t *sctx, 83 1.1 christos Idx node_idx, Idx str_idx, Idx max_str_idx) internal_function; 84 1.1 christos #endif /* RE_ENABLE_I18N */ 85 1.1 christos static reg_errcode_t sift_states_backward (re_match_context_t *mctx, 86 1.1 christos re_sift_context_t *sctx) internal_function; 87 1.1 christos static reg_errcode_t build_sifted_states (re_match_context_t *mctx, 88 1.1 christos re_sift_context_t *sctx, Idx str_idx, 89 1.1 christos re_node_set *cur_dest) internal_function; 90 1.1 christos static reg_errcode_t update_cur_sifted_state (re_match_context_t *mctx, 91 1.1 christos re_sift_context_t *sctx, 92 1.1 christos Idx str_idx, 93 1.1 christos re_node_set *dest_nodes) internal_function; 94 1.1 christos static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa, 95 1.1 christos re_node_set *dest_nodes, 96 1.1 christos const re_node_set *candidates) internal_function; 97 1.1 christos static bool check_dst_limits (const re_match_context_t *mctx, 98 1.1 christos const re_node_set *limits, 99 1.1 christos Idx dst_node, Idx dst_idx, Idx src_node, 100 1.1 christos Idx src_idx) internal_function; 101 1.1 christos static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, 102 1.1 christos int boundaries, Idx subexp_idx, 103 1.1 christos Idx from_node, Idx bkref_idx) internal_function; 104 1.1 christos static int check_dst_limits_calc_pos (const re_match_context_t *mctx, 105 1.1 christos Idx limit, Idx subexp_idx, 106 1.1 christos Idx node, Idx str_idx, 107 1.1 christos Idx bkref_idx) internal_function; 108 1.1 christos static reg_errcode_t check_subexp_limits (re_dfa_t *dfa, 109 1.1 christos re_node_set *dest_nodes, 110 1.1 christos const re_node_set *candidates, 111 1.1 christos re_node_set *limits, 112 1.1 christos struct re_backref_cache_entry *bkref_ents, 113 1.1 christos Idx str_idx) internal_function; 114 1.1 christos static reg_errcode_t sift_states_bkref (re_match_context_t *mctx, 115 1.1 christos re_sift_context_t *sctx, 116 1.1 christos Idx str_idx, const re_node_set *candidates) internal_function; 117 1.1 christos static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst, 118 1.1 christos re_dfastate_t **src, Idx num) internal_function; 119 1.1 christos static re_dfastate_t *find_recover_state (reg_errcode_t *err, 120 1.1 christos re_match_context_t *mctx) internal_function; 121 1.1 christos static re_dfastate_t *transit_state (reg_errcode_t *err, 122 1.1 christos re_match_context_t *mctx, 123 1.1 christos re_dfastate_t *state) internal_function; 124 1.1 christos static re_dfastate_t *merge_state_with_log (reg_errcode_t *err, 125 1.1 christos re_match_context_t *mctx, 126 1.1 christos re_dfastate_t *next_state) internal_function; 127 1.1 christos static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx, 128 1.1 christos re_node_set *cur_nodes, 129 1.1 christos Idx str_idx) internal_function; 130 1.1 christos #if 0 131 1.1 christos static re_dfastate_t *transit_state_sb (reg_errcode_t *err, 132 1.1 christos re_match_context_t *mctx, 133 1.1 christos re_dfastate_t *pstate) internal_function; 134 1.1 christos #endif 135 1.1 christos #ifdef RE_ENABLE_I18N 136 1.1 christos static reg_errcode_t transit_state_mb (re_match_context_t *mctx, 137 1.1 christos re_dfastate_t *pstate) internal_function; 138 1.1 christos #endif /* RE_ENABLE_I18N */ 139 1.1 christos static reg_errcode_t transit_state_bkref (re_match_context_t *mctx, 140 1.1 christos const re_node_set *nodes) internal_function; 141 1.1 christos static reg_errcode_t get_subexp (re_match_context_t *mctx, 142 1.1 christos Idx bkref_node, Idx bkref_str_idx) internal_function; 143 1.1 christos static reg_errcode_t get_subexp_sub (re_match_context_t *mctx, 144 1.1 christos const re_sub_match_top_t *sub_top, 145 1.1 christos re_sub_match_last_t *sub_last, 146 1.1 christos Idx bkref_node, Idx bkref_str) internal_function; 147 1.1 christos static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, 148 1.1 christos Idx subexp_idx, int type) internal_function; 149 1.1 christos static reg_errcode_t check_arrival (re_match_context_t *mctx, 150 1.1 christos state_array_t *path, Idx top_node, 151 1.1 christos Idx top_str, Idx last_node, Idx last_str, 152 1.1 christos int type) internal_function; 153 1.1 christos static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx, 154 1.1 christos Idx str_idx, 155 1.1 christos re_node_set *cur_nodes, 156 1.1 christos re_node_set *next_nodes) internal_function; 157 1.1 christos static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa, 158 1.1 christos re_node_set *cur_nodes, 159 1.1 christos Idx ex_subexp, int type) internal_function; 160 1.1 christos static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa, 161 1.1 christos re_node_set *dst_nodes, 162 1.1 christos Idx target, Idx ex_subexp, 163 1.1 christos int type) internal_function; 164 1.1 christos static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx, 165 1.1 christos re_node_set *cur_nodes, Idx cur_str, 166 1.1 christos Idx subexp_num, int type) internal_function; 167 1.1 christos static bool build_trtable (re_dfa_t *dfa, 168 1.1 christos re_dfastate_t *state) internal_function; 169 1.1 christos #ifdef RE_ENABLE_I18N 170 1.1 christos static int check_node_accept_bytes (re_dfa_t *dfa, Idx node_idx, 171 1.1 christos const re_string_t *input, Idx idx) internal_function; 172 1.1 christos # ifdef _LIBC 173 1.1 christos static unsigned int find_collation_sequence_value (const unsigned char *mbs, 174 1.1 christos size_t name_len) internal_function; 175 1.1 christos # endif /* _LIBC */ 176 1.1 christos #endif /* RE_ENABLE_I18N */ 177 1.1 christos static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa, 178 1.1 christos const re_dfastate_t *state, 179 1.1 christos re_node_set *states_node, 180 1.1 christos bitset *states_ch) internal_function; 181 1.1 christos static bool check_node_accept (const re_match_context_t *mctx, 182 1.1 christos const re_token_t *node, Idx idx) 183 1.1 christos internal_function; 184 1.1 christos static reg_errcode_t extend_buffers (re_match_context_t *mctx) internal_function; 185 1.1 christos 186 1.1 christos /* Entry point for POSIX code. */ 188 1.1 christos 189 1.1 christos /* regexec searches for a given pattern, specified by PREG, in the 190 1.1 christos string STRING. 191 1.1 christos 192 1.1 christos If NMATCH is zero or REG_NOSUB was set in the cflags argument to 193 1.1 christos `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at 194 1.1 christos least NMATCH elements, and we set them to the offsets of the 195 1.1 christos corresponding matched substrings. 196 1.1 christos 197 1.1 christos EFLAGS specifies `execution flags' which affect matching: if 198 1.1 christos REG_NOTBOL is set, then ^ does not match at the beginning of the 199 1.1 christos string; if REG_NOTEOL is set, then $ does not match at the end. 200 1.1 christos 201 1.1 christos We return 0 if we find a match and REG_NOMATCH if not. */ 202 1.1 christos 203 1.1 christos int 204 1.1 christos regexec (const regex_t *__restrict preg, const char *__restrict string, 205 1.1 christos size_t nmatch, regmatch_t pmatch[], int eflags) 206 1.1 christos { 207 1.1 christos reg_errcode_t err; 208 1.1 christos Idx start, length; 209 1.1 christos #ifdef _LIBC 210 1.1 christos re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer; 211 1.1 christos #endif 212 1.1 christos 213 1.1 christos if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) 214 1.1 christos return REG_BADPAT; 215 1.1 christos 216 1.1 christos if (eflags & REG_STARTEND) 217 1.1 christos { 218 1.1 christos start = pmatch[0].rm_so; 219 1.1 christos length = pmatch[0].rm_eo; 220 1.1 christos } 221 1.1 christos else 222 1.1 christos { 223 1.1 christos start = 0; 224 1.1 christos length = strlen (string); 225 1.1 christos } 226 1.1 christos 227 1.1 christos __libc_lock_lock (dfa->lock); 228 1.1 christos if (preg->re_no_sub) 229 1.1 christos err = re_search_internal (preg, string, length, start, length, 230 1.1 christos length, 0, NULL, eflags); 231 1.1 christos else 232 1.1 christos err = re_search_internal (preg, string, length, start, length, 233 1.1 christos length, nmatch, pmatch, eflags); 234 1.1 christos __libc_lock_unlock (dfa->lock); 235 1.1 christos return err != REG_NOERROR; 236 1.1 christos } 237 1.1 christos 238 1.1 christos #ifdef _LIBC 239 1.1 christos # include <shlib-compat.h> 240 1.1 christos versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4); 241 1.1 christos 242 1.1 christos # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) 243 1.1 christos __typeof__ (__regexec) __compat_regexec; 244 1.1 christos 245 1.1 christos int 246 1.1 christos attribute_compat_text_section 247 1.1 christos __compat_regexec (const regex_t *__restrict preg, 248 1.1 christos const char *__restrict string, size_t nmatch, 249 1.1 christos regmatch_t pmatch[], int eflags) 250 1.1 christos { 251 1.1 christos return regexec (preg, string, nmatch, pmatch, 252 1.1 christos eflags & (REG_NOTBOL | REG_NOTEOL)); 253 1.1 christos } 254 1.1 christos compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0); 255 1.1 christos # endif 256 1.1 christos #endif 257 1.1 christos 258 1.1 christos /* Entry points for GNU code. */ 259 1.1 christos 260 1.1 christos /* re_match, re_search, re_match_2, re_search_2 261 1.1 christos 262 1.1 christos The former two functions operate on STRING with length LENGTH, 263 1.1 christos while the later two operate on concatenation of STRING1 and STRING2 264 1.1 christos with lengths LENGTH1 and LENGTH2, respectively. 265 1.1 christos 266 1.1 christos re_match() matches the compiled pattern in BUFP against the string, 267 1.1 christos starting at index START. 268 1.1 christos 269 1.1 christos re_search() first tries matching at index START, then it tries to match 270 1.1 christos starting from index START + 1, and so on. The last start position tried 271 1.1 christos is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same 272 1.1 christos way as re_match().) 273 1.1 christos 274 1.1 christos The parameter STOP of re_{match,search}_2 specifies that no match exceeding 275 1.1 christos the first STOP characters of the concatenation of the strings should be 276 1.1 christos concerned. 277 1.1 christos 278 1.1 christos If REGS is not NULL, and BUFP->re_no_sub is not set, the offsets of the match 279 1.1 christos and all groups is stroed in REGS. (For the "_2" variants, the offsets are 280 1.1 christos computed relative to the concatenation, not relative to the individual 281 1.1 christos strings.) 282 1.1 christos 283 1.1 christos On success, re_match* functions return the length of the match, re_search* 284 1.1 christos return the position of the start of the match. Return value -1 means no 285 1.1 christos match was found and -2 indicates an internal error. */ 286 1.1 christos 287 1.1 christos regoff_t 288 1.1 christos re_match (struct re_pattern_buffer *bufp, const char *string, 289 1.1 christos Idx length, Idx start, struct re_registers *regs) 290 1.1 christos { 291 1.1 christos return re_search_stub (bufp, string, length, start, 0, length, regs, true); 292 1.1 christos } 293 1.1 christos #ifdef _LIBC 294 1.1 christos weak_alias (__re_match, re_match) 295 1.1 christos #endif 296 1.1 christos 297 1.1 christos regoff_t 298 1.1 christos re_search (struct re_pattern_buffer *bufp, const char *string, 299 1.1 christos Idx length, Idx start, regoff_t range, struct re_registers *regs) 300 1.1 christos { 301 1.1 christos return re_search_stub (bufp, string, length, start, range, length, regs, 302 1.1 christos false); 303 1.1 christos } 304 1.1 christos #ifdef _LIBC 305 1.1 christos weak_alias (__re_search, re_search) 306 1.1 christos #endif 307 1.1 christos 308 1.1 christos regoff_t 309 1.1 christos re_match_2 (struct re_pattern_buffer *bufp, 310 1.1 christos const char *string1, Idx length1, 311 1.1 christos const char *string2, Idx length2, 312 1.1 christos Idx start, struct re_registers *regs, Idx stop) 313 1.1 christos { 314 1.1 christos return re_search_2_stub (bufp, string1, length1, string2, length2, 315 1.1 christos start, 0, regs, stop, true); 316 1.1 christos } 317 1.1 christos #ifdef _LIBC 318 1.1 christos weak_alias (__re_match_2, re_match_2) 319 1.1 christos #endif 320 1.1 christos 321 1.1 christos regoff_t 322 1.1 christos re_search_2 (struct re_pattern_buffer *bufp, 323 1.1 christos const char *string1, Idx length1, 324 1.1 christos const char *string2, Idx length2, 325 1.1 christos Idx start, regoff_t range, struct re_registers *regs, Idx stop) 326 1.1 christos { 327 1.1 christos return re_search_2_stub (bufp, string1, length1, string2, length2, 328 1.1 christos start, range, regs, stop, false); 329 1.1 christos } 330 1.1 christos #ifdef _LIBC 331 1.1 christos weak_alias (__re_search_2, re_search_2) 332 1.1 christos #endif 333 1.1 christos 334 1.1 christos static regoff_t 335 1.1 christos internal_function 336 1.1 christos re_search_2_stub (struct re_pattern_buffer *bufp, 337 1.1 christos const char *string1, Idx length1, 338 1.1 christos const char *string2, Idx length2, 339 1.1 christos Idx start, regoff_t range, struct re_registers *regs, 340 1.1 christos Idx stop, bool ret_len) 341 1.1 christos { 342 1.1 christos const char *str; 343 1.1 christos regoff_t rval; 344 1.1 christos Idx len = length1 + length2; 345 1.1 christos char *s = NULL; 346 1.1 christos 347 1.1 christos if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0)) 348 1.1 christos return -2; 349 1.1 christos 350 1.1 christos /* Concatenate the strings. */ 351 1.1 christos if (length2 > 0) 352 1.1 christos if (length1 > 0) 353 1.1 christos { 354 1.1 christos s = re_malloc (char, len); 355 1.1 christos 356 1.1 christos if (BE (s == NULL, 0)) 357 1.1 christos return -2; 358 1.1 christos memcpy (s, string1, length1); 359 1.1 christos memcpy (s + length1, string2, length2); 360 1.1 christos str = s; 361 1.1 christos } 362 1.1 christos else 363 1.1 christos str = string2; 364 1.1 christos else 365 1.1 christos str = string1; 366 1.1 christos 367 1.1 christos rval = re_search_stub (bufp, str, len, start, range, stop, regs, 368 1.1 christos ret_len); 369 1.1 christos re_free (s); 370 1.1 christos return rval; 371 1.1 christos } 372 1.1 christos 373 1.1 christos /* The parameters have the same meaning as those of re_search. 374 1.1 christos Additional parameters: 375 1.1 christos If RET_LEN is true the length of the match is returned (re_match style); 376 1.1 christos otherwise the position of the match is returned. */ 377 1.1 christos 378 1.1 christos static regoff_t 379 1.1 christos internal_function 380 1.1 christos re_search_stub (struct re_pattern_buffer *bufp, 381 1.1 christos const char *string, Idx length, 382 1.1 christos Idx start, regoff_t range, Idx stop, struct re_registers *regs, 383 1.1 christos bool ret_len) 384 1.1 christos { 385 1.1 christos reg_errcode_t result; 386 1.1 christos regmatch_t *pmatch; 387 1.1 christos Idx nregs; 388 1.1 christos regoff_t rval; 389 1.1 christos int eflags = 0; 390 1.1 christos #ifdef _LIBC 391 1.1 christos re_dfa_t *dfa = (re_dfa_t *) bufp->re_buffer; 392 1.1 christos #endif 393 1.1 christos Idx last_start = start + range; 394 1.1 christos 395 1.1 christos /* Check for out-of-range. */ 396 1.1 christos if (BE (start < 0 || start > length, 0)) 397 1.1 christos return -1; 398 1.1 christos if (sizeof start < sizeof range) 399 1.1 christos { 400 1.1 christos regoff_t length_offset = length; 401 1.1 christos regoff_t start_offset = start; 402 1.1 christos if (BE (length_offset - start_offset < range, 0)) 403 1.1 christos last_start = length; 404 1.1 christos else if (BE (range < - start_offset, 0)) 405 1.1 christos last_start = 0; 406 1.1 christos } 407 1.1 christos else 408 1.1 christos { 409 1.1 christos if (BE ((last_start < start) != (range < 0), 0)) 410 1.1 christos { 411 1.1 christos /* Overflow occurred when computing last_start; substitute 412 1.1 christos the extreme value. */ 413 1.1 christos last_start = range < 0 ? 0 : length; 414 1.1 christos } 415 1.1 christos else 416 1.1 christos { 417 1.1 christos if (BE (length < last_start, 0)) 418 1.1 christos last_start = length; 419 1.1 christos else if (BE (last_start < 0, 0)) 420 1.1 christos last_start = 0; 421 1.1 christos } 422 1.1 christos } 423 1.1 christos 424 1.1 christos __libc_lock_lock (dfa->lock); 425 1.1 christos 426 1.1 christos eflags |= (bufp->re_not_bol) ? REG_NOTBOL : 0; 427 1.1 christos eflags |= (bufp->re_not_eol) ? REG_NOTEOL : 0; 428 1.1 christos 429 1.1 christos /* Compile fastmap if we haven't yet. */ 430 1.1 christos if (start < last_start && bufp->re_fastmap != NULL 431 1.1 christos && !bufp->re_fastmap_accurate) 432 1.1 christos re_compile_fastmap (bufp); 433 1.1 christos 434 1.1 christos if (BE (bufp->re_no_sub, 0)) 435 1.1 christos regs = NULL; 436 1.1 christos 437 1.1 christos /* We need at least 1 register. */ 438 1.1 christos if (regs == NULL) 439 1.1 christos nregs = 1; 440 1.1 christos else if (BE (bufp->re_regs_allocated == REG_FIXED 441 1.1 christos && regs->rm_num_regs <= bufp->re_nsub, 0)) 442 1.1 christos { 443 1.1 christos nregs = regs->rm_num_regs; 444 1.1 christos if (BE (nregs < 1, 0)) 445 1.1 christos { 446 1.1 christos /* Nothing can be copied to regs. */ 447 1.1 christos regs = NULL; 448 1.1 christos nregs = 1; 449 1.1 christos } 450 1.1 christos } 451 1.1 christos else 452 1.1 christos nregs = bufp->re_nsub + 1; 453 1.1 christos pmatch = re_xmalloc (regmatch_t, nregs); 454 1.1 christos if (BE (pmatch == NULL, 0)) 455 1.1 christos { 456 1.1 christos rval = -2; 457 1.1 christos goto out; 458 1.1 christos } 459 1.1 christos 460 1.1 christos result = re_search_internal (bufp, string, length, start, last_start, stop, 461 1.1 christos nregs, pmatch, eflags); 462 1.1 christos 463 1.1 christos rval = 0; 464 1.1 christos 465 1.1 christos /* I hope we needn't fill ther regs with -1's when no match was found. */ 466 1.1 christos if (result != REG_NOERROR) 467 1.1 christos rval = -1; 468 1.1 christos else if (regs != NULL) 469 1.1 christos { 470 1.1 christos /* If caller wants register contents data back, copy them. */ 471 1.1 christos bufp->re_regs_allocated = re_copy_regs (regs, pmatch, nregs, 472 1.1 christos bufp->re_regs_allocated); 473 1.1 christos if (BE (bufp->re_regs_allocated == REG_UNALLOCATED, 0)) 474 1.1 christos rval = -2; 475 1.1 christos } 476 1.1 christos 477 1.1 christos if (BE (rval == 0, 1)) 478 1.1 christos { 479 1.1 christos if (ret_len) 480 1.1 christos { 481 1.1 christos assert (pmatch[0].rm_so == start); 482 1.1 christos rval = pmatch[0].rm_eo - start; 483 1.1 christos } 484 1.1 christos else 485 1.1 christos rval = pmatch[0].rm_so; 486 1.1 christos } 487 1.1 christos re_free (pmatch); 488 1.1 christos out: 489 1.1 christos __libc_lock_unlock (dfa->lock); 490 1.1 christos return rval; 491 1.1 christos } 492 1.1 christos 493 1.1 christos static unsigned 494 1.1 christos internal_function 495 1.1 christos re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs, 496 1.1 christos int regs_allocated) 497 1.1 christos { 498 1.1 christos int rval = REG_REALLOCATE; 499 1.1 christos Idx i; 500 1.1 christos Idx need_regs = nregs + 1; 501 1.1 christos /* We need one extra element beyond `rm_num_regs' for the `-1' marker GNU code 502 1.1 christos uses. */ 503 1.1 christos 504 1.1 christos /* Have the register data arrays been allocated? */ 505 1.1 christos if (regs_allocated == REG_UNALLOCATED) 506 1.1 christos { /* No. So allocate them with malloc. */ 507 1.1 christos regs->rm_start = re_xmalloc (regoff_t, need_regs); 508 1.1 christos regs->rm_end = re_malloc (regoff_t, need_regs); 509 1.1 christos if (BE (regs->rm_start == NULL, 0) || BE (regs->rm_end == NULL, 0)) 510 1.1 christos return REG_UNALLOCATED; 511 1.1 christos regs->rm_num_regs = need_regs; 512 1.1 christos } 513 1.1 christos else if (regs_allocated == REG_REALLOCATE) 514 1.1 christos { /* Yes. If we need more elements than were already 515 1.1 christos allocated, reallocate them. If we need fewer, just 516 1.1 christos leave it alone. */ 517 1.1 christos if (BE (need_regs > regs->rm_num_regs, 0)) 518 1.1 christos { 519 1.1 christos regoff_t *new_start = 520 1.1 christos re_xrealloc (regs->rm_start, regoff_t, need_regs); 521 1.1 christos regoff_t *new_end = re_realloc (regs->rm_end, regoff_t, need_regs); 522 1.1 christos if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0)) 523 1.1 christos return REG_UNALLOCATED; 524 1.1 christos regs->rm_start = new_start; 525 1.1 christos regs->rm_end = new_end; 526 1.1 christos regs->rm_num_regs = need_regs; 527 1.1 christos } 528 1.1 christos } 529 1.1 christos else 530 1.1 christos { 531 1.1 christos assert (regs_allocated == REG_FIXED); 532 1.1 christos /* This function may not be called with REG_FIXED and nregs too big. */ 533 1.1 christos assert (regs->rm_num_regs >= nregs); 534 1.1 christos rval = REG_FIXED; 535 1.1 christos } 536 1.1 christos 537 1.1 christos /* Copy the regs. */ 538 1.1 christos for (i = 0; i < nregs; ++i) 539 1.1 christos { 540 1.1 christos regs->rm_start[i] = pmatch[i].rm_so; 541 1.1 christos regs->rm_end[i] = pmatch[i].rm_eo; 542 1.1 christos } 543 1.1 christos for ( ; i < regs->rm_num_regs; ++i) 544 1.1 christos regs->rm_start[i] = regs->rm_end[i] = -1; 545 1.1 christos 546 1.1 christos return rval; 547 1.1 christos } 548 1.1 christos 549 1.1 christos /* Set REGS to hold NUM_REGS registers, storing them in STARTS and 550 1.1 christos ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use 551 1.1 christos this memory for recording register information. STARTS and ENDS 552 1.1 christos must be allocated using the malloc library routine, and must each 553 1.1 christos be at least NUM_REGS * sizeof (regoff_t) bytes long. 554 1.1 christos 555 1.1 christos If NUM_REGS == 0, then subsequent matches should allocate their own 556 1.1 christos register data. 557 1.1 christos 558 1.1 christos Unless this function is called, the first search or match using 559 1.1 christos PATTERN_BUFFER will allocate its own register data, without 560 1.1 christos freeing the old data. */ 561 1.1 christos 562 1.1 christos void 563 1.1 christos re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, 564 1.1 christos __re_size_t num_regs, regoff_t *starts, regoff_t *ends) 565 1.1 christos { 566 1.1 christos if (num_regs) 567 1.1 christos { 568 1.1 christos bufp->re_regs_allocated = REG_REALLOCATE; 569 1.1 christos regs->rm_num_regs = num_regs; 570 1.1 christos regs->rm_start = starts; 571 1.1 christos regs->rm_end = ends; 572 1.1 christos } 573 1.1 christos else 574 1.1 christos { 575 1.1 christos bufp->re_regs_allocated = REG_UNALLOCATED; 576 1.1 christos regs->rm_num_regs = 0; 577 1.1 christos regs->rm_start = regs->rm_end = NULL; 578 1.1 christos } 579 1.1 christos } 580 1.1 christos #ifdef _LIBC 581 1.1 christos weak_alias (__re_set_registers, re_set_registers) 582 1.1 christos #endif 583 1.1 christos 584 1.1 christos /* Entry points compatible with 4.2 BSD regex library. We don't define 586 1.1 christos them unless specifically requested. */ 587 1.1 christos 588 1.1 christos #if defined _REGEX_RE_COMP || defined _LIBC 589 1.1 christos int 590 1.1 christos # ifdef _LIBC 591 1.1 christos weak_function 592 1.1 christos # endif 593 1.1 christos re_exec (const char *s) 594 1.1 christos { 595 1.1 christos return 0 == regexec (&re_comp_buf, s, 0, NULL, 0); 596 1.1 christos } 597 1.1 christos #endif /* _REGEX_RE_COMP */ 598 1.1 christos 599 1.1 christos /* Internal entry point. */ 601 1.1 christos 602 1.1 christos /* Searches for a compiled pattern PREG in the string STRING, whose 603 1.1 christos length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same 604 1.1 christos meaning as with regexec. LAST_START is START + RANGE, where 605 1.1 christos START and RANGE have the same meaning as with re_search. 606 1.1 christos Return REG_NOERROR if we find a match, and REG_NOMATCH if not, 607 1.1 christos otherwise return the error code. 608 1.1 christos Note: We assume front end functions already check ranges. 609 1.1 christos (0 <= LAST_START && LAST_START <= LENGTH) */ 610 1.1 christos 611 1.1 christos static reg_errcode_t 612 1.1 christos internal_function 613 1.1 christos re_search_internal (const regex_t *preg, 614 1.1 christos const char *string, Idx length, 615 1.1 christos Idx start, Idx last_start, Idx stop, 616 1.1 christos size_t nmatch, regmatch_t pmatch[], 617 1.1 christos int eflags) 618 1.1 christos { 619 1.1 christos reg_errcode_t err; 620 1.1 christos re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer; 621 1.1 christos Idx left_lim, right_lim; 622 1.1 christos int incr; 623 1.1 christos bool fl_longest_match; 624 1.1 christos int match_kind; 625 1.1 christos Idx match_first, match_last = REG_MISSING; 626 1.1 christos Idx extra_nmatch; 627 1.1 christos bool sb; 628 1.1 christos int ch; 629 1.1 christos #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) 630 1.1 christos re_match_context_t mctx = { .dfa = dfa }; 631 1.1 christos #else 632 1.1 christos re_match_context_t mctx; 633 1.1 christos #endif 634 1.1 christos char *fastmap = ((preg->re_fastmap != NULL && preg->re_fastmap_accurate 635 1.1 christos && start != last_start && !preg->re_can_be_null) 636 1.1 christos ? preg->re_fastmap : NULL); 637 1.1 christos unsigned REG_TRANSLATE_TYPE t = 638 1.1 christos (unsigned REG_TRANSLATE_TYPE) preg->re_translate; 639 1.1 christos 640 1.1 christos #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)) 641 1.1 christos memset (&mctx, '\0', sizeof (re_match_context_t)); 642 1.1 christos mctx.dfa = dfa; 643 1.1 christos #endif 644 1.1 christos 645 1.1 christos extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0; 646 1.1 christos nmatch -= extra_nmatch; 647 1.1 christos 648 1.1 christos /* Check if the DFA haven't been compiled. */ 649 1.1 christos if (BE (preg->re_used == 0 || dfa->init_state == NULL 650 1.1 christos || dfa->init_state_word == NULL || dfa->init_state_nl == NULL 651 1.1 christos || dfa->init_state_begbuf == NULL, 0)) 652 1.1 christos return REG_NOMATCH; 653 1.1 christos 654 1.1 christos #ifdef DEBUG 655 1.1 christos /* We assume front-end functions already check them. */ 656 1.1 christos assert (0 <= last_start && last_start <= length); 657 1.1 christos #endif 658 1.1 christos 659 1.1 christos /* If initial states with non-begbuf contexts have no elements, 660 1.1 christos the regex must be anchored. If preg->re_newline_anchor is set, 661 1.1 christos we'll never use init_state_nl, so do not check it. */ 662 1.1 christos if (dfa->init_state->nodes.nelem == 0 663 1.1 christos && dfa->init_state_word->nodes.nelem == 0 664 1.1 christos && (dfa->init_state_nl->nodes.nelem == 0 665 1.1 christos || !preg->re_newline_anchor)) 666 1.1 christos { 667 1.1 christos if (start != 0 && last_start != 0) 668 1.1 christos return REG_NOMATCH; 669 1.1 christos start = last_start = 0; 670 1.1 christos } 671 1.1 christos 672 1.1 christos /* We must check the longest matching, if nmatch > 0. */ 673 1.1 christos fl_longest_match = (nmatch != 0 || dfa->nbackref); 674 1.1 christos 675 1.1 christos err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1, 676 1.1 christos preg->re_translate, 677 1.1 christos preg->re_syntax & REG_IGNORE_CASE, dfa); 678 1.1 christos if (BE (err != REG_NOERROR, 0)) 679 1.1 christos goto free_return; 680 1.1 christos mctx.input.stop = stop; 681 1.1 christos mctx.input.raw_stop = stop; 682 1.1 christos mctx.input.newline_anchor = preg->re_newline_anchor; 683 1.1 christos 684 1.1 christos err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2); 685 1.1 christos if (BE (err != REG_NOERROR, 0)) 686 1.1 christos goto free_return; 687 1.1 christos 688 1.1 christos /* We will log all the DFA states through which the dfa pass, 689 1.1 christos if nmatch > 1, or this dfa has "multibyte node", which is a 690 1.1 christos back-reference or a node which can accept multibyte character or 691 1.1 christos multi character collating element. */ 692 1.1 christos if (nmatch > 1 || dfa->has_mb_node) 693 1.1 christos { 694 1.1 christos mctx.state_log = re_xmalloc (re_dfastate_t *, mctx.input.bufs_len + 1); 695 1.1 christos if (BE (mctx.state_log == NULL, 0)) 696 1.1 christos { 697 1.1 christos err = REG_ESPACE; 698 1.1 christos goto free_return; 699 1.1 christos } 700 1.1 christos } 701 1.1 christos else 702 1.1 christos mctx.state_log = NULL; 703 1.1 christos 704 1.1 christos match_first = start; 705 1.1 christos mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF 706 1.1 christos : CONTEXT_NEWLINE | CONTEXT_BEGBUF; 707 1.1 christos 708 1.1 christos /* Check incrementally whether of not the input string match. */ 709 1.1 christos incr = (last_start < start) ? -1 : 1; 710 1.1 christos left_lim = (last_start < start) ? last_start : start; 711 1.1 christos right_lim = (last_start < start) ? start : last_start; 712 1.1 christos sb = dfa->mb_cur_max == 1; 713 1.1 christos match_kind = 714 1.1 christos (fastmap 715 1.1 christos ? ((sb || !(preg->re_syntax & REG_IGNORE_CASE || t) ? 4 : 0) 716 1.1 christos | (start <= last_start ? 2 : 0) 717 1.1 christos | (t != NULL ? 1 : 0)) 718 1.1 christos : 8); 719 1.1 christos 720 1.1 christos for (;; match_first += incr) 721 1.1 christos { 722 1.1 christos err = REG_NOMATCH; 723 1.1 christos if (match_first < left_lim || right_lim < match_first) 724 1.1 christos goto free_return; 725 1.1 christos 726 1.1 christos /* Advance as rapidly as possible through the string, until we 727 1.1 christos find a plausible place to start matching. This may be done 728 1.1 christos with varying efficiency, so there are various possibilities: 729 1.1 christos only the most common of them are specialized, in order to 730 1.1 christos save on code size. We use a switch statement for speed. */ 731 1.1 christos switch (match_kind) 732 1.1 christos { 733 1.1 christos case 8: 734 1.1 christos /* No fastmap. */ 735 1.1 christos break; 736 1.1 christos 737 1.1 christos case 7: 738 1.1 christos /* Fastmap with single-byte translation, match forward. */ 739 1.1 christos while (BE (match_first < right_lim, 1) 740 1.1 christos && !fastmap[t[(unsigned char) string[match_first]]]) 741 1.1 christos ++match_first; 742 1.1 christos goto forward_match_found_start_or_reached_end; 743 1.1 christos 744 1.1 christos case 6: 745 1.1 christos /* Fastmap without translation, match forward. */ 746 1.1 christos while (BE (match_first < right_lim, 1) 747 1.1 christos && !fastmap[(unsigned char) string[match_first]]) 748 1.1 christos ++match_first; 749 1.1 christos 750 1.1 christos forward_match_found_start_or_reached_end: 751 1.1 christos if (BE (match_first == right_lim, 0)) 752 1.1 christos { 753 1.1 christos ch = match_first >= length 754 1.1 christos ? 0 : (unsigned char) string[match_first]; 755 1.1 christos if (!fastmap[t ? t[ch] : ch]) 756 1.1 christos goto free_return; 757 1.1 christos } 758 1.1 christos break; 759 1.1 christos 760 1.1 christos case 4: 761 1.1 christos case 5: 762 1.1 christos /* Fastmap without multi-byte translation, match backwards. */ 763 1.1 christos while (match_first >= left_lim) 764 1.1 christos { 765 1.1 christos ch = match_first >= length 766 1.1 christos ? 0 : (unsigned char) string[match_first]; 767 1.1 christos if (fastmap[t ? t[ch] : ch]) 768 1.1 christos break; 769 1.1 christos --match_first; 770 1.1 christos } 771 1.1 christos if (match_first < left_lim) 772 1.1 christos goto free_return; 773 1.1 christos break; 774 1.1 christos 775 1.1 christos default: 776 1.1 christos /* In this case, we can't determine easily the current byte, 777 1.1 christos since it might be a component byte of a multibyte 778 1.1 christos character. Then we use the constructed buffer instead. */ 779 1.1 christos for (;;) 780 1.1 christos { 781 1.1 christos /* If MATCH_FIRST is out of the valid range, reconstruct the 782 1.1 christos buffers. */ 783 1.1 christos __re_size_t offset = match_first - mctx.input.raw_mbs_idx; 784 1.1 christos if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0)) 785 1.1 christos { 786 1.1 christos err = re_string_reconstruct (&mctx.input, match_first, 787 1.1 christos eflags); 788 1.1 christos if (BE (err != REG_NOERROR, 0)) 789 1.1 christos goto free_return; 790 1.1 christos 791 1.1 christos offset = match_first - mctx.input.raw_mbs_idx; 792 1.1 christos } 793 1.1 christos /* If MATCH_FIRST is out of the buffer, leave it as '\0'. 794 1.1 christos Note that MATCH_FIRST must not be smaller than 0. */ 795 1.1 christos ch = (match_first >= length 796 1.1 christos ? 0 : re_string_byte_at (&mctx.input, offset)); 797 1.1 christos if (fastmap[ch]) 798 1.1 christos break; 799 1.1 christos match_first += incr; 800 1.1 christos if (match_first < left_lim || match_first > right_lim) 801 1.1 christos { 802 1.1 christos err = REG_NOMATCH; 803 1.1 christos goto free_return; 804 1.1 christos } 805 1.1 christos } 806 1.1 christos break; 807 1.1 christos } 808 1.1 christos 809 1.1 christos /* Reconstruct the buffers so that the matcher can assume that 810 1.1 christos the matching starts from the beginning of the buffer. */ 811 1.1 christos err = re_string_reconstruct (&mctx.input, match_first, eflags); 812 1.1 christos if (BE (err != REG_NOERROR, 0)) 813 1.1 christos goto free_return; 814 1.1 christos 815 1.1 christos #ifdef RE_ENABLE_I18N 816 1.1 christos /* Don't consider this char as a possible match start if it part, 817 1.1 christos yet isn't the head, of a multibyte character. */ 818 1.1 christos if (!sb && !re_string_first_byte (&mctx.input, 0)) 819 1.1 christos continue; 820 1.1 christos #endif 821 1.1 christos 822 1.1 christos /* It seems to be appropriate one, then use the matcher. */ 823 1.1 christos /* We assume that the matching starts from 0. */ 824 1.1 christos mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; 825 1.1 christos match_last = check_matching (&mctx, fl_longest_match, 826 1.1 christos start <= last_start ? &match_first : NULL); 827 1.1 christos if (match_last != REG_MISSING) 828 1.1 christos { 829 1.1 christos if (BE (match_last == REG_ERROR, 0)) 830 1.1 christos { 831 1.1 christos err = REG_ESPACE; 832 1.1 christos goto free_return; 833 1.1 christos } 834 1.1 christos else 835 1.1 christos { 836 1.1 christos mctx.match_last = match_last; 837 1.1 christos if ((!preg->re_no_sub && nmatch > 1) || dfa->nbackref) 838 1.1 christos { 839 1.1 christos re_dfastate_t *pstate = mctx.state_log[match_last]; 840 1.1 christos mctx.last_node = check_halt_state_context (&mctx, pstate, 841 1.1 christos match_last); 842 1.1 christos } 843 1.1 christos if ((!preg->re_no_sub && nmatch > 1 && dfa->has_plural_match) 844 1.1 christos || dfa->nbackref) 845 1.1 christos { 846 1.1 christos err = prune_impossible_nodes (&mctx); 847 1.1 christos if (err == REG_NOERROR) 848 1.1 christos break; 849 1.1 christos if (BE (err != REG_NOMATCH, 0)) 850 1.1 christos goto free_return; 851 1.1 christos match_last = REG_MISSING; 852 1.1 christos } 853 1.1 christos else 854 1.1 christos break; /* We found a match. */ 855 1.1 christos } 856 1.1 christos } 857 1.1 christos 858 1.1 christos match_ctx_clean (&mctx); 859 1.1 christos } 860 1.1 christos 861 1.1 christos #ifdef DEBUG 862 1.1 christos assert (match_last != REG_MISSING); 863 1.1 christos assert (err == REG_NOERROR); 864 1.1 christos #endif 865 1.1 christos 866 1.1 christos /* Set pmatch[] if we need. */ 867 1.1 christos if (nmatch > 0) 868 1.1 christos { 869 1.1 christos Idx reg_idx; 870 1.1 christos 871 1.1 christos /* Initialize registers. */ 872 1.1 christos for (reg_idx = 1; reg_idx < nmatch; ++reg_idx) 873 1.1 christos pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1; 874 1.1 christos 875 1.1 christos /* Set the points where matching start/end. */ 876 1.1 christos pmatch[0].rm_so = 0; 877 1.1 christos pmatch[0].rm_eo = mctx.match_last; 878 1.1 christos /* FIXME: This function should fail if mctx.match_last exceeds 879 1.1 christos the maximum possible regoff_t value. We need a new error 880 1.1 christos code REG_OVERFLOW. */ 881 1.1 christos 882 1.1 christos if (!preg->re_no_sub && nmatch > 1) 883 1.1 christos { 884 1.1 christos err = set_regs (preg, &mctx, nmatch, pmatch, 885 1.1 christos dfa->has_plural_match && dfa->nbackref > 0); 886 1.1 christos if (BE (err != REG_NOERROR, 0)) 887 1.1 christos goto free_return; 888 1.1 christos } 889 1.1 christos 890 1.1 christos /* At last, add the offset to the each registers, since we slided 891 1.1 christos the buffers so that we could assume that the matching starts 892 1.1 christos from 0. */ 893 1.1 christos for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) 894 1.1 christos if (pmatch[reg_idx].rm_so != -1) 895 1.1 christos { 896 1.1 christos #ifdef RE_ENABLE_I18N 897 1.1 christos if (BE (mctx.input.offsets_needed != 0, 0)) 898 1.1 christos { 899 1.1 christos pmatch[reg_idx].rm_so = 900 1.1 christos (pmatch[reg_idx].rm_so == mctx.input.valid_len 901 1.1 christos ? mctx.input.valid_raw_len 902 1.1 christos : mctx.input.offsets[pmatch[reg_idx].rm_so]); 903 1.1 christos pmatch[reg_idx].rm_eo = 904 1.1 christos (pmatch[reg_idx].rm_eo == mctx.input.valid_len 905 1.1 christos ? mctx.input.valid_raw_len 906 1.1 christos : mctx.input.offsets[pmatch[reg_idx].rm_eo]); 907 1.1 christos } 908 1.1 christos #else 909 1.1 christos assert (mctx.input.offsets_needed == 0); 910 1.1 christos #endif 911 1.1 christos pmatch[reg_idx].rm_so += match_first; 912 1.1 christos pmatch[reg_idx].rm_eo += match_first; 913 1.1 christos } 914 1.1 christos for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx) 915 1.1 christos { 916 1.1 christos pmatch[nmatch + reg_idx].rm_so = -1; 917 1.1 christos pmatch[nmatch + reg_idx].rm_eo = -1; 918 1.1 christos } 919 1.1 christos 920 1.1 christos if (dfa->subexp_map) 921 1.1 christos for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++) 922 1.1 christos if (dfa->subexp_map[reg_idx] != reg_idx) 923 1.1 christos { 924 1.1 christos pmatch[reg_idx + 1].rm_so 925 1.1 christos = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so; 926 1.1 christos pmatch[reg_idx + 1].rm_eo 927 1.1 christos = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo; 928 1.1 christos } 929 1.1 christos } 930 1.1 christos 931 1.1 christos free_return: 932 1.1 christos re_free (mctx.state_log); 933 1.1 christos if (dfa->nbackref) 934 1.1 christos match_ctx_free (&mctx); 935 1.1 christos re_string_destruct (&mctx.input); 936 1.1 christos return err; 937 1.1 christos } 938 1.1 christos 939 1.1 christos static reg_errcode_t 940 1.1 christos internal_function 941 1.1 christos prune_impossible_nodes (re_match_context_t *mctx) 942 1.1 christos { 943 1.1 christos re_dfa_t *const dfa = mctx->dfa; 944 1.1 christos Idx halt_node, match_last; 945 1.1 christos reg_errcode_t ret; 946 1.1 christos re_dfastate_t **sifted_states; 947 1.1 christos re_dfastate_t **lim_states = NULL; 948 1.1 christos re_sift_context_t sctx; 949 1.1 christos #ifdef DEBUG 950 1.1 christos assert (mctx->state_log != NULL); 951 1.1 christos #endif 952 1.1 christos match_last = mctx->match_last; 953 1.1 christos halt_node = mctx->last_node; 954 1.1 christos sifted_states = re_xmalloc (re_dfastate_t *, match_last + 1); 955 1.1 christos if (BE (sifted_states == NULL, 0)) 956 1.1 christos { 957 1.1 christos ret = REG_ESPACE; 958 1.1 christos goto free_return; 959 1.1 christos } 960 1.1 christos if (dfa->nbackref) 961 1.1 christos { 962 1.1 christos lim_states = re_xmalloc (re_dfastate_t *, match_last + 1); 963 1.1 christos if (BE (lim_states == NULL, 0)) 964 1.1 christos { 965 1.1 christos ret = REG_ESPACE; 966 1.1 christos goto free_return; 967 1.1 christos } 968 1.1 christos while (1) 969 1.1 christos { 970 1.1 christos memset (lim_states, '\0', 971 1.1 christos sizeof (re_dfastate_t *) * (match_last + 1)); 972 1.1 christos sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, 973 1.1 christos match_last); 974 1.1 christos ret = sift_states_backward (mctx, &sctx); 975 1.1 christos re_node_set_free (&sctx.limits); 976 1.1 christos if (BE (ret != REG_NOERROR, 0)) 977 1.1 christos goto free_return; 978 1.1 christos if (sifted_states[0] != NULL || lim_states[0] != NULL) 979 1.1 christos break; 980 1.1 christos do 981 1.1 christos { 982 1.1 christos --match_last; 983 1.1 christos if (! REG_VALID_INDEX (match_last)) 984 1.1 christos { 985 1.1 christos ret = REG_NOMATCH; 986 1.1 christos goto free_return; 987 1.1 christos } 988 1.1 christos } while (mctx->state_log[match_last] == NULL 989 1.1 christos || !mctx->state_log[match_last]->halt); 990 1.1 christos halt_node = check_halt_state_context (mctx, 991 1.1 christos mctx->state_log[match_last], 992 1.1 christos match_last); 993 1.1 christos } 994 1.1 christos ret = merge_state_array (dfa, sifted_states, lim_states, 995 1.1 christos match_last + 1); 996 1.1 christos re_free (lim_states); 997 1.1 christos lim_states = NULL; 998 1.1 christos if (BE (ret != REG_NOERROR, 0)) 999 1.1 christos goto free_return; 1000 1.1 christos } 1001 1.1 christos else 1002 1.1 christos { 1003 1.1 christos sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last); 1004 1.1 christos ret = sift_states_backward (mctx, &sctx); 1005 1.1 christos re_node_set_free (&sctx.limits); 1006 1.1 christos if (BE (ret != REG_NOERROR, 0)) 1007 1.1 christos goto free_return; 1008 1.1 christos } 1009 1.1 christos re_free (mctx->state_log); 1010 1.1 christos mctx->state_log = sifted_states; 1011 1.1 christos sifted_states = NULL; 1012 1.1 christos mctx->last_node = halt_node; 1013 1.1 christos mctx->match_last = match_last; 1014 1.1 christos ret = REG_NOERROR; 1015 1.1 christos free_return: 1016 1.1 christos re_free (sifted_states); 1017 1.1 christos re_free (lim_states); 1018 1.1 christos return ret; 1019 1.1 christos } 1020 1.1 christos 1021 1.1 christos /* Acquire an initial state and return it. 1022 1.1 christos We must select appropriate initial state depending on the context, 1023 1.1 christos since initial states may have constraints like "\<", "^", etc.. */ 1024 1.1 christos 1025 1.1 christos static inline re_dfastate_t * 1026 1.1 christos __attribute ((always_inline)) internal_function 1027 1.1 christos acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, 1028 1.1 christos Idx idx) 1029 1.1 christos { 1030 1.1 christos re_dfa_t *const dfa = mctx->dfa; 1031 1.1 christos if (dfa->init_state->has_constraint) 1032 1.1 christos { 1033 1.1 christos unsigned int context; 1034 1.1 christos context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags); 1035 1.1 christos if (IS_WORD_CONTEXT (context)) 1036 1.1 christos return dfa->init_state_word; 1037 1.1 christos else if (IS_ORDINARY_CONTEXT (context)) 1038 1.1 christos return dfa->init_state; 1039 1.1 christos else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context)) 1040 1.1 christos return dfa->init_state_begbuf; 1041 1.1 christos else if (IS_NEWLINE_CONTEXT (context)) 1042 1.1 christos return dfa->init_state_nl; 1043 1.1 christos else if (IS_BEGBUF_CONTEXT (context)) 1044 1.1 christos { 1045 1.1 christos /* It is relatively rare case, then calculate on demand. */ 1046 1.1 christos return re_acquire_state_context (err, dfa, 1047 1.1 christos dfa->init_state->entrance_nodes, 1048 1.1 christos context); 1049 1.1 christos } 1050 1.1 christos else 1051 1.1 christos /* Must not happen? */ 1052 1.1 christos return dfa->init_state; 1053 1.1 christos } 1054 1.1 christos else 1055 1.1 christos return dfa->init_state; 1056 1.1 christos } 1057 1.1 christos 1058 1.1 christos /* Check whether the regular expression match input string INPUT or not, 1059 1.1 christos and return the index where the matching end. Return REG_MISSING if 1060 1.1 christos there is no match, and return REG_ERROR in case of an error. 1061 1.1 christos FL_LONGEST_MATCH means we want the POSIX longest matching. 1062 1.1 christos If P_MATCH_FIRST is not NULL, and the match fails, it is set to the 1063 1.1 christos next place where we may want to try matching. 1064 1.1 christos Note that the matcher assume that the maching starts from the current 1065 1.1 christos index of the buffer. */ 1066 1.1 christos 1067 1.1 christos static Idx 1068 1.1 christos internal_function 1069 1.1 christos check_matching (re_match_context_t *mctx, bool fl_longest_match, 1070 1.1 christos Idx *p_match_first) 1071 1.1 christos { 1072 1.1 christos re_dfa_t *const dfa = mctx->dfa; 1073 1.1 christos reg_errcode_t err; 1074 1.1 christos Idx match = 0; 1075 1.1 christos Idx match_last = REG_MISSING; 1076 1.1 christos Idx cur_str_idx = re_string_cur_idx (&mctx->input); 1077 1.1 christos re_dfastate_t *cur_state; 1078 1.1 christos bool at_init_state = p_match_first != NULL; 1079 1.1 christos Idx next_start_idx = cur_str_idx; 1080 1.1 christos 1081 1.1 christos err = REG_NOERROR; 1082 1.1 christos cur_state = acquire_init_state_context (&err, mctx, cur_str_idx); 1083 1.1 christos /* An initial state must not be NULL (invalid). */ 1084 1.1 christos if (BE (cur_state == NULL, 0)) 1085 1.1 christos { 1086 1.1 christos assert (err == REG_ESPACE); 1087 1.1 christos return REG_ERROR; 1088 1.1 christos } 1089 1.1 christos 1090 1.1 christos if (mctx->state_log != NULL) 1091 1.1 christos { 1092 1.1 christos mctx->state_log[cur_str_idx] = cur_state; 1093 1.1 christos 1094 1.1 christos /* Check OP_OPEN_SUBEXP in the initial state in case that we use them 1095 1.1 christos later. E.g. Processing back references. */ 1096 1.1 christos if (BE (dfa->nbackref, 0)) 1097 1.1 christos { 1098 1.1 christos at_init_state = false; 1099 1.1 christos err = check_subexp_matching_top (mctx, &cur_state->nodes, 0); 1100 1.1 christos if (BE (err != REG_NOERROR, 0)) 1101 1.1 christos return err; 1102 1.1 christos 1103 1.1 christos if (cur_state->has_backref) 1104 1.1 christos { 1105 1.1 christos err = transit_state_bkref (mctx, &cur_state->nodes); 1106 1.1 christos if (BE (err != REG_NOERROR, 0)) 1107 1.1 christos return err; 1108 1.1 christos } 1109 1.1 christos } 1110 1.1 christos } 1111 1.1 christos 1112 1.1 christos /* If the RE accepts NULL string. */ 1113 1.1 christos if (BE (cur_state->halt, 0)) 1114 1.1 christos { 1115 1.1 christos if (!cur_state->has_constraint 1116 1.1 christos || check_halt_state_context (mctx, cur_state, cur_str_idx)) 1117 1.1 christos { 1118 1.1 christos if (!fl_longest_match) 1119 1.1 christos return cur_str_idx; 1120 1.1 christos else 1121 1.1 christos { 1122 1.1 christos match_last = cur_str_idx; 1123 1.1 christos match = 1; 1124 1.1 christos } 1125 1.1 christos } 1126 1.1 christos } 1127 1.1 christos 1128 1.1 christos while (!re_string_eoi (&mctx->input)) 1129 1.1 christos { 1130 1.1 christos re_dfastate_t *old_state = cur_state; 1131 1.1 christos Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1; 1132 1.1 christos 1133 1.1 christos if (BE (next_char_idx >= mctx->input.bufs_len, 0) 1134 1.1 christos || (BE (next_char_idx >= mctx->input.valid_len, 0) 1135 1.1 christos && mctx->input.valid_len < mctx->input.len)) 1136 1.1 christos { 1137 1.1 christos err = extend_buffers (mctx); 1138 1.1 christos if (BE (err != REG_NOERROR, 0)) 1139 1.1 christos { 1140 1.1 christos assert (err == REG_ESPACE); 1141 1.1 christos return REG_ERROR; 1142 1.1 christos } 1143 1.1 christos } 1144 1.1 christos 1145 1.1 christos cur_state = transit_state (&err, mctx, cur_state); 1146 1.1 christos if (mctx->state_log != NULL) 1147 1.1 christos cur_state = merge_state_with_log (&err, mctx, cur_state); 1148 1.1 christos 1149 1.1 christos if (cur_state == NULL) 1150 1.1 christos { 1151 1.1 christos /* Reached the invalid state or an error. Try to recover a valid 1152 1.1 christos state using the state log, if available and if we have not 1153 1.1 christos already found a valid (even if not the longest) match. */ 1154 1.1 christos if (BE (err != REG_NOERROR, 0)) 1155 1.1 christos return REG_ERROR; 1156 1.1 christos 1157 1.1 christos if (mctx->state_log == NULL 1158 1.1 christos || (match && !fl_longest_match) 1159 1.1 christos || (cur_state = find_recover_state (&err, mctx)) == NULL) 1160 1.1 christos break; 1161 1.1 christos } 1162 1.1 christos 1163 1.1 christos if (BE (at_init_state, 0)) 1164 1.1 christos { 1165 1.1 christos if (old_state == cur_state) 1166 1.1 christos next_start_idx = next_char_idx; 1167 1.1 christos else 1168 1.1 christos at_init_state = false; 1169 1.1 christos } 1170 1.1 christos 1171 1.1 christos if (cur_state->halt) 1172 1.1 christos { 1173 1.1 christos /* Reached a halt state. 1174 1.1 christos Check the halt state can satisfy the current context. */ 1175 1.1 christos if (!cur_state->has_constraint 1176 1.1 christos || check_halt_state_context (mctx, cur_state, 1177 1.1 christos re_string_cur_idx (&mctx->input))) 1178 1.1 christos { 1179 1.1 christos /* We found an appropriate halt state. */ 1180 1.1 christos match_last = re_string_cur_idx (&mctx->input); 1181 1.1 christos match = 1; 1182 1.1 christos 1183 1.1 christos /* We found a match, do not modify match_first below. */ 1184 1.1 christos p_match_first = NULL; 1185 1.1 christos if (!fl_longest_match) 1186 1.1 christos break; 1187 1.1 christos } 1188 1.1 christos } 1189 1.1 christos } 1190 1.1 christos 1191 1.1 christos if (p_match_first) 1192 1.1 christos *p_match_first += next_start_idx; 1193 1.1 christos 1194 1.1 christos return match_last; 1195 1.1 christos } 1196 1.1 christos 1197 1.1 christos /* Check NODE match the current context. */ 1198 1.1 christos 1199 1.1 christos static bool 1200 1.1 christos internal_function 1201 1.1 christos check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context) 1202 1.1 christos { 1203 1.1 christos re_token_type_t type = dfa->nodes[node].type; 1204 1.1 christos unsigned int constraint = dfa->nodes[node].constraint; 1205 1.1 christos if (type != END_OF_RE) 1206 1.1 christos return false; 1207 1.1 christos if (!constraint) 1208 1.1 christos return true; 1209 1.1 christos if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context)) 1210 1.1 christos return false; 1211 1.1 christos return true; 1212 1.1 christos } 1213 1.1 christos 1214 1.1 christos /* Check the halt state STATE match the current context. 1215 1.1 christos Return 0 if not match, if the node, STATE has, is a halt node and 1216 1.1 christos match the context, return the node. */ 1217 1.1 christos 1218 1.1 christos static Idx 1219 1.1 christos internal_function 1220 1.1 christos check_halt_state_context (const re_match_context_t *mctx, 1221 1.1 christos const re_dfastate_t *state, Idx idx) 1222 1.1 christos { 1223 1.1 christos Idx i; 1224 1.1 christos unsigned int context; 1225 1.1 christos #ifdef DEBUG 1226 1.1 christos assert (state->halt); 1227 1.1 christos #endif 1228 1.1 christos context = re_string_context_at (&mctx->input, idx, mctx->eflags); 1229 1.1 christos for (i = 0; i < state->nodes.nelem; ++i) 1230 1.1 christos if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context)) 1231 1.1 christos return state->nodes.elems[i]; 1232 1.1 christos return 0; 1233 1.1 christos } 1234 1.1 christos 1235 1.1 christos /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA 1236 1.1 christos corresponding to the DFA). 1237 1.1 christos Return the destination node, and update EPS_VIA_NODES; 1238 1.1 christos return REG_MISSING in case of errors. */ 1239 1.1 christos 1240 1.1 christos static Idx 1241 1.1 christos internal_function 1242 1.1 christos proceed_next_node (const re_match_context_t *mctx, 1243 1.1 christos Idx nregs, regmatch_t *regs, Idx *pidx, Idx node, 1244 1.1 christos re_node_set *eps_via_nodes, struct re_fail_stack_t *fs) 1245 1.1 christos { 1246 1.1 christos re_dfa_t *const dfa = mctx->dfa; 1247 1.1 christos Idx i; 1248 1.1 christos bool ok; 1249 1.1 christos if (IS_EPSILON_NODE (dfa->nodes[node].type)) 1250 1.1 christos { 1251 1.1 christos re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; 1252 1.1 christos re_node_set *edests = &dfa->edests[node]; 1253 1.1 christos Idx dest_node; 1254 1.1 christos ok = re_node_set_insert (eps_via_nodes, node); 1255 1.1 christos if (BE (! ok, 0)) 1256 1.1 christos return REG_ERROR; 1257 1.1 christos /* Pick up a valid destination, or return REG_MISSING if none 1258 1.1 christos is found. */ 1259 1.1 christos for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i) 1260 1.1 christos { 1261 1.1 christos Idx candidate = edests->elems[i]; 1262 1.1 christos if (!re_node_set_contains (cur_nodes, candidate)) 1263 1.1 christos continue; 1264 1.1 christos if (dest_node == REG_MISSING) 1265 1.1 christos dest_node = candidate; 1266 1.1 christos 1267 1.1 christos else 1268 1.1 christos { 1269 1.1 christos /* In order to avoid infinite loop like "(a*)*", return the second 1270 1.1 christos epsilon-transition if the first was already considered. */ 1271 1.1 christos if (re_node_set_contains (eps_via_nodes, dest_node)) 1272 1.1 christos return candidate; 1273 1.1 christos 1274 1.1 christos /* Otherwise, push the second epsilon-transition on the fail stack. */ 1275 1.1 christos else if (fs != NULL 1276 1.1 christos && push_fail_stack (fs, *pidx, candidate, nregs, regs, 1277 1.1 christos eps_via_nodes)) 1278 1.1 christos return REG_ERROR; 1279 1.1 christos 1280 1.1 christos /* We know we are going to exit. */ 1281 1.1 christos break; 1282 1.1 christos } 1283 1.1 christos } 1284 1.1 christos return dest_node; 1285 1.1 christos } 1286 1.1 christos else 1287 1.1 christos { 1288 1.1 christos Idx naccepted = 0; 1289 1.1 christos re_token_type_t type = dfa->nodes[node].type; 1290 1.1 christos 1291 1.1 christos #ifdef RE_ENABLE_I18N 1292 1.1 christos if (dfa->nodes[node].accept_mb) 1293 1.1 christos naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx); 1294 1.1 christos else 1295 1.1 christos #endif /* RE_ENABLE_I18N */ 1296 1.1 christos if (type == OP_BACK_REF) 1297 1.1 christos { 1298 1.1 christos Idx subexp_idx = dfa->nodes[node].opr.idx + 1; 1299 1.1 christos naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; 1300 1.1 christos if (fs != NULL) 1301 1.1 christos { 1302 1.1 christos if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1) 1303 1.1 christos return REG_MISSING; 1304 1.1 christos else if (naccepted) 1305 1.1 christos { 1306 1.1 christos char *buf = (char *) re_string_get_buffer (&mctx->input); 1307 1.1 christos if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, 1308 1.1 christos naccepted) != 0) 1309 1.1 christos return REG_MISSING; 1310 1.1 christos } 1311 1.1 christos } 1312 1.1 christos 1313 1.1 christos if (naccepted == 0) 1314 1.1 christos { 1315 1.1 christos Idx dest_node; 1316 1.1 christos ok = re_node_set_insert (eps_via_nodes, node); 1317 1.1 christos if (BE (! ok, 0)) 1318 1.1 christos return REG_ERROR; 1319 1.1 christos dest_node = dfa->edests[node].elems[0]; 1320 1.1 christos if (re_node_set_contains (&mctx->state_log[*pidx]->nodes, 1321 1.1 christos dest_node)) 1322 1.1 christos return dest_node; 1323 1.1 christos } 1324 1.1 christos } 1325 1.1 christos 1326 1.1 christos if (naccepted != 0 1327 1.1 christos || check_node_accept (mctx, dfa->nodes + node, *pidx)) 1328 1.1 christos { 1329 1.1 christos Idx dest_node = dfa->nexts[node]; 1330 1.1 christos *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted; 1331 1.1 christos if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL 1332 1.1 christos || !re_node_set_contains (&mctx->state_log[*pidx]->nodes, 1333 1.1 christos dest_node))) 1334 1.1 christos return REG_MISSING; 1335 1.1 christos re_node_set_empty (eps_via_nodes); 1336 1.1 christos return dest_node; 1337 1.1 christos } 1338 1.1 christos } 1339 1.1 christos return REG_MISSING; 1340 1.1 christos } 1341 1.1 christos 1342 1.1 christos static reg_errcode_t 1343 1.1 christos internal_function 1344 1.1 christos push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, 1345 1.1 christos Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes) 1346 1.1 christos { 1347 1.1 christos reg_errcode_t err; 1348 1.1 christos Idx num = fs->num++; 1349 1.1 christos if (fs->num == fs->alloc) 1350 1.1 christos { 1351 1.1 christos struct re_fail_stack_ent_t *new_array = 1352 1.1 christos re_x2realloc (fs->stack, struct re_fail_stack_ent_t, &fs->alloc); 1353 1.1 christos if (new_array == NULL) 1354 1.1 christos return REG_ESPACE; 1355 1.1 christos fs->stack = new_array; 1356 1.1 christos } 1357 1.1 christos fs->stack[num].idx = str_idx; 1358 1.1 christos fs->stack[num].node = dest_node; 1359 1.1 christos fs->stack[num].regs = re_xmalloc (regmatch_t, nregs); 1360 1.1 christos if (fs->stack[num].regs == NULL) 1361 1.1 christos return REG_ESPACE; 1362 1.1 christos memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); 1363 1.1 christos err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); 1364 1.1 christos return err; 1365 1.1 christos } 1366 1.1 christos 1367 1.1 christos static Idx 1368 1.1 christos internal_function 1369 1.1 christos pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, 1370 1.1 christos Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes) 1371 1.1 christos { 1372 1.1 christos Idx num = --fs->num; 1373 1.1 christos assert (REG_VALID_INDEX (num)); 1374 1.1 christos *pidx = fs->stack[num].idx; 1375 1.1 christos memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); 1376 1.1 christos re_node_set_free (eps_via_nodes); 1377 1.1 christos re_free (fs->stack[num].regs); 1378 1.1 christos *eps_via_nodes = fs->stack[num].eps_via_nodes; 1379 1.1 christos return fs->stack[num].node; 1380 1.1 christos } 1381 1.1 christos 1382 1.1 christos /* Set the positions where the subexpressions are starts/ends to registers 1383 1.1 christos PMATCH. 1384 1.1 christos Note: We assume that pmatch[0] is already set, and 1385 1.1 christos pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */ 1386 1.1 christos 1387 1.1 christos static reg_errcode_t 1388 1.1 christos internal_function 1389 1.1 christos set_regs (const regex_t *preg, const re_match_context_t *mctx, 1390 1.1 christos size_t nmatch, regmatch_t *pmatch, bool fl_backtrack) 1391 1.1 christos { 1392 1.1 christos re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer; 1393 1.1 christos Idx idx, cur_node; 1394 1.1 christos re_node_set eps_via_nodes; 1395 1.1 christos struct re_fail_stack_t *fs; 1396 1.1 christos struct re_fail_stack_t fs_body = { 0, 2, NULL }; 1397 1.1 christos regmatch_t *prev_idx_match; 1398 1.1 christos bool prev_idx_match_malloced = false; 1399 1.1 christos 1400 1.1 christos #ifdef DEBUG 1401 1.1 christos assert (nmatch > 1); 1402 1.1 christos assert (mctx->state_log != NULL); 1403 1.1 christos #endif 1404 1.1 christos if (fl_backtrack) 1405 1.1 christos { 1406 1.1 christos fs = &fs_body; 1407 1.1 christos fs->stack = re_xmalloc (struct re_fail_stack_ent_t, fs->alloc); 1408 1.1 christos if (fs->stack == NULL) 1409 1.1 christos return REG_ESPACE; 1410 1.1 christos } 1411 1.1 christos else 1412 1.1 christos fs = NULL; 1413 1.1 christos 1414 1.1 christos cur_node = dfa->init_node; 1415 1.1 christos re_node_set_init_empty (&eps_via_nodes); 1416 1.1 christos 1417 1.1 christos if (re_alloc_oversized (nmatch, sizeof (regmatch_t))) 1418 1.1 christos { 1419 1.2 christos free_fail_stack_return (fs); 1420 1.1 christos return REG_ESPACE; 1421 1.1 christos } 1422 1.1 christos #ifndef __SSP__ 1423 1.2 christos if (__libc_use_alloca (nmatch * sizeof (regmatch_t))) 1424 1.1 christos prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t)); 1425 1.1 christos else 1426 1.1 christos #endif 1427 1.1 christos { 1428 1.1 christos prev_idx_match = re_malloc (regmatch_t, nmatch); 1429 1.1 christos if (prev_idx_match == NULL) 1430 1.1 christos { 1431 1.1 christos free_fail_stack_return (fs); 1432 1.1 christos return REG_ESPACE; 1433 1.1 christos } 1434 1.1 christos prev_idx_match_malloced = true; 1435 1.1 christos } 1436 1.1 christos memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); 1437 1.1 christos 1438 1.1 christos for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;) 1439 1.1 christos { 1440 1.1 christos update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch); 1441 1.1 christos 1442 1.1 christos if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) 1443 1.1 christos { 1444 1.1 christos Idx reg_idx; 1445 1.1 christos if (fs) 1446 1.1 christos { 1447 1.1 christos for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) 1448 1.1 christos if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) 1449 1.1 christos break; 1450 1.1 christos if (reg_idx == nmatch) 1451 1.1 christos { 1452 1.1 christos re_node_set_free (&eps_via_nodes); 1453 1.1 christos if (prev_idx_match_malloced) 1454 1.1 christos re_free (prev_idx_match); 1455 1.1 christos return free_fail_stack_return (fs); 1456 1.1 christos } 1457 1.1 christos cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, 1458 1.1 christos &eps_via_nodes); 1459 1.1 christos } 1460 1.1 christos else 1461 1.1 christos { 1462 1.1 christos re_node_set_free (&eps_via_nodes); 1463 1.1 christos if (prev_idx_match_malloced) 1464 1.1 christos re_free (prev_idx_match); 1465 1.1 christos return REG_NOERROR; 1466 1.1 christos } 1467 1.1 christos } 1468 1.1 christos 1469 1.1 christos /* Proceed to next node. */ 1470 1.1 christos cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node, 1471 1.1 christos &eps_via_nodes, fs); 1472 1.1 christos 1473 1.1 christos if (BE (! REG_VALID_INDEX (cur_node), 0)) 1474 1.1 christos { 1475 1.1 christos if (BE (cur_node == REG_ERROR, 0)) 1476 1.1 christos { 1477 1.1 christos re_node_set_free (&eps_via_nodes); 1478 1.1 christos if (prev_idx_match_malloced) 1479 1.1 christos re_free (prev_idx_match); 1480 1.1 christos free_fail_stack_return (fs); 1481 1.1 christos return REG_ESPACE; 1482 1.1 christos } 1483 1.1 christos if (fs) 1484 1.1 christos cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, 1485 1.1 christos &eps_via_nodes); 1486 1.1 christos else 1487 1.1 christos { 1488 1.1 christos re_node_set_free (&eps_via_nodes); 1489 1.1 christos if (prev_idx_match_malloced) 1490 1.1 christos re_free (prev_idx_match); 1491 1.1 christos return REG_NOMATCH; 1492 1.1 christos } 1493 1.1 christos } 1494 1.1 christos } 1495 1.1 christos re_node_set_free (&eps_via_nodes); 1496 1.1 christos if (prev_idx_match_malloced) 1497 1.1 christos re_free (prev_idx_match); 1498 1.1 christos return free_fail_stack_return (fs); 1499 1.1 christos } 1500 1.1 christos 1501 1.1 christos static reg_errcode_t 1502 1.1 christos internal_function 1503 1.1 christos free_fail_stack_return (struct re_fail_stack_t *fs) 1504 1.1 christos { 1505 1.1 christos if (fs) 1506 1.1 christos { 1507 1.1 christos Idx fs_idx; 1508 1.1 christos for (fs_idx = 0; fs_idx < fs->num; ++fs_idx) 1509 1.1 christos { 1510 1.1 christos re_node_set_free (&fs->stack[fs_idx].eps_via_nodes); 1511 1.1 christos re_free (fs->stack[fs_idx].regs); 1512 1.1 christos } 1513 1.1 christos re_free (fs->stack); 1514 1.1 christos } 1515 1.1 christos return REG_NOERROR; 1516 1.1 christos } 1517 1.1 christos 1518 1.1 christos static void 1519 1.1 christos internal_function 1520 1.1 christos update_regs (re_dfa_t *dfa, regmatch_t *pmatch, regmatch_t *prev_idx_match, 1521 1.1 christos Idx cur_node, Idx cur_idx, Idx nmatch) 1522 1.1 christos { 1523 1.1 christos int type = dfa->nodes[cur_node].type; 1524 1.1 christos if (type == OP_OPEN_SUBEXP) 1525 1.1 christos { 1526 1.1 christos Idx reg_num = dfa->nodes[cur_node].opr.idx + 1; 1527 1.1 christos 1528 1.1 christos /* We are at the first node of this sub expression. */ 1529 1.1 christos if (reg_num < nmatch) 1530 1.1 christos { 1531 1.1 christos pmatch[reg_num].rm_so = cur_idx; 1532 1.1 christos pmatch[reg_num].rm_eo = -1; 1533 1.1 christos } 1534 1.1 christos } 1535 1.1 christos else if (type == OP_CLOSE_SUBEXP) 1536 1.1 christos { 1537 1.1 christos Idx reg_num = dfa->nodes[cur_node].opr.idx + 1; 1538 1.1 christos if (reg_num < nmatch) 1539 1.1 christos { 1540 1.1 christos /* We are at the last node of this sub expression. */ 1541 1.1 christos if (pmatch[reg_num].rm_so < cur_idx) 1542 1.1 christos { 1543 1.1 christos pmatch[reg_num].rm_eo = cur_idx; 1544 1.1 christos /* This is a non-empty match or we are not inside an optional 1545 1.1 christos subexpression. Accept this right away. */ 1546 1.1 christos memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); 1547 1.1 christos } 1548 1.1 christos else 1549 1.1 christos { 1550 1.1 christos if (dfa->nodes[cur_node].opt_subexp 1551 1.1 christos && prev_idx_match[reg_num].rm_so != -1) 1552 1.1 christos /* We transited through an empty match for an optional 1553 1.1 christos subexpression, like (a?)*, and this is not the subexp's 1554 1.1 christos first match. Copy back the old content of the registers 1555 1.1 christos so that matches of an inner subexpression are undone as 1556 1.1 christos well, like in ((a?))*. */ 1557 1.1 christos memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch); 1558 1.1 christos else 1559 1.1 christos /* We completed a subexpression, but it may be part of 1560 1.1 christos an optional one, so do not update PREV_IDX_MATCH. */ 1561 1.1 christos pmatch[reg_num].rm_eo = cur_idx; 1562 1.1 christos } 1563 1.1 christos } 1564 1.1 christos } 1565 1.1 christos } 1566 1.1 christos 1567 1.1 christos /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0 1568 1.1 christos and sift the nodes in each states according to the following rules. 1569 1.1 christos Updated state_log will be wrote to STATE_LOG. 1570 1.1 christos 1571 1.1 christos Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if... 1572 1.1 christos 1. When STR_IDX == MATCH_LAST(the last index in the state_log): 1573 1.1 christos If `a' isn't the LAST_NODE and `a' can't epsilon transit to 1574 1.1 christos the LAST_NODE, we throw away the node `a'. 1575 1.1 christos 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts 1576 1.1 christos string `s' and transit to `b': 1577 1.1 christos i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw 1578 1.1 christos away the node `a'. 1579 1.1 christos ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is 1580 1.1 christos thrown away, we throw away the node `a'. 1581 1.1 christos 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': 1582 1.1 christos i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the 1583 1.1 christos node `a'. 1584 1.1 christos ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away, 1585 1.1 christos we throw away the node `a'. */ 1586 1.1 christos 1587 1.1 christos #define STATE_NODE_CONTAINS(state,node) \ 1588 1.1 christos ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) 1589 1.1 christos 1590 1.1 christos static reg_errcode_t 1591 1.1 christos internal_function 1592 1.1 christos sift_states_backward (re_match_context_t *mctx, re_sift_context_t *sctx) 1593 1.1 christos { 1594 1.1 christos reg_errcode_t err; 1595 1.1 christos int null_cnt = 0; 1596 1.1 christos Idx str_idx = sctx->last_str_idx; 1597 1.1 christos re_node_set cur_dest; 1598 1.1 christos 1599 1.1 christos #ifdef DEBUG 1600 1.1 christos assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL); 1601 1.1 christos #endif 1602 1.1 christos 1603 1.1 christos /* Build sifted state_log[str_idx]. It has the nodes which can epsilon 1604 1.1 christos transit to the last_node and the last_node itself. */ 1605 1.1 christos err = re_node_set_init_1 (&cur_dest, sctx->last_node); 1606 1.1 christos if (BE (err != REG_NOERROR, 0)) 1607 1.1 christos return err; 1608 1.1 christos err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); 1609 1.1 christos if (BE (err != REG_NOERROR, 0)) 1610 1.1 christos goto free_return; 1611 1.1 christos 1612 1.1 christos /* Then check each states in the state_log. */ 1613 1.1 christos while (str_idx > 0) 1614 1.1 christos { 1615 1.1 christos /* Update counters. */ 1616 1.1 christos null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0; 1617 1.1 christos if (null_cnt > mctx->max_mb_elem_len) 1618 1.1 christos { 1619 1.1 christos memset (sctx->sifted_states, '\0', 1620 1.1 christos sizeof (re_dfastate_t *) * str_idx); 1621 1.1 christos re_node_set_free (&cur_dest); 1622 1.1 christos return REG_NOERROR; 1623 1.1 christos } 1624 1.1 christos re_node_set_empty (&cur_dest); 1625 1.1 christos --str_idx; 1626 1.1 christos 1627 1.1 christos if (mctx->state_log[str_idx]) 1628 1.1 christos { 1629 1.1 christos err = build_sifted_states (mctx, sctx, str_idx, &cur_dest); 1630 1.1 christos if (BE (err != REG_NOERROR, 0)) 1631 1.1 christos goto free_return; 1632 1.1 christos } 1633 1.1 christos 1634 1.1 christos /* Add all the nodes which satisfy the following conditions: 1635 1.1 christos - It can epsilon transit to a node in CUR_DEST. 1636 1.1 christos - It is in CUR_SRC. 1637 1.1 christos And update state_log. */ 1638 1.1 christos err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); 1639 1.1 christos if (BE (err != REG_NOERROR, 0)) 1640 1.1 christos goto free_return; 1641 1.1 christos } 1642 1.1 christos err = REG_NOERROR; 1643 1.1 christos free_return: 1644 1.1 christos re_node_set_free (&cur_dest); 1645 1.1 christos return err; 1646 1.1 christos } 1647 1.1 christos 1648 1.1 christos static reg_errcode_t 1649 1.1 christos internal_function 1650 1.1 christos build_sifted_states (re_match_context_t *mctx, re_sift_context_t *sctx, 1651 1.1 christos Idx str_idx, re_node_set *cur_dest) 1652 1.1 christos { 1653 1.1 christos re_dfa_t *const dfa = mctx->dfa; 1654 1.1 christos re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes; 1655 1.1 christos Idx i; 1656 1.1 christos 1657 1.1 christos /* Then build the next sifted state. 1658 1.1 christos We build the next sifted state on `cur_dest', and update 1659 1.1 christos `sifted_states[str_idx]' with `cur_dest'. 1660 1.1 christos Note: 1661 1.1 christos `cur_dest' is the sifted state from `state_log[str_idx + 1]'. 1662 1.1 christos `cur_src' points the node_set of the old `state_log[str_idx]' 1663 1.1 christos (with the epsilon nodes pre-filtered out). */ 1664 1.1 christos for (i = 0; i < cur_src->nelem; i++) 1665 1.1 christos { 1666 1.1 christos Idx prev_node = cur_src->elems[i]; 1667 1.1 christos int naccepted = 0; 1668 1.1 christos bool ok; 1669 1.1 christos 1670 1.1 christos #ifdef DEBUG 1671 1.1 christos re_token_type_t type = dfa->nodes[prev_node].type; 1672 1.1 christos assert (!IS_EPSILON_NODE (type)); 1673 1.1 christos #endif 1674 1.1 christos #ifdef RE_ENABLE_I18N 1675 1.1 christos /* If the node may accept `multi byte'. */ 1676 1.1 christos if (dfa->nodes[prev_node].accept_mb) 1677 1.1 christos naccepted = sift_states_iter_mb (mctx, sctx, prev_node, 1678 1.1 christos str_idx, sctx->last_str_idx); 1679 1.1 christos #endif /* RE_ENABLE_I18N */ 1680 1.1 christos 1681 1.1 christos /* We don't check backreferences here. 1682 1.1 christos See update_cur_sifted_state(). */ 1683 1.1 christos if (!naccepted 1684 1.1 christos && check_node_accept (mctx, dfa->nodes + prev_node, str_idx) 1685 1.1 christos && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1], 1686 1.1 christos dfa->nexts[prev_node])) 1687 1.1 christos naccepted = 1; 1688 1.1 christos 1689 1.1 christos if (naccepted == 0) 1690 1.1 christos continue; 1691 1.1 christos 1692 1.1 christos if (sctx->limits.nelem) 1693 1.1 christos { 1694 1.1 christos Idx to_idx = str_idx + naccepted; 1695 1.1 christos if (check_dst_limits (mctx, &sctx->limits, 1696 1.1 christos dfa->nexts[prev_node], to_idx, 1697 1.1 christos prev_node, str_idx)) 1698 1.1 christos continue; 1699 1.1 christos } 1700 1.1 christos ok = re_node_set_insert (cur_dest, prev_node); 1701 1.1 christos if (BE (! ok, 0)) 1702 1.1 christos return REG_ESPACE; 1703 1.1 christos } 1704 1.1 christos 1705 1.1 christos return REG_NOERROR; 1706 1.1 christos } 1707 1.1 christos 1708 1.1 christos /* Helper functions. */ 1709 1.1 christos 1710 1.1 christos static reg_errcode_t 1711 1.1 christos internal_function 1712 1.1 christos clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx) 1713 1.1 christos { 1714 1.1 christos Idx top = mctx->state_log_top; 1715 1.1 christos 1716 1.1 christos if (next_state_log_idx >= mctx->input.bufs_len 1717 1.1 christos || (next_state_log_idx >= mctx->input.valid_len 1718 1.1 christos && mctx->input.valid_len < mctx->input.len)) 1719 1.1 christos { 1720 1.1 christos reg_errcode_t err; 1721 1.1 christos err = extend_buffers (mctx); 1722 1.1 christos if (BE (err != REG_NOERROR, 0)) 1723 1.1 christos return err; 1724 1.1 christos } 1725 1.1 christos 1726 1.1 christos if (top < next_state_log_idx) 1727 1.1 christos { 1728 1.1 christos memset (mctx->state_log + top + 1, '\0', 1729 1.1 christos sizeof (re_dfastate_t *) * (next_state_log_idx - top)); 1730 1.1 christos mctx->state_log_top = next_state_log_idx; 1731 1.1 christos } 1732 1.1 christos return REG_NOERROR; 1733 1.1 christos } 1734 1.1 christos 1735 1.1 christos static reg_errcode_t 1736 1.1 christos internal_function 1737 1.1 christos merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst, re_dfastate_t **src, 1738 1.1 christos Idx num) 1739 1.1 christos { 1740 1.1 christos Idx st_idx; 1741 1.1 christos reg_errcode_t err; 1742 1.1 christos for (st_idx = 0; st_idx < num; ++st_idx) 1743 1.1 christos { 1744 1.1 christos if (dst[st_idx] == NULL) 1745 1.1 christos dst[st_idx] = src[st_idx]; 1746 1.1 christos else if (src[st_idx] != NULL) 1747 1.1 christos { 1748 1.1 christos re_node_set merged_set; 1749 1.1 christos err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes, 1750 1.1 christos &src[st_idx]->nodes); 1751 1.1 christos if (BE (err != REG_NOERROR, 0)) 1752 1.1 christos return err; 1753 1.1 christos dst[st_idx] = re_acquire_state (&err, dfa, &merged_set); 1754 1.1 christos re_node_set_free (&merged_set); 1755 1.1 christos if (BE (err != REG_NOERROR, 0)) 1756 1.1 christos return err; 1757 1.1 christos } 1758 1.1 christos } 1759 1.1 christos return REG_NOERROR; 1760 1.1 christos } 1761 1.1 christos 1762 1.1 christos static reg_errcode_t 1763 1.1 christos internal_function 1764 1.1 christos update_cur_sifted_state (re_match_context_t *mctx, re_sift_context_t *sctx, 1765 1.1 christos Idx str_idx, re_node_set *dest_nodes) 1766 1.1 christos { 1767 1.1 christos re_dfa_t *const dfa = mctx->dfa; 1768 1.1 christos reg_errcode_t err; 1769 1.1 christos const re_node_set *candidates; 1770 1.1 christos candidates = ((mctx->state_log[str_idx] == NULL) ? NULL 1771 1.1 christos : &mctx->state_log[str_idx]->nodes); 1772 1.1 christos 1773 1.1 christos if (dest_nodes->nelem == 0) 1774 1.1 christos sctx->sifted_states[str_idx] = NULL; 1775 1.1 christos else 1776 1.1 christos { 1777 1.1 christos if (candidates) 1778 1.1 christos { 1779 1.1 christos /* At first, add the nodes which can epsilon transit to a node in 1780 1.1 christos DEST_NODE. */ 1781 1.1 christos err = add_epsilon_src_nodes (dfa, dest_nodes, candidates); 1782 1.1 christos if (BE (err != REG_NOERROR, 0)) 1783 1.1 christos return err; 1784 1.1 christos 1785 1.1 christos /* Then, check the limitations in the current sift_context. */ 1786 1.1 christos if (sctx->limits.nelem) 1787 1.1 christos { 1788 1.1 christos err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits, 1789 1.1 christos mctx->bkref_ents, str_idx); 1790 1.1 christos if (BE (err != REG_NOERROR, 0)) 1791 1.1 christos return err; 1792 1.1 christos } 1793 1.1 christos } 1794 1.1 christos 1795 1.1 christos sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes); 1796 1.1 christos if (BE (err != REG_NOERROR, 0)) 1797 1.1 christos return err; 1798 1.1 christos } 1799 1.1 christos 1800 1.1 christos if (candidates && mctx->state_log[str_idx]->has_backref) 1801 1.1 christos { 1802 1.1 christos err = sift_states_bkref (mctx, sctx, str_idx, candidates); 1803 1.1 christos if (BE (err != REG_NOERROR, 0)) 1804 1.1 christos return err; 1805 1.1 christos } 1806 1.1 christos return REG_NOERROR; 1807 1.1 christos } 1808 1.1 christos 1809 1.1 christos static reg_errcode_t 1810 1.1 christos internal_function 1811 1.1 christos add_epsilon_src_nodes (re_dfa_t *dfa, re_node_set *dest_nodes, 1812 1.1 christos const re_node_set *candidates) 1813 1.1 christos { 1814 1.1 christos reg_errcode_t err = REG_NOERROR; 1815 1.1 christos Idx i; 1816 1.1 christos 1817 1.1 christos re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes); 1818 1.1 christos if (BE (err != REG_NOERROR, 0)) 1819 1.1 christos return err; 1820 1.1 christos 1821 1.1 christos if (!state->inveclosure.alloc) 1822 1.1 christos { 1823 1.1 christos err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem); 1824 1.1 christos if (BE (err != REG_NOERROR, 0)) 1825 1.1 christos return REG_ESPACE; 1826 1.1 christos for (i = 0; i < dest_nodes->nelem; i++) 1827 1.1 christos re_node_set_merge (&state->inveclosure, 1828 1.1 christos dfa->inveclosures + dest_nodes->elems[i]); 1829 1.1 christos } 1830 1.1 christos return re_node_set_add_intersect (dest_nodes, candidates, 1831 1.1 christos &state->inveclosure); 1832 1.1 christos } 1833 1.1 christos 1834 1.1 christos static reg_errcode_t 1835 1.1 christos internal_function 1836 1.1 christos sub_epsilon_src_nodes (re_dfa_t *dfa, Idx node, re_node_set *dest_nodes, 1837 1.1 christos const re_node_set *candidates) 1838 1.1 christos { 1839 1.1 christos Idx ecl_idx; 1840 1.1 christos reg_errcode_t err; 1841 1.1 christos re_node_set *inv_eclosure = dfa->inveclosures + node; 1842 1.1 christos re_node_set except_nodes; 1843 1.1 christos re_node_set_init_empty (&except_nodes); 1844 1.1 christos for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) 1845 1.1 christos { 1846 1.1 christos Idx cur_node = inv_eclosure->elems[ecl_idx]; 1847 1.1 christos if (cur_node == node) 1848 1.1 christos continue; 1849 1.1 christos if (IS_EPSILON_NODE (dfa->nodes[cur_node].type)) 1850 1.1 christos { 1851 1.1 christos Idx edst1 = dfa->edests[cur_node].elems[0]; 1852 1.1 christos Idx edst2 = ((dfa->edests[cur_node].nelem > 1) 1853 1.1 christos ? dfa->edests[cur_node].elems[1] : REG_MISSING); 1854 1.1 christos if ((!re_node_set_contains (inv_eclosure, edst1) 1855 1.1 christos && re_node_set_contains (dest_nodes, edst1)) 1856 1.1 christos || (REG_VALID_NONZERO_INDEX (edst2) 1857 1.1 christos && !re_node_set_contains (inv_eclosure, edst2) 1858 1.1 christos && re_node_set_contains (dest_nodes, edst2))) 1859 1.1 christos { 1860 1.1 christos err = re_node_set_add_intersect (&except_nodes, candidates, 1861 1.1 christos dfa->inveclosures + cur_node); 1862 1.1 christos if (BE (err != REG_NOERROR, 0)) 1863 1.1 christos { 1864 1.1 christos re_node_set_free (&except_nodes); 1865 1.1 christos return err; 1866 1.1 christos } 1867 1.1 christos } 1868 1.1 christos } 1869 1.1 christos } 1870 1.1 christos for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) 1871 1.1 christos { 1872 1.1 christos Idx cur_node = inv_eclosure->elems[ecl_idx]; 1873 1.1 christos if (!re_node_set_contains (&except_nodes, cur_node)) 1874 1.1 christos { 1875 1.1 christos Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1; 1876 1.1 christos re_node_set_remove_at (dest_nodes, idx); 1877 1.1 christos } 1878 1.1 christos } 1879 1.1 christos re_node_set_free (&except_nodes); 1880 1.1 christos return REG_NOERROR; 1881 1.1 christos } 1882 1.1 christos 1883 1.1 christos static bool 1884 1.1 christos internal_function 1885 1.1 christos check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits, 1886 1.1 christos Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx) 1887 1.1 christos { 1888 1.1 christos re_dfa_t *const dfa = mctx->dfa; 1889 1.1 christos Idx lim_idx, src_pos, dst_pos; 1890 1.1 christos 1891 1.1 christos Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx); 1892 1.1 christos Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx); 1893 1.1 christos for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) 1894 1.1 christos { 1895 1.1 christos Idx subexp_idx; 1896 1.1 christos struct re_backref_cache_entry *ent; 1897 1.1 christos ent = mctx->bkref_ents + limits->elems[lim_idx]; 1898 1.1 christos subexp_idx = dfa->nodes[ent->node].opr.idx; 1899 1.1 christos 1900 1.1 christos dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], 1901 1.1 christos subexp_idx, dst_node, dst_idx, 1902 1.1 christos dst_bkref_idx); 1903 1.1 christos src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], 1904 1.1 christos subexp_idx, src_node, src_idx, 1905 1.1 christos src_bkref_idx); 1906 1.1 christos 1907 1.1 christos /* In case of: 1908 1.1 christos <src> <dst> ( <subexp> ) 1909 1.1 christos ( <subexp> ) <src> <dst> 1910 1.1 christos ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */ 1911 1.1 christos if (src_pos == dst_pos) 1912 1.1 christos continue; /* This is unrelated limitation. */ 1913 1.1 christos else 1914 1.1 christos return true; 1915 1.1 christos } 1916 1.1 christos return false; 1917 1.1 christos } 1918 1.1 christos 1919 1.1 christos static int 1920 1.1 christos internal_function 1921 1.1 christos check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, 1922 1.1 christos Idx subexp_idx, Idx from_node, Idx bkref_idx) 1923 1.1 christos { 1924 1.1 christos re_dfa_t *const dfa = mctx->dfa; 1925 1.1 christos re_node_set *eclosures = dfa->eclosures + from_node; 1926 1.1 christos Idx node_idx; 1927 1.1 christos 1928 1.1 christos /* Else, we are on the boundary: examine the nodes on the epsilon 1929 1.1 christos closure. */ 1930 1.1 christos for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) 1931 1.1 christos { 1932 1.1 christos Idx node = eclosures->elems[node_idx]; 1933 1.1 christos switch (dfa->nodes[node].type) 1934 1.1 christos { 1935 1.1 christos case OP_BACK_REF: 1936 1.1 christos if (bkref_idx != REG_MISSING) 1937 1.1 christos { 1938 1.1 christos struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; 1939 1.1 christos do 1940 1.1 christos { 1941 1.1 christos Idx dst; 1942 1.1 christos int cpos; 1943 1.1 christos 1944 1.1 christos if (ent->node != node) 1945 1.1 christos continue; 1946 1.1 christos 1947 1.1 christos if (subexp_idx < BITSET_WORD_BITS 1948 1.1 christos && !(ent->eps_reachable_subexps_map 1949 1.1 christos & ((bitset_word) 1 << subexp_idx))) 1950 1.1 christos continue; 1951 1.1 christos 1952 1.1 christos /* Recurse trying to reach the OP_OPEN_SUBEXP and 1953 1.1 christos OP_CLOSE_SUBEXP cases below. But, if the 1954 1.1 christos destination node is the same node as the source 1955 1.1 christos node, don't recurse because it would cause an 1956 1.1 christos infinite loop: a regex that exhibits this behavior 1957 1.1 christos is ()\1*\1* */ 1958 1.1 christos dst = dfa->edests[node].elems[0]; 1959 1.1 christos if (dst == from_node) 1960 1.1 christos { 1961 1.1 christos if (boundaries & 1) 1962 1.1 christos return -1; 1963 1.1 christos else /* if (boundaries & 2) */ 1964 1.1 christos return 0; 1965 1.1 christos } 1966 1.1 christos 1967 1.1 christos cpos = 1968 1.1 christos check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, 1969 1.1 christos dst, bkref_idx); 1970 1.1 christos if (cpos == -1 /* && (boundaries & 1) */) 1971 1.1 christos return -1; 1972 1.1 christos if (cpos == 0 && (boundaries & 2)) 1973 1.1 christos return 0; 1974 1.1 christos 1975 1.1 christos if (subexp_idx < BITSET_WORD_BITS) 1976 1.1 christos ent->eps_reachable_subexps_map &= 1977 1.1 christos ~ ((bitset_word) 1 << subexp_idx); 1978 1.1 christos } 1979 1.1 christos while (ent++->more); 1980 1.1 christos } 1981 1.1 christos break; 1982 1.1 christos 1983 1.1 christos case OP_OPEN_SUBEXP: 1984 1.1 christos if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx) 1985 1.1 christos return -1; 1986 1.1 christos break; 1987 1.1 christos 1988 1.1 christos case OP_CLOSE_SUBEXP: 1989 1.1 christos if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx) 1990 1.1 christos return 0; 1991 1.1 christos break; 1992 1.1 christos 1993 1.1 christos default: 1994 1.1 christos break; 1995 1.1 christos } 1996 1.1 christos } 1997 1.1 christos 1998 1.1 christos return (boundaries & 2) ? 1 : 0; 1999 1.1 christos } 2000 1.1 christos 2001 1.1 christos static int 2002 1.1 christos internal_function 2003 1.1 christos check_dst_limits_calc_pos (const re_match_context_t *mctx, 2004 1.1 christos Idx limit, Idx subexp_idx, 2005 1.1 christos Idx from_node, Idx str_idx, Idx bkref_idx) 2006 1.1 christos { 2007 1.1 christos struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; 2008 1.1 christos int boundaries; 2009 1.1 christos 2010 1.1 christos /* If we are outside the range of the subexpression, return -1 or 1. */ 2011 1.1 christos if (str_idx < lim->subexp_from) 2012 1.1 christos return -1; 2013 1.1 christos 2014 1.1 christos if (lim->subexp_to < str_idx) 2015 1.1 christos return 1; 2016 1.1 christos 2017 1.1 christos /* If we are within the subexpression, return 0. */ 2018 1.1 christos boundaries = (str_idx == lim->subexp_from); 2019 1.1 christos boundaries |= (str_idx == lim->subexp_to) << 1; 2020 1.1 christos if (boundaries == 0) 2021 1.1 christos return 0; 2022 1.1 christos 2023 1.1 christos /* Else, examine epsilon closure. */ 2024 1.1 christos return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, 2025 1.1 christos from_node, bkref_idx); 2026 1.1 christos } 2027 1.1 christos 2028 1.1 christos /* Check the limitations of sub expressions LIMITS, and remove the nodes 2029 1.1 christos which are against limitations from DEST_NODES. */ 2030 1.1 christos 2031 1.1 christos static reg_errcode_t 2032 1.1 christos internal_function 2033 1.1 christos check_subexp_limits (re_dfa_t *dfa, re_node_set *dest_nodes, 2034 1.1 christos const re_node_set *candidates, re_node_set *limits, 2035 1.1 christos struct re_backref_cache_entry *bkref_ents, Idx str_idx) 2036 1.1 christos { 2037 1.1 christos reg_errcode_t err; 2038 1.1 christos Idx node_idx, lim_idx; 2039 1.1 christos 2040 1.1 christos for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) 2041 1.1 christos { 2042 1.1 christos Idx subexp_idx; 2043 1.1 christos struct re_backref_cache_entry *ent; 2044 1.1 christos ent = bkref_ents + limits->elems[lim_idx]; 2045 1.1 christos 2046 1.1 christos if (str_idx <= ent->subexp_from || ent->str_idx < str_idx) 2047 1.1 christos continue; /* This is unrelated limitation. */ 2048 1.1 christos 2049 1.1 christos subexp_idx = dfa->nodes[ent->node].opr.idx; 2050 1.1 christos if (ent->subexp_to == str_idx) 2051 1.1 christos { 2052 1.1 christos Idx ops_node = REG_MISSING; 2053 1.1 christos Idx cls_node = REG_MISSING; 2054 1.1 christos for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) 2055 1.1 christos { 2056 1.1 christos Idx node = dest_nodes->elems[node_idx]; 2057 1.1 christos re_token_type_t type = dfa->nodes[node].type; 2058 1.1 christos if (type == OP_OPEN_SUBEXP 2059 1.1 christos && subexp_idx == dfa->nodes[node].opr.idx) 2060 1.1 christos ops_node = node; 2061 1.1 christos else if (type == OP_CLOSE_SUBEXP 2062 1.1 christos && subexp_idx == dfa->nodes[node].opr.idx) 2063 1.1 christos cls_node = node; 2064 1.1 christos } 2065 1.1 christos 2066 1.1 christos /* Check the limitation of the open subexpression. */ 2067 1.1 christos /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */ 2068 1.1 christos if (REG_VALID_INDEX (ops_node)) 2069 1.1 christos { 2070 1.1 christos err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes, 2071 1.1 christos candidates); 2072 1.1 christos if (BE (err != REG_NOERROR, 0)) 2073 1.1 christos return err; 2074 1.1 christos } 2075 1.1 christos 2076 1.1 christos /* Check the limitation of the close subexpression. */ 2077 1.1 christos if (REG_VALID_INDEX (cls_node)) 2078 1.1 christos for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) 2079 1.1 christos { 2080 1.1 christos Idx node = dest_nodes->elems[node_idx]; 2081 1.1 christos if (!re_node_set_contains (dfa->inveclosures + node, 2082 1.1 christos cls_node) 2083 1.1 christos && !re_node_set_contains (dfa->eclosures + node, 2084 1.1 christos cls_node)) 2085 1.1 christos { 2086 1.1 christos /* It is against this limitation. 2087 1.1 christos Remove it form the current sifted state. */ 2088 1.1 christos err = sub_epsilon_src_nodes (dfa, node, dest_nodes, 2089 1.1 christos candidates); 2090 1.1 christos if (BE (err != REG_NOERROR, 0)) 2091 1.1 christos return err; 2092 1.1 christos --node_idx; 2093 1.1 christos } 2094 1.1 christos } 2095 1.1 christos } 2096 1.1 christos else /* (ent->subexp_to != str_idx) */ 2097 1.1 christos { 2098 1.1 christos for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) 2099 1.1 christos { 2100 1.1 christos Idx node = dest_nodes->elems[node_idx]; 2101 1.1 christos re_token_type_t type = dfa->nodes[node].type; 2102 1.1 christos if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP) 2103 1.1 christos { 2104 1.1 christos if (subexp_idx != dfa->nodes[node].opr.idx) 2105 1.1 christos continue; 2106 1.1 christos /* It is against this limitation. 2107 1.1 christos Remove it form the current sifted state. */ 2108 1.1 christos err = sub_epsilon_src_nodes (dfa, node, dest_nodes, 2109 1.1 christos candidates); 2110 1.1 christos if (BE (err != REG_NOERROR, 0)) 2111 1.1 christos return err; 2112 1.1 christos } 2113 1.1 christos } 2114 1.1 christos } 2115 1.1 christos } 2116 1.1 christos return REG_NOERROR; 2117 1.1 christos } 2118 1.1 christos 2119 1.1 christos static reg_errcode_t 2120 1.1 christos internal_function 2121 1.1 christos sift_states_bkref (re_match_context_t *mctx, re_sift_context_t *sctx, 2122 1.1 christos Idx str_idx, const re_node_set *candidates) 2123 1.1 christos { 2124 1.1 christos re_dfa_t *const dfa = mctx->dfa; 2125 1.1 christos reg_errcode_t err; 2126 1.1 christos Idx node_idx, node; 2127 1.1 christos re_sift_context_t local_sctx; 2128 1.1 christos Idx first_idx = search_cur_bkref_entry (mctx, str_idx); 2129 1.1 christos 2130 1.1 christos if (first_idx == REG_MISSING) 2131 1.1 christos return REG_NOERROR; 2132 1.1 christos 2133 1.1 christos local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */ 2134 1.1 christos 2135 1.1 christos for (node_idx = 0; node_idx < candidates->nelem; ++node_idx) 2136 1.1 christos { 2137 1.1 christos Idx enabled_idx; 2138 1.1 christos re_token_type_t type; 2139 1.1 christos struct re_backref_cache_entry *entry; 2140 1.1 christos node = candidates->elems[node_idx]; 2141 1.1 christos type = dfa->nodes[node].type; 2142 1.1 christos /* Avoid infinite loop for the REs like "()\1+". */ 2143 1.1 christos if (node == sctx->last_node && str_idx == sctx->last_str_idx) 2144 1.1 christos continue; 2145 1.1 christos if (type != OP_BACK_REF) 2146 1.1 christos continue; 2147 1.1 christos 2148 1.1 christos entry = mctx->bkref_ents + first_idx; 2149 1.1 christos enabled_idx = first_idx; 2150 1.1 christos do 2151 1.1 christos { 2152 1.1 christos bool ok; 2153 1.1 christos Idx subexp_len, to_idx, dst_node; 2154 1.1 christos re_dfastate_t *cur_state; 2155 1.1 christos 2156 1.1 christos if (entry->node != node) 2157 1.1 christos continue; 2158 1.1 christos subexp_len = entry->subexp_to - entry->subexp_from; 2159 1.1 christos to_idx = str_idx + subexp_len; 2160 1.1 christos dst_node = (subexp_len ? dfa->nexts[node] 2161 1.1 christos : dfa->edests[node].elems[0]); 2162 1.1 christos 2163 1.1 christos if (to_idx > sctx->last_str_idx 2164 1.1 christos || sctx->sifted_states[to_idx] == NULL 2165 1.1 christos || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node) 2166 1.1 christos || check_dst_limits (mctx, &sctx->limits, node, 2167 1.1 christos str_idx, dst_node, to_idx)) 2168 1.1 christos continue; 2169 1.1 christos 2170 1.1 christos if (local_sctx.sifted_states == NULL) 2171 1.1 christos { 2172 1.1 christos local_sctx = *sctx; 2173 1.1 christos err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits); 2174 1.1 christos if (BE (err != REG_NOERROR, 0)) 2175 1.1 christos goto free_return; 2176 1.1 christos } 2177 1.1 christos local_sctx.last_node = node; 2178 1.1 christos local_sctx.last_str_idx = str_idx; 2179 1.1 christos ok = re_node_set_insert (&local_sctx.limits, enabled_idx); 2180 1.1 christos if (BE (! ok, 0)) 2181 1.1 christos { 2182 1.1 christos err = REG_ESPACE; 2183 1.1 christos goto free_return; 2184 1.1 christos } 2185 1.1 christos cur_state = local_sctx.sifted_states[str_idx]; 2186 1.1 christos err = sift_states_backward (mctx, &local_sctx); 2187 1.1 christos if (BE (err != REG_NOERROR, 0)) 2188 1.1 christos goto free_return; 2189 1.1 christos if (sctx->limited_states != NULL) 2190 1.1 christos { 2191 1.1 christos err = merge_state_array (dfa, sctx->limited_states, 2192 1.1 christos local_sctx.sifted_states, 2193 1.1 christos str_idx + 1); 2194 1.1 christos if (BE (err != REG_NOERROR, 0)) 2195 1.1 christos goto free_return; 2196 1.1 christos } 2197 1.1 christos local_sctx.sifted_states[str_idx] = cur_state; 2198 1.1 christos re_node_set_remove (&local_sctx.limits, enabled_idx); 2199 1.1 christos 2200 1.1 christos /* mctx->bkref_ents may have changed, reload the pointer. */ 2201 1.1 christos entry = mctx->bkref_ents + enabled_idx; 2202 1.1 christos } 2203 1.1 christos while (enabled_idx++, entry++->more); 2204 1.1 christos } 2205 1.1 christos err = REG_NOERROR; 2206 1.1 christos free_return: 2207 1.1 christos if (local_sctx.sifted_states != NULL) 2208 1.1 christos { 2209 1.1 christos re_node_set_free (&local_sctx.limits); 2210 1.1 christos } 2211 1.1 christos 2212 1.1 christos return err; 2213 1.1 christos } 2214 1.1 christos 2215 1.1 christos 2216 1.1 christos #ifdef RE_ENABLE_I18N 2217 1.1 christos static int 2218 1.1 christos internal_function 2219 1.1 christos sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, 2220 1.1 christos Idx node_idx, Idx str_idx, Idx max_str_idx) 2221 1.1 christos { 2222 1.1 christos re_dfa_t *const dfa = mctx->dfa; 2223 1.1 christos int naccepted; 2224 1.1 christos /* Check the node can accept `multi byte'. */ 2225 1.1 christos naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); 2226 1.1 christos if (naccepted > 0 && str_idx + naccepted <= max_str_idx && 2227 1.1 christos !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], 2228 1.1 christos dfa->nexts[node_idx])) 2229 1.1 christos /* The node can't accept the `multi byte', or the 2230 1.1 christos destination was already thrown away, then the node 2231 1.1 christos could't accept the current input `multi byte'. */ 2232 1.1 christos naccepted = 0; 2233 1.1 christos /* Otherwise, it is sure that the node could accept 2234 1.1 christos `naccepted' bytes input. */ 2235 1.1 christos return naccepted; 2236 1.1 christos } 2237 1.1 christos #endif /* RE_ENABLE_I18N */ 2238 1.1 christos 2239 1.1 christos 2240 1.1 christos /* Functions for state transition. */ 2242 1.1 christos 2243 1.1 christos /* Return the next state to which the current state STATE will transit by 2244 1.1 christos accepting the current input byte, and update STATE_LOG if necessary. 2245 1.1 christos If STATE can accept a multibyte char/collating element/back reference 2246 1.1 christos update the destination of STATE_LOG. */ 2247 1.1 christos 2248 1.1 christos static re_dfastate_t * 2249 1.1 christos internal_function 2250 1.1 christos transit_state (reg_errcode_t *err, re_match_context_t *mctx, 2251 1.1 christos re_dfastate_t *state) 2252 1.1 christos { 2253 1.1 christos re_dfastate_t **trtable; 2254 1.1 christos unsigned char ch; 2255 1.1 christos 2256 1.1 christos #ifdef RE_ENABLE_I18N 2257 1.1 christos /* If the current state can accept multibyte. */ 2258 1.1 christos if (BE (state->accept_mb, 0)) 2259 1.1 christos { 2260 1.1 christos *err = transit_state_mb (mctx, state); 2261 1.1 christos if (BE (*err != REG_NOERROR, 0)) 2262 1.1 christos return NULL; 2263 1.1 christos } 2264 1.1 christos #endif /* RE_ENABLE_I18N */ 2265 1.1 christos 2266 1.1 christos /* Then decide the next state with the single byte. */ 2267 1.1 christos #if 0 2268 1.1 christos if (0) 2269 1.1 christos /* don't use transition table */ 2270 1.1 christos return transit_state_sb (err, mctx, state); 2271 1.1 christos #endif 2272 1.1 christos 2273 1.1 christos /* Use transition table */ 2274 1.1 christos ch = re_string_fetch_byte (&mctx->input); 2275 1.1 christos for (;;) 2276 1.1 christos { 2277 1.1 christos trtable = state->trtable; 2278 1.1 christos if (BE (trtable != NULL, 1)) 2279 1.1 christos return trtable[ch]; 2280 1.1 christos 2281 1.1 christos trtable = state->word_trtable; 2282 1.1 christos if (BE (trtable != NULL, 1)) 2283 1.1 christos { 2284 1.1 christos unsigned int context; 2285 1.1 christos context 2286 1.1 christos = re_string_context_at (&mctx->input, 2287 1.1 christos re_string_cur_idx (&mctx->input) - 1, 2288 1.1 christos mctx->eflags); 2289 1.1 christos if (IS_WORD_CONTEXT (context)) 2290 1.1 christos return trtable[ch + SBC_MAX]; 2291 1.1 christos else 2292 1.1 christos return trtable[ch]; 2293 1.1 christos } 2294 1.1 christos 2295 1.1 christos if (!build_trtable (mctx->dfa, state)) 2296 1.1 christos { 2297 1.1 christos *err = REG_ESPACE; 2298 1.1 christos return NULL; 2299 1.1 christos } 2300 1.1 christos 2301 1.1 christos /* Retry, we now have a transition table. */ 2302 1.1 christos } 2303 1.1 christos } 2304 1.1 christos 2305 1.1 christos /* Update the state_log if we need */ 2306 1.1 christos re_dfastate_t * 2307 1.1 christos internal_function 2308 1.1 christos merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, 2309 1.1 christos re_dfastate_t *next_state) 2310 1.1 christos { 2311 1.1 christos re_dfa_t *const dfa = mctx->dfa; 2312 1.1 christos Idx cur_idx = re_string_cur_idx (&mctx->input); 2313 1.1 christos 2314 1.1 christos if (cur_idx > mctx->state_log_top) 2315 1.1 christos { 2316 1.1 christos mctx->state_log[cur_idx] = next_state; 2317 1.1 christos mctx->state_log_top = cur_idx; 2318 1.1 christos } 2319 1.1 christos else if (mctx->state_log[cur_idx] == 0) 2320 1.1 christos { 2321 1.1 christos mctx->state_log[cur_idx] = next_state; 2322 1.1 christos } 2323 1.1 christos else 2324 1.1 christos { 2325 1.1 christos re_dfastate_t *pstate; 2326 1.1 christos unsigned int context; 2327 1.1 christos re_node_set next_nodes, *log_nodes, *table_nodes = NULL; 2328 1.1 christos /* If (state_log[cur_idx] != 0), it implies that cur_idx is 2329 1.1 christos the destination of a multibyte char/collating element/ 2330 1.1 christos back reference. Then the next state is the union set of 2331 1.1 christos these destinations and the results of the transition table. */ 2332 1.1 christos pstate = mctx->state_log[cur_idx]; 2333 1.1 christos log_nodes = pstate->entrance_nodes; 2334 1.1 christos if (next_state != NULL) 2335 1.1 christos { 2336 1.1 christos table_nodes = next_state->entrance_nodes; 2337 1.1 christos *err = re_node_set_init_union (&next_nodes, table_nodes, 2338 1.1 christos log_nodes); 2339 1.1 christos if (BE (*err != REG_NOERROR, 0)) 2340 1.1 christos return NULL; 2341 1.1 christos } 2342 1.1 christos else 2343 1.1 christos next_nodes = *log_nodes; 2344 1.1 christos /* Note: We already add the nodes of the initial state, 2345 1.1 christos then we don't need to add them here. */ 2346 1.1 christos 2347 1.1 christos context = re_string_context_at (&mctx->input, 2348 1.1 christos re_string_cur_idx (&mctx->input) - 1, 2349 1.1 christos mctx->eflags); 2350 1.1 christos next_state = mctx->state_log[cur_idx] 2351 1.1 christos = re_acquire_state_context (err, dfa, &next_nodes, context); 2352 1.1 christos /* We don't need to check errors here, since the return value of 2353 1.1 christos this function is next_state and ERR is already set. */ 2354 1.1 christos 2355 1.1 christos if (table_nodes != NULL) 2356 1.1 christos re_node_set_free (&next_nodes); 2357 1.1 christos } 2358 1.1 christos 2359 1.1 christos if (BE (dfa->nbackref, 0) && next_state != NULL) 2360 1.1 christos { 2361 1.1 christos /* Check OP_OPEN_SUBEXP in the current state in case that we use them 2362 1.1 christos later. We must check them here, since the back references in the 2363 1.1 christos next state might use them. */ 2364 1.1 christos *err = check_subexp_matching_top (mctx, &next_state->nodes, 2365 1.1 christos cur_idx); 2366 1.1 christos if (BE (*err != REG_NOERROR, 0)) 2367 1.1 christos return NULL; 2368 1.1 christos 2369 1.1 christos /* If the next state has back references. */ 2370 1.1 christos if (next_state->has_backref) 2371 1.1 christos { 2372 1.1 christos *err = transit_state_bkref (mctx, &next_state->nodes); 2373 1.1 christos if (BE (*err != REG_NOERROR, 0)) 2374 1.1 christos return NULL; 2375 1.1 christos next_state = mctx->state_log[cur_idx]; 2376 1.1 christos } 2377 1.1 christos } 2378 1.1 christos 2379 1.1 christos return next_state; 2380 1.1 christos } 2381 1.1 christos 2382 1.1 christos /* Skip bytes in the input that correspond to part of a 2383 1.1 christos multi-byte match, then look in the log for a state 2384 1.1 christos from which to restart matching. */ 2385 1.1 christos static re_dfastate_t * 2386 1.1 christos internal_function 2387 1.1 christos find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) 2388 1.1 christos { 2389 1.1 christos re_dfastate_t *cur_state = NULL; 2390 1.1 christos do 2391 1.1 christos { 2392 1.1 christos Idx max = mctx->state_log_top; 2393 1.1 christos Idx cur_str_idx = re_string_cur_idx (&mctx->input); 2394 1.1 christos 2395 1.1 christos do 2396 1.1 christos { 2397 1.1 christos if (++cur_str_idx > max) 2398 1.1 christos return NULL; 2399 1.1 christos re_string_skip_bytes (&mctx->input, 1); 2400 1.1 christos } 2401 1.1 christos while (mctx->state_log[cur_str_idx] == NULL); 2402 1.1 christos 2403 1.1 christos cur_state = merge_state_with_log (err, mctx, NULL); 2404 1.1 christos } 2405 1.1 christos while (*err == REG_NOERROR && cur_state == NULL); 2406 1.1 christos return cur_state; 2407 1.1 christos } 2408 1.1 christos 2409 1.1 christos /* Helper functions for transit_state. */ 2410 1.1 christos 2411 1.1 christos /* From the node set CUR_NODES, pick up the nodes whose types are 2412 1.1 christos OP_OPEN_SUBEXP and which have corresponding back references in the regular 2413 1.1 christos expression. And register them to use them later for evaluating the 2414 1.1 christos correspoding back references. */ 2415 1.1 christos 2416 1.1 christos static reg_errcode_t 2417 1.1 christos internal_function 2418 1.1 christos check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, 2419 1.1 christos Idx str_idx) 2420 1.1 christos { 2421 1.1 christos re_dfa_t *const dfa = mctx->dfa; 2422 1.1 christos Idx node_idx; 2423 1.1 christos reg_errcode_t err; 2424 1.1 christos 2425 1.1 christos /* TODO: This isn't efficient. 2426 1.1 christos Because there might be more than one nodes whose types are 2427 1.1 christos OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all 2428 1.1 christos nodes. 2429 1.1 christos E.g. RE: (a){2} */ 2430 1.1 christos for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx) 2431 1.1 christos { 2432 1.1 christos Idx node = cur_nodes->elems[node_idx]; 2433 1.1 christos if (dfa->nodes[node].type == OP_OPEN_SUBEXP 2434 1.1 christos && dfa->nodes[node].opr.idx < BITSET_WORD_BITS 2435 1.1 christos && (dfa->used_bkref_map 2436 1.1 christos & ((bitset_word) 1 << dfa->nodes[node].opr.idx))) 2437 1.1 christos { 2438 1.1 christos err = match_ctx_add_subtop (mctx, node, str_idx); 2439 1.1 christos if (BE (err != REG_NOERROR, 0)) 2440 1.1 christos return err; 2441 1.1 christos } 2442 1.1 christos } 2443 1.1 christos return REG_NOERROR; 2444 1.1 christos } 2445 1.1 christos 2446 1.1 christos #if 0 2447 1.1 christos /* Return the next state to which the current state STATE will transit by 2448 1.1 christos accepting the current input byte. */ 2449 1.1 christos 2450 1.1 christos static re_dfastate_t * 2451 1.1 christos transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, 2452 1.1 christos re_dfastate_t *state) 2453 1.1 christos { 2454 1.1 christos re_dfa_t *const dfa = mctx->dfa; 2455 1.1 christos re_node_set next_nodes; 2456 1.1 christos re_dfastate_t *next_state; 2457 1.1 christos Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input); 2458 1.1 christos unsigned int context; 2459 1.1 christos 2460 1.1 christos *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1); 2461 1.1 christos if (BE (*err != REG_NOERROR, 0)) 2462 1.1 christos return NULL; 2463 1.1 christos for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt) 2464 1.1 christos { 2465 1.1 christos Idx cur_node = state->nodes.elems[node_cnt]; 2466 1.1 christos if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx)) 2467 1.1 christos { 2468 1.1 christos *err = re_node_set_merge (&next_nodes, 2469 1.1 christos dfa->eclosures + dfa->nexts[cur_node]); 2470 1.1 christos if (BE (*err != REG_NOERROR, 0)) 2471 1.1 christos { 2472 1.1 christos re_node_set_free (&next_nodes); 2473 1.1 christos return NULL; 2474 1.1 christos } 2475 1.1 christos } 2476 1.1 christos } 2477 1.1 christos context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags); 2478 1.1 christos next_state = re_acquire_state_context (err, dfa, &next_nodes, context); 2479 1.1 christos /* We don't need to check errors here, since the return value of 2480 1.1 christos this function is next_state and ERR is already set. */ 2481 1.1 christos 2482 1.1 christos re_node_set_free (&next_nodes); 2483 1.1 christos re_string_skip_bytes (&mctx->input, 1); 2484 1.1 christos return next_state; 2485 1.1 christos } 2486 1.1 christos #endif 2487 1.1 christos 2488 1.1 christos #ifdef RE_ENABLE_I18N 2489 1.1 christos static reg_errcode_t 2490 1.1 christos internal_function 2491 1.1 christos transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) 2492 1.1 christos { 2493 1.1 christos re_dfa_t *const dfa = mctx->dfa; 2494 1.1 christos reg_errcode_t err; 2495 1.1 christos Idx i; 2496 1.1 christos 2497 1.1 christos for (i = 0; i < pstate->nodes.nelem; ++i) 2498 1.1 christos { 2499 1.1 christos re_node_set dest_nodes, *new_nodes; 2500 1.1 christos Idx cur_node_idx = pstate->nodes.elems[i]; 2501 1.1 christos int naccepted; 2502 1.1 christos Idx dest_idx; 2503 1.1 christos unsigned int context; 2504 1.1 christos re_dfastate_t *dest_state; 2505 1.1 christos 2506 1.1 christos if (!dfa->nodes[cur_node_idx].accept_mb) 2507 1.1 christos continue; 2508 1.1 christos 2509 1.1 christos if (dfa->nodes[cur_node_idx].constraint) 2510 1.1 christos { 2511 1.1 christos context = re_string_context_at (&mctx->input, 2512 1.1 christos re_string_cur_idx (&mctx->input), 2513 1.1 christos mctx->eflags); 2514 1.1 christos if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint, 2515 1.1 christos context)) 2516 1.1 christos continue; 2517 1.1 christos } 2518 1.1 christos 2519 1.1 christos /* How many bytes the node can accept? */ 2520 1.1 christos naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input, 2521 1.1 christos re_string_cur_idx (&mctx->input)); 2522 1.1 christos if (naccepted == 0) 2523 1.1 christos continue; 2524 1.1 christos 2525 1.1 christos /* The node can accepts `naccepted' bytes. */ 2526 1.1 christos dest_idx = re_string_cur_idx (&mctx->input) + naccepted; 2527 1.1 christos mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted 2528 1.1 christos : mctx->max_mb_elem_len); 2529 1.1 christos err = clean_state_log_if_needed (mctx, dest_idx); 2530 1.1 christos if (BE (err != REG_NOERROR, 0)) 2531 1.1 christos return err; 2532 1.1 christos #ifdef DEBUG 2533 1.1 christos assert (dfa->nexts[cur_node_idx] != REG_MISSING); 2534 1.1 christos #endif 2535 1.1 christos new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx]; 2536 1.1 christos 2537 1.1 christos dest_state = mctx->state_log[dest_idx]; 2538 1.1 christos if (dest_state == NULL) 2539 1.1 christos dest_nodes = *new_nodes; 2540 1.1 christos else 2541 1.1 christos { 2542 1.1 christos err = re_node_set_init_union (&dest_nodes, 2543 1.1 christos dest_state->entrance_nodes, new_nodes); 2544 1.1 christos if (BE (err != REG_NOERROR, 0)) 2545 1.1 christos return err; 2546 1.1 christos } 2547 1.1 christos context = re_string_context_at (&mctx->input, dest_idx - 1, mctx->eflags); 2548 1.1 christos mctx->state_log[dest_idx] 2549 1.1 christos = re_acquire_state_context (&err, dfa, &dest_nodes, context); 2550 1.1 christos if (dest_state != NULL) 2551 1.1 christos re_node_set_free (&dest_nodes); 2552 1.1 christos if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0)) 2553 1.1 christos return err; 2554 1.1 christos } 2555 1.1 christos return REG_NOERROR; 2556 1.1 christos } 2557 1.1 christos #endif /* RE_ENABLE_I18N */ 2558 1.1 christos 2559 1.1 christos static reg_errcode_t 2560 1.1 christos internal_function 2561 1.1 christos transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) 2562 1.1 christos { 2563 1.1 christos re_dfa_t *const dfa = mctx->dfa; 2564 1.1 christos reg_errcode_t err; 2565 1.1 christos Idx i; 2566 1.1 christos Idx cur_str_idx = re_string_cur_idx (&mctx->input); 2567 1.1 christos 2568 1.1 christos for (i = 0; i < nodes->nelem; ++i) 2569 1.1 christos { 2570 1.1 christos Idx dest_str_idx, prev_nelem, bkc_idx; 2571 1.1 christos Idx node_idx = nodes->elems[i]; 2572 1.1 christos unsigned int context; 2573 1.1 christos const re_token_t *node = dfa->nodes + node_idx; 2574 1.1 christos re_node_set *new_dest_nodes; 2575 1.1 christos 2576 1.1 christos /* Check whether `node' is a backreference or not. */ 2577 1.1 christos if (node->type != OP_BACK_REF) 2578 1.1 christos continue; 2579 1.1 christos 2580 1.1 christos if (node->constraint) 2581 1.1 christos { 2582 1.1 christos context = re_string_context_at (&mctx->input, cur_str_idx, 2583 1.1 christos mctx->eflags); 2584 1.1 christos if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) 2585 1.1 christos continue; 2586 1.1 christos } 2587 1.1 christos 2588 1.1 christos /* `node' is a backreference. 2589 1.1 christos Check the substring which the substring matched. */ 2590 1.1 christos bkc_idx = mctx->nbkref_ents; 2591 1.1 christos err = get_subexp (mctx, node_idx, cur_str_idx); 2592 1.1 christos if (BE (err != REG_NOERROR, 0)) 2593 1.1 christos goto free_return; 2594 1.1 christos 2595 1.1 christos /* And add the epsilon closures (which is `new_dest_nodes') of 2596 1.1 christos the backreference to appropriate state_log. */ 2597 1.1 christos #ifdef DEBUG 2598 1.1 christos assert (dfa->nexts[node_idx] != REG_MISSING); 2599 1.1 christos #endif 2600 1.1 christos for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx) 2601 1.1 christos { 2602 1.1 christos Idx subexp_len; 2603 1.1 christos re_dfastate_t *dest_state; 2604 1.1 christos struct re_backref_cache_entry *bkref_ent; 2605 1.1 christos bkref_ent = mctx->bkref_ents + bkc_idx; 2606 1.1 christos if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx) 2607 1.1 christos continue; 2608 1.1 christos subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from; 2609 1.1 christos new_dest_nodes = (subexp_len == 0 2610 1.1 christos ? dfa->eclosures + dfa->edests[node_idx].elems[0] 2611 1.1 christos : dfa->eclosures + dfa->nexts[node_idx]); 2612 1.1 christos dest_str_idx = (cur_str_idx + bkref_ent->subexp_to 2613 1.1 christos - bkref_ent->subexp_from); 2614 1.1 christos context = re_string_context_at (&mctx->input, dest_str_idx - 1, 2615 1.1 christos mctx->eflags); 2616 1.1 christos dest_state = mctx->state_log[dest_str_idx]; 2617 1.1 christos prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0 2618 1.1 christos : mctx->state_log[cur_str_idx]->nodes.nelem); 2619 1.1 christos /* Add `new_dest_node' to state_log. */ 2620 1.1 christos if (dest_state == NULL) 2621 1.1 christos { 2622 1.1 christos mctx->state_log[dest_str_idx] 2623 1.1 christos = re_acquire_state_context (&err, dfa, new_dest_nodes, 2624 1.1 christos context); 2625 1.1 christos if (BE (mctx->state_log[dest_str_idx] == NULL 2626 1.1 christos && err != REG_NOERROR, 0)) 2627 1.1 christos goto free_return; 2628 1.1 christos } 2629 1.1 christos else 2630 1.1 christos { 2631 1.1 christos re_node_set dest_nodes; 2632 1.1 christos err = re_node_set_init_union (&dest_nodes, 2633 1.1 christos dest_state->entrance_nodes, 2634 1.1 christos new_dest_nodes); 2635 1.1 christos if (BE (err != REG_NOERROR, 0)) 2636 1.1 christos { 2637 1.1 christos re_node_set_free (&dest_nodes); 2638 1.1 christos goto free_return; 2639 1.1 christos } 2640 1.1 christos mctx->state_log[dest_str_idx] 2641 1.1 christos = re_acquire_state_context (&err, dfa, &dest_nodes, context); 2642 1.1 christos re_node_set_free (&dest_nodes); 2643 1.1 christos if (BE (mctx->state_log[dest_str_idx] == NULL 2644 1.1 christos && err != REG_NOERROR, 0)) 2645 1.1 christos goto free_return; 2646 1.1 christos } 2647 1.1 christos /* We need to check recursively if the backreference can epsilon 2648 1.1 christos transit. */ 2649 1.1 christos if (subexp_len == 0 2650 1.1 christos && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem) 2651 1.1 christos { 2652 1.1 christos err = check_subexp_matching_top (mctx, new_dest_nodes, 2653 1.1 christos cur_str_idx); 2654 1.1 christos if (BE (err != REG_NOERROR, 0)) 2655 1.1 christos goto free_return; 2656 1.1 christos err = transit_state_bkref (mctx, new_dest_nodes); 2657 1.1 christos if (BE (err != REG_NOERROR, 0)) 2658 1.1 christos goto free_return; 2659 1.1 christos } 2660 1.1 christos } 2661 1.1 christos } 2662 1.1 christos err = REG_NOERROR; 2663 1.1 christos free_return: 2664 1.1 christos return err; 2665 1.1 christos } 2666 1.1 christos 2667 1.1 christos /* Enumerate all the candidates which the backreference BKREF_NODE can match 2668 1.1 christos at BKREF_STR_IDX, and register them by match_ctx_add_entry(). 2669 1.1 christos Note that we might collect inappropriate candidates here. 2670 1.1 christos However, the cost of checking them strictly here is too high, then we 2671 1.1 christos delay these checking for prune_impossible_nodes(). */ 2672 1.1 christos 2673 1.1 christos static reg_errcode_t 2674 1.1 christos internal_function 2675 1.1 christos get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) 2676 1.1 christos { 2677 1.1 christos re_dfa_t *const dfa = mctx->dfa; 2678 1.1 christos Idx subexp_num, sub_top_idx; 2679 1.1 christos const char *buf = (const char *) re_string_get_buffer (&mctx->input); 2680 1.1 christos /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */ 2681 1.1 christos Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx); 2682 1.1 christos if (cache_idx != REG_MISSING) 2683 1.1 christos { 2684 1.1 christos const struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx; 2685 1.1 christos do 2686 1.1 christos if (entry->node == bkref_node) 2687 1.1 christos return REG_NOERROR; /* We already checked it. */ 2688 1.1 christos while (entry++->more); 2689 1.1 christos } 2690 1.1 christos 2691 1.1 christos subexp_num = dfa->nodes[bkref_node].opr.idx; 2692 1.1 christos 2693 1.1 christos /* For each sub expression */ 2694 1.1 christos for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx) 2695 1.1 christos { 2696 1.1 christos reg_errcode_t err; 2697 1.1 christos re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx]; 2698 1.1 christos re_sub_match_last_t *sub_last; 2699 1.1 christos Idx sub_last_idx, sl_str, bkref_str_off; 2700 1.1 christos 2701 1.1 christos if (dfa->nodes[sub_top->node].opr.idx != subexp_num) 2702 1.1 christos continue; /* It isn't related. */ 2703 1.1 christos 2704 1.1 christos sl_str = sub_top->str_idx; 2705 1.1 christos bkref_str_off = bkref_str_idx; 2706 1.1 christos /* At first, check the last node of sub expressions we already 2707 1.1 christos evaluated. */ 2708 1.1 christos for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx) 2709 1.1 christos { 2710 1.1 christos regoff_t sl_str_diff; 2711 1.1 christos sub_last = sub_top->lasts[sub_last_idx]; 2712 1.1 christos sl_str_diff = sub_last->str_idx - sl_str; 2713 1.1 christos /* The matched string by the sub expression match with the substring 2714 1.1 christos at the back reference? */ 2715 1.1 christos if (sl_str_diff > 0) 2716 1.1 christos { 2717 1.1 christos if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0)) 2718 1.1 christos { 2719 1.1 christos /* Not enough chars for a successful match. */ 2720 1.1 christos if (bkref_str_off + sl_str_diff > mctx->input.len) 2721 1.1 christos break; 2722 1.1 christos 2723 1.1 christos err = clean_state_log_if_needed (mctx, 2724 1.1 christos bkref_str_off 2725 1.1 christos + sl_str_diff); 2726 1.1 christos if (BE (err != REG_NOERROR, 0)) 2727 1.1 christos return err; 2728 1.1 christos buf = (const char *) re_string_get_buffer (&mctx->input); 2729 1.1 christos } 2730 1.1 christos if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0) 2731 1.1 christos break; /* We don't need to search this sub expression any more. */ 2732 1.1 christos } 2733 1.1 christos bkref_str_off += sl_str_diff; 2734 1.1 christos sl_str += sl_str_diff; 2735 1.1 christos err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, 2736 1.1 christos bkref_str_idx); 2737 1.1 christos 2738 1.1 christos /* Reload buf, since the preceding call might have reallocated 2739 1.1 christos the buffer. */ 2740 1.1 christos buf = (const char *) re_string_get_buffer (&mctx->input); 2741 1.1 christos 2742 1.1 christos if (err == REG_NOMATCH) 2743 1.1 christos continue; 2744 1.1 christos if (BE (err != REG_NOERROR, 0)) 2745 1.1 christos return err; 2746 1.1 christos } 2747 1.1 christos 2748 1.1 christos if (sub_last_idx < sub_top->nlasts) 2749 1.1 christos continue; 2750 1.1 christos if (sub_last_idx > 0) 2751 1.1 christos ++sl_str; 2752 1.1 christos /* Then, search for the other last nodes of the sub expression. */ 2753 1.1 christos for (; sl_str <= bkref_str_idx; ++sl_str) 2754 1.1 christos { 2755 1.1 christos Idx cls_node; 2756 1.1 christos regoff_t sl_str_off; 2757 1.1 christos const re_node_set *nodes; 2758 1.1 christos sl_str_off = sl_str - sub_top->str_idx; 2759 1.1 christos /* The matched string by the sub expression match with the substring 2760 1.1 christos at the back reference? */ 2761 1.1 christos if (sl_str_off > 0) 2762 1.1 christos { 2763 1.1 christos if (BE (bkref_str_off >= mctx->input.valid_len, 0)) 2764 1.1 christos { 2765 1.1 christos /* If we are at the end of the input, we cannot match. */ 2766 1.1 christos if (bkref_str_off >= mctx->input.len) 2767 1.1 christos break; 2768 1.1 christos 2769 1.1 christos err = extend_buffers (mctx); 2770 1.1 christos if (BE (err != REG_NOERROR, 0)) 2771 1.1 christos return err; 2772 1.1 christos 2773 1.1 christos buf = (const char *) re_string_get_buffer (&mctx->input); 2774 1.1 christos } 2775 1.1 christos if (buf [bkref_str_off++] != buf[sl_str - 1]) 2776 1.1 christos break; /* We don't need to search this sub expression 2777 1.1 christos any more. */ 2778 1.1 christos } 2779 1.1 christos if (mctx->state_log[sl_str] == NULL) 2780 1.1 christos continue; 2781 1.1 christos /* Does this state have a ')' of the sub expression? */ 2782 1.1 christos nodes = &mctx->state_log[sl_str]->nodes; 2783 1.1 christos cls_node = find_subexp_node (dfa, nodes, subexp_num, OP_CLOSE_SUBEXP); 2784 1.1 christos if (cls_node == REG_MISSING) 2785 1.1 christos continue; /* No. */ 2786 1.1 christos if (sub_top->path == NULL) 2787 1.1 christos { 2788 1.1 christos sub_top->path = re_calloc (state_array_t, 2789 1.1 christos sl_str - sub_top->str_idx + 1); 2790 1.1 christos if (sub_top->path == NULL) 2791 1.1 christos return REG_ESPACE; 2792 1.1 christos } 2793 1.1 christos /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node 2794 1.1 christos in the current context? */ 2795 1.1 christos err = check_arrival (mctx, sub_top->path, sub_top->node, 2796 1.1 christos sub_top->str_idx, cls_node, sl_str, OP_CLOSE_SUBEXP); 2797 1.1 christos if (err == REG_NOMATCH) 2798 1.1 christos continue; 2799 1.1 christos if (BE (err != REG_NOERROR, 0)) 2800 1.1 christos return err; 2801 1.1 christos sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str); 2802 1.1 christos if (BE (sub_last == NULL, 0)) 2803 1.1 christos return REG_ESPACE; 2804 1.1 christos err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, 2805 1.1 christos bkref_str_idx); 2806 1.1 christos if (err == REG_NOMATCH) 2807 1.1 christos continue; 2808 1.1 christos } 2809 1.1 christos } 2810 1.1 christos return REG_NOERROR; 2811 1.1 christos } 2812 1.1 christos 2813 1.1 christos /* Helper functions for get_subexp(). */ 2814 1.1 christos 2815 1.1 christos /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR. 2816 1.1 christos If it can arrive, register the sub expression expressed with SUB_TOP 2817 1.1 christos and SUB_LAST. */ 2818 1.1 christos 2819 1.1 christos static reg_errcode_t 2820 1.1 christos internal_function 2821 1.1 christos get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, 2822 1.1 christos re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str) 2823 1.1 christos { 2824 1.1 christos reg_errcode_t err; 2825 1.1 christos Idx to_idx; 2826 1.1 christos /* Can the subexpression arrive the back reference? */ 2827 1.1 christos err = check_arrival (mctx, &sub_last->path, sub_last->node, 2828 1.1 christos sub_last->str_idx, bkref_node, bkref_str, OP_OPEN_SUBEXP); 2829 1.1 christos if (err != REG_NOERROR) 2830 1.1 christos return err; 2831 1.1 christos err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx, 2832 1.1 christos sub_last->str_idx); 2833 1.1 christos if (BE (err != REG_NOERROR, 0)) 2834 1.1 christos return err; 2835 1.1 christos to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx; 2836 1.1 christos return clean_state_log_if_needed (mctx, to_idx); 2837 1.1 christos } 2838 1.1 christos 2839 1.1 christos /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX. 2840 1.1 christos Search '(' if FL_OPEN, or search ')' otherwise. 2841 1.1 christos TODO: This function isn't efficient... 2842 1.1 christos Because there might be more than one nodes whose types are 2843 1.1 christos OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all 2844 1.1 christos nodes. 2845 1.1 christos E.g. RE: (a){2} */ 2846 1.1 christos 2847 1.1 christos static Idx 2848 1.1 christos internal_function 2849 1.1 christos find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, 2850 1.1 christos Idx subexp_idx, int type) 2851 1.1 christos { 2852 1.1 christos Idx cls_idx; 2853 1.1 christos for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx) 2854 1.1 christos { 2855 1.1 christos Idx cls_node = nodes->elems[cls_idx]; 2856 1.1 christos const re_token_t *node = dfa->nodes + cls_node; 2857 1.1 christos if (node->type == type 2858 1.1 christos && node->opr.idx == subexp_idx) 2859 1.1 christos return cls_node; 2860 1.1 christos } 2861 1.1 christos return REG_MISSING; 2862 1.1 christos } 2863 1.1 christos 2864 1.1 christos /* Check whether the node TOP_NODE at TOP_STR can arrive to the node 2865 1.1 christos LAST_NODE at LAST_STR. We record the path onto PATH since it will be 2866 1.1 christos heavily reused. 2867 1.1 christos Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ 2868 1.1 christos 2869 1.1 christos static reg_errcode_t 2870 1.1 christos internal_function 2871 1.1 christos check_arrival (re_match_context_t *mctx, state_array_t *path, 2872 1.1 christos Idx top_node, Idx top_str, Idx last_node, Idx last_str, 2873 1.1 christos int type) 2874 1.1 christos { 2875 1.1 christos re_dfa_t *const dfa = mctx->dfa; 2876 1.1 christos reg_errcode_t err; 2877 1.1 christos Idx subexp_num, backup_cur_idx, str_idx, null_cnt; 2878 1.1 christos re_dfastate_t *cur_state = NULL; 2879 1.1 christos re_node_set *cur_nodes, next_nodes; 2880 1.1 christos re_dfastate_t **backup_state_log; 2881 1.1 christos unsigned int context; 2882 1.1 christos 2883 1.1 christos subexp_num = dfa->nodes[top_node].opr.idx; 2884 1.1 christos /* Extend the buffer if we need. */ 2885 1.1 christos if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0)) 2886 1.1 christos { 2887 1.1 christos re_dfastate_t **new_array; 2888 1.1 christos Idx old_alloc = path->alloc; 2889 1.1 christos Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1; 2890 1.1 christos if (BE (new_alloc < old_alloc, 0)) 2891 1.1 christos return REG_ESPACE; 2892 1.1 christos new_array = re_xrealloc (path->array, re_dfastate_t *, new_alloc); 2893 1.1 christos if (BE (new_array == NULL, 0)) 2894 1.1 christos return REG_ESPACE; 2895 1.1 christos path->array = new_array; 2896 1.1 christos path->alloc = new_alloc; 2897 1.1 christos memset (new_array + old_alloc, '\0', 2898 1.1 christos sizeof (re_dfastate_t *) * (new_alloc - old_alloc)); 2899 1.1 christos } 2900 1.1 christos 2901 1.1 christos str_idx = path->next_idx == 0 ? top_str : path->next_idx; 2902 1.1 christos 2903 1.1 christos /* Temporary modify MCTX. */ 2904 1.1 christos backup_state_log = mctx->state_log; 2905 1.1 christos backup_cur_idx = mctx->input.cur_idx; 2906 1.1 christos mctx->state_log = path->array; 2907 1.1 christos mctx->input.cur_idx = str_idx; 2908 1.1 christos 2909 1.1 christos /* Setup initial node set. */ 2910 1.1 christos context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); 2911 1.1 christos if (str_idx == top_str) 2912 1.1 christos { 2913 1.1 christos err = re_node_set_init_1 (&next_nodes, top_node); 2914 1.1 christos if (BE (err != REG_NOERROR, 0)) 2915 1.1 christos return err; 2916 1.1 christos err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); 2917 1.1 christos if (BE (err != REG_NOERROR, 0)) 2918 1.1 christos { 2919 1.1 christos re_node_set_free (&next_nodes); 2920 1.1 christos return err; 2921 1.1 christos } 2922 1.1 christos } 2923 1.1 christos else 2924 1.1 christos { 2925 1.1 christos cur_state = mctx->state_log[str_idx]; 2926 1.1 christos if (cur_state && cur_state->has_backref) 2927 1.1 christos { 2928 1.1 christos err = re_node_set_init_copy (&next_nodes, &cur_state->nodes); 2929 1.1 christos if (BE ( err != REG_NOERROR, 0)) 2930 1.1 christos return err; 2931 1.1 christos } 2932 1.1 christos else 2933 1.1 christos re_node_set_init_empty (&next_nodes); 2934 1.1 christos } 2935 1.1 christos if (str_idx == top_str || (cur_state && cur_state->has_backref)) 2936 1.1 christos { 2937 1.1 christos if (next_nodes.nelem) 2938 1.1 christos { 2939 1.1 christos err = expand_bkref_cache (mctx, &next_nodes, str_idx, 2940 1.1 christos subexp_num, type); 2941 1.1 christos if (BE ( err != REG_NOERROR, 0)) 2942 1.1 christos { 2943 1.1 christos re_node_set_free (&next_nodes); 2944 1.1 christos return err; 2945 1.1 christos } 2946 1.1 christos } 2947 1.1 christos cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); 2948 1.1 christos if (BE (cur_state == NULL && err != REG_NOERROR, 0)) 2949 1.1 christos { 2950 1.1 christos re_node_set_free (&next_nodes); 2951 1.1 christos return err; 2952 1.1 christos } 2953 1.1 christos mctx->state_log[str_idx] = cur_state; 2954 1.1 christos } 2955 1.1 christos 2956 1.1 christos for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;) 2957 1.1 christos { 2958 1.1 christos re_node_set_empty (&next_nodes); 2959 1.1 christos if (mctx->state_log[str_idx + 1]) 2960 1.1 christos { 2961 1.1 christos err = re_node_set_merge (&next_nodes, 2962 1.1 christos &mctx->state_log[str_idx + 1]->nodes); 2963 1.1 christos if (BE (err != REG_NOERROR, 0)) 2964 1.1 christos { 2965 1.1 christos re_node_set_free (&next_nodes); 2966 1.1 christos return err; 2967 1.1 christos } 2968 1.1 christos } 2969 1.1 christos if (cur_state) 2970 1.1 christos { 2971 1.1 christos err = check_arrival_add_next_nodes (mctx, str_idx, 2972 1.1 christos &cur_state->non_eps_nodes, &next_nodes); 2973 1.1 christos if (BE (err != REG_NOERROR, 0)) 2974 1.1 christos { 2975 1.1 christos re_node_set_free (&next_nodes); 2976 1.1 christos return err; 2977 1.1 christos } 2978 1.1 christos } 2979 1.1 christos ++str_idx; 2980 1.1 christos if (next_nodes.nelem) 2981 1.1 christos { 2982 1.1 christos err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); 2983 1.1 christos if (BE (err != REG_NOERROR, 0)) 2984 1.1 christos { 2985 1.1 christos re_node_set_free (&next_nodes); 2986 1.1 christos return err; 2987 1.1 christos } 2988 1.1 christos err = expand_bkref_cache (mctx, &next_nodes, str_idx, 2989 1.1 christos subexp_num, type); 2990 1.1 christos if (BE ( err != REG_NOERROR, 0)) 2991 1.1 christos { 2992 1.1 christos re_node_set_free (&next_nodes); 2993 1.1 christos return err; 2994 1.1 christos } 2995 1.1 christos } 2996 1.1 christos context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); 2997 1.1 christos cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); 2998 1.1 christos if (BE (cur_state == NULL && err != REG_NOERROR, 0)) 2999 1.1 christos { 3000 1.1 christos re_node_set_free (&next_nodes); 3001 1.1 christos return err; 3002 1.1 christos } 3003 1.1 christos mctx->state_log[str_idx] = cur_state; 3004 1.1 christos null_cnt = cur_state == NULL ? null_cnt + 1 : 0; 3005 1.1 christos } 3006 1.1 christos re_node_set_free (&next_nodes); 3007 1.1 christos cur_nodes = (mctx->state_log[last_str] == NULL ? NULL 3008 1.1 christos : &mctx->state_log[last_str]->nodes); 3009 1.1 christos path->next_idx = str_idx; 3010 1.1 christos 3011 1.1 christos /* Fix MCTX. */ 3012 1.1 christos mctx->state_log = backup_state_log; 3013 1.1 christos mctx->input.cur_idx = backup_cur_idx; 3014 1.1 christos 3015 1.1 christos /* Then check the current node set has the node LAST_NODE. */ 3016 1.1 christos if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node)) 3017 1.1 christos return REG_NOERROR; 3018 1.1 christos 3019 1.1 christos return REG_NOMATCH; 3020 1.1 christos } 3021 1.1 christos 3022 1.1 christos /* Helper functions for check_arrival. */ 3023 1.1 christos 3024 1.1 christos /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them 3025 1.1 christos to NEXT_NODES. 3026 1.1 christos TODO: This function is similar to the functions transit_state*(), 3027 1.1 christos however this function has many additional works. 3028 1.1 christos Can't we unify them? */ 3029 1.1 christos 3030 1.1 christos static reg_errcode_t 3031 1.1 christos internal_function 3032 1.1 christos check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, 3033 1.1 christos re_node_set *cur_nodes, 3034 1.1 christos re_node_set *next_nodes) 3035 1.1 christos { 3036 1.1 christos re_dfa_t *const dfa = mctx->dfa; 3037 1.1 christos bool ok; 3038 1.1 christos Idx cur_idx; 3039 1.1 christos reg_errcode_t err; 3040 1.1 christos re_node_set union_set; 3041 1.1 christos re_node_set_init_empty (&union_set); 3042 1.1 christos for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx) 3043 1.1 christos { 3044 1.1 christos int naccepted = 0; 3045 1.1 christos Idx cur_node = cur_nodes->elems[cur_idx]; 3046 1.1 christos #ifdef DEBUG 3047 1.1 christos re_token_type_t type = dfa->nodes[cur_node].type; 3048 1.1 christos assert (!IS_EPSILON_NODE (type)); 3049 1.1 christos #endif 3050 1.1 christos #ifdef RE_ENABLE_I18N 3051 1.1 christos /* If the node may accept `multi byte'. */ 3052 1.1 christos if (dfa->nodes[cur_node].accept_mb) 3053 1.1 christos { 3054 1.1 christos naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input, 3055 1.1 christos str_idx); 3056 1.1 christos if (naccepted > 1) 3057 1.1 christos { 3058 1.1 christos re_dfastate_t *dest_state; 3059 1.1 christos Idx next_node = dfa->nexts[cur_node]; 3060 1.1 christos Idx next_idx = str_idx + naccepted; 3061 1.1 christos dest_state = mctx->state_log[next_idx]; 3062 1.1 christos re_node_set_empty (&union_set); 3063 1.1 christos if (dest_state) 3064 1.1 christos { 3065 1.1 christos err = re_node_set_merge (&union_set, &dest_state->nodes); 3066 1.1 christos if (BE (err != REG_NOERROR, 0)) 3067 1.1 christos { 3068 1.1 christos re_node_set_free (&union_set); 3069 1.1 christos return err; 3070 1.1 christos } 3071 1.1 christos } 3072 1.1 christos ok = re_node_set_insert (&union_set, next_node); 3073 1.1 christos if (BE (! ok, 0)) 3074 1.1 christos { 3075 1.1 christos re_node_set_free (&union_set); 3076 1.1 christos return REG_ESPACE; 3077 1.1 christos } 3078 1.1 christos mctx->state_log[next_idx] = re_acquire_state (&err, dfa, 3079 1.1 christos &union_set); 3080 1.1 christos if (BE (mctx->state_log[next_idx] == NULL 3081 1.1 christos && err != REG_NOERROR, 0)) 3082 1.1 christos { 3083 1.1 christos re_node_set_free (&union_set); 3084 1.1 christos return err; 3085 1.1 christos } 3086 1.1 christos } 3087 1.1 christos } 3088 1.1 christos #endif /* RE_ENABLE_I18N */ 3089 1.1 christos if (naccepted 3090 1.1 christos || check_node_accept (mctx, dfa->nodes + cur_node, str_idx)) 3091 1.1 christos { 3092 1.1 christos ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]); 3093 1.1 christos if (BE (! ok, 0)) 3094 1.1 christos { 3095 1.1 christos re_node_set_free (&union_set); 3096 1.1 christos return REG_ESPACE; 3097 1.1 christos } 3098 1.1 christos } 3099 1.1 christos } 3100 1.1 christos re_node_set_free (&union_set); 3101 1.1 christos return REG_NOERROR; 3102 1.1 christos } 3103 1.1 christos 3104 1.1 christos /* For all the nodes in CUR_NODES, add the epsilon closures of them to 3105 1.1 christos CUR_NODES, however exclude the nodes which are: 3106 1.1 christos - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN. 3107 1.1 christos - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN. 3108 1.1 christos */ 3109 1.1 christos 3110 1.1 christos static reg_errcode_t 3111 1.1 christos internal_function 3112 1.1 christos check_arrival_expand_ecl (re_dfa_t *dfa, re_node_set *cur_nodes, 3113 1.1 christos Idx ex_subexp, int type) 3114 1.1 christos { 3115 1.1 christos reg_errcode_t err; 3116 1.1 christos Idx idx, outside_node; 3117 1.1 christos re_node_set new_nodes; 3118 1.1 christos #ifdef DEBUG 3119 1.1 christos assert (cur_nodes->nelem); 3120 1.1 christos #endif 3121 1.1 christos err = re_node_set_alloc (&new_nodes, cur_nodes->nelem); 3122 1.1 christos if (BE (err != REG_NOERROR, 0)) 3123 1.1 christos return err; 3124 1.1 christos /* Create a new node set NEW_NODES with the nodes which are epsilon 3125 1.1 christos closures of the node in CUR_NODES. */ 3126 1.1 christos 3127 1.1 christos for (idx = 0; idx < cur_nodes->nelem; ++idx) 3128 1.1 christos { 3129 1.1 christos Idx cur_node = cur_nodes->elems[idx]; 3130 1.1 christos re_node_set *eclosure = dfa->eclosures + cur_node; 3131 1.1 christos outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type); 3132 1.1 christos if (outside_node == REG_MISSING) 3133 1.1 christos { 3134 1.1 christos /* There are no problematic nodes, just merge them. */ 3135 1.1 christos err = re_node_set_merge (&new_nodes, eclosure); 3136 1.1 christos if (BE (err != REG_NOERROR, 0)) 3137 1.1 christos { 3138 1.1 christos re_node_set_free (&new_nodes); 3139 1.1 christos return err; 3140 1.1 christos } 3141 1.1 christos } 3142 1.1 christos else 3143 1.1 christos { 3144 1.1 christos /* There are problematic nodes, re-calculate incrementally. */ 3145 1.1 christos err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node, 3146 1.1 christos ex_subexp, type); 3147 1.1 christos if (BE (err != REG_NOERROR, 0)) 3148 1.1 christos { 3149 1.1 christos re_node_set_free (&new_nodes); 3150 1.1 christos return err; 3151 1.1 christos } 3152 1.1 christos } 3153 1.1 christos } 3154 1.1 christos re_node_set_free (cur_nodes); 3155 1.1 christos *cur_nodes = new_nodes; 3156 1.1 christos return REG_NOERROR; 3157 1.1 christos } 3158 1.1 christos 3159 1.1 christos /* Helper function for check_arrival_expand_ecl. 3160 1.1 christos Check incrementally the epsilon closure of TARGET, and if it isn't 3161 1.1 christos problematic append it to DST_NODES. */ 3162 1.1 christos 3163 1.1 christos static reg_errcode_t 3164 1.1 christos internal_function 3165 1.1 christos check_arrival_expand_ecl_sub (re_dfa_t *dfa, re_node_set *dst_nodes, 3166 1.1 christos Idx target, Idx ex_subexp, int type) 3167 1.1 christos { 3168 1.1 christos Idx cur_node; 3169 1.1 christos for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);) 3170 1.1 christos { 3171 1.1 christos bool ok; 3172 1.1 christos 3173 1.1 christos if (dfa->nodes[cur_node].type == type 3174 1.1 christos && dfa->nodes[cur_node].opr.idx == ex_subexp) 3175 1.1 christos { 3176 1.1 christos if (type == OP_CLOSE_SUBEXP) 3177 1.1 christos { 3178 1.1 christos ok = re_node_set_insert (dst_nodes, cur_node); 3179 1.1 christos if (BE (! ok, 0)) 3180 1.1 christos return REG_ESPACE; 3181 1.1 christos } 3182 1.1 christos break; 3183 1.1 christos } 3184 1.1 christos ok = re_node_set_insert (dst_nodes, cur_node); 3185 1.1 christos if (BE (! ok, 0)) 3186 1.1 christos return REG_ESPACE; 3187 1.1 christos if (dfa->edests[cur_node].nelem == 0) 3188 1.1 christos break; 3189 1.1 christos if (dfa->edests[cur_node].nelem == 2) 3190 1.1 christos { 3191 1.1 christos reg_errcode_t ret = 3192 1.1 christos check_arrival_expand_ecl_sub (dfa, dst_nodes, 3193 1.1 christos dfa->edests[cur_node].elems[1], 3194 1.1 christos ex_subexp, type); 3195 1.1 christos if (BE (ret != REG_NOERROR, 0)) 3196 1.1 christos return ret; 3197 1.1 christos } 3198 1.1 christos cur_node = dfa->edests[cur_node].elems[0]; 3199 1.1 christos } 3200 1.1 christos return REG_NOERROR; 3201 1.1 christos } 3202 1.1 christos 3203 1.1 christos 3204 1.1 christos /* For all the back references in the current state, calculate the 3205 1.1 christos destination of the back references by the appropriate entry 3206 1.1 christos in MCTX->BKREF_ENTS. */ 3207 1.1 christos 3208 1.1 christos static reg_errcode_t 3209 1.1 christos internal_function 3210 1.1 christos expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, 3211 1.1 christos Idx cur_str, Idx subexp_num, int type) 3212 1.1 christos { 3213 1.1 christos re_dfa_t *const dfa = mctx->dfa; 3214 1.1 christos reg_errcode_t err; 3215 1.1 christos Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str); 3216 1.1 christos struct re_backref_cache_entry *ent; 3217 1.1 christos 3218 1.1 christos if (cache_idx_start == REG_MISSING) 3219 1.1 christos return REG_NOERROR; 3220 1.1 christos 3221 1.1 christos restart: 3222 1.1 christos ent = mctx->bkref_ents + cache_idx_start; 3223 1.1 christos do 3224 1.1 christos { 3225 1.1 christos Idx to_idx, next_node; 3226 1.1 christos 3227 1.1 christos /* Is this entry ENT is appropriate? */ 3228 1.1 christos if (!re_node_set_contains (cur_nodes, ent->node)) 3229 1.1 christos continue; /* No. */ 3230 1.1 christos 3231 1.1 christos to_idx = cur_str + ent->subexp_to - ent->subexp_from; 3232 1.1 christos /* Calculate the destination of the back reference, and append it 3233 1.1 christos to MCTX->STATE_LOG. */ 3234 1.1 christos if (to_idx == cur_str) 3235 1.1 christos { 3236 1.1 christos /* The backreference did epsilon transit, we must re-check all the 3237 1.1 christos node in the current state. */ 3238 1.1 christos re_node_set new_dests; 3239 1.1 christos reg_errcode_t err2, err3; 3240 1.1 christos next_node = dfa->edests[ent->node].elems[0]; 3241 1.1 christos if (re_node_set_contains (cur_nodes, next_node)) 3242 1.1 christos continue; 3243 1.1 christos err = re_node_set_init_1 (&new_dests, next_node); 3244 1.1 christos err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type); 3245 1.1 christos err3 = re_node_set_merge (cur_nodes, &new_dests); 3246 1.1 christos re_node_set_free (&new_dests); 3247 1.1 christos if (BE (err != REG_NOERROR || err2 != REG_NOERROR 3248 1.1 christos || err3 != REG_NOERROR, 0)) 3249 1.1 christos { 3250 1.1 christos err = (err != REG_NOERROR ? err 3251 1.1 christos : (err2 != REG_NOERROR ? err2 : err3)); 3252 1.1 christos return err; 3253 1.1 christos } 3254 1.1 christos /* TODO: It is still inefficient... */ 3255 1.1 christos goto restart; 3256 1.1 christos } 3257 1.1 christos else 3258 1.1 christos { 3259 1.1 christos re_node_set union_set; 3260 1.1 christos next_node = dfa->nexts[ent->node]; 3261 1.1 christos if (mctx->state_log[to_idx]) 3262 1.1 christos { 3263 1.1 christos bool ok; 3264 1.1 christos if (re_node_set_contains (&mctx->state_log[to_idx]->nodes, 3265 1.1 christos next_node)) 3266 1.1 christos continue; 3267 1.1 christos err = re_node_set_init_copy (&union_set, 3268 1.1 christos &mctx->state_log[to_idx]->nodes); 3269 1.1 christos ok = re_node_set_insert (&union_set, next_node); 3270 1.1 christos if (BE (err != REG_NOERROR || ! ok, 0)) 3271 1.1 christos { 3272 1.1 christos re_node_set_free (&union_set); 3273 1.1 christos err = err != REG_NOERROR ? err : REG_ESPACE; 3274 1.1 christos return err; 3275 1.1 christos } 3276 1.1 christos } 3277 1.1 christos else 3278 1.1 christos { 3279 1.1 christos err = re_node_set_init_1 (&union_set, next_node); 3280 1.1 christos if (BE (err != REG_NOERROR, 0)) 3281 1.1 christos return err; 3282 1.1 christos } 3283 1.1 christos mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set); 3284 1.1 christos re_node_set_free (&union_set); 3285 1.1 christos if (BE (mctx->state_log[to_idx] == NULL 3286 1.1 christos && err != REG_NOERROR, 0)) 3287 1.1 christos return err; 3288 1.1 christos } 3289 1.1 christos } 3290 1.1 christos while (ent++->more); 3291 1.1 christos return REG_NOERROR; 3292 1.1 christos } 3293 1.1 christos 3294 1.1 christos /* Build transition table for the state. 3295 1.1 christos Return true if successful. */ 3296 1.1 christos 3297 1.1 christos static bool 3298 1.1 christos internal_function 3299 1.1 christos build_trtable (re_dfa_t *dfa, re_dfastate_t *state) 3300 1.1 christos { 3301 1.1 christos reg_errcode_t err; 3302 1.1 christos Idx i, j; 3303 1.1 christos int ch; 3304 1.1 christos bool need_word_trtable = false; 3305 1.1 christos bitset_word elem, mask; 3306 1.1 christos bool dests_node_malloced = false, dest_states_malloced = false; 3307 1.1 christos Idx ndests; /* Number of the destination states from `state'. */ 3308 1.1 christos re_dfastate_t **trtable; 3309 1.1 christos re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; 3310 1.1 christos re_node_set follows, *dests_node; 3311 1.1 christos bitset *dests_ch; 3312 1.1 christos bitset acceptable; 3313 1.1 christos 3314 1.1 christos struct dests_alloc 3315 1.1 christos { 3316 1.1 christos re_node_set dests_node[SBC_MAX]; 3317 1.1 christos bitset dests_ch[SBC_MAX]; 3318 1.1 christos } *dests_alloc; 3319 1.1 christos 3320 1.2 christos /* We build DFA states which corresponds to the destination nodes 3321 1.1 christos from `state'. `dests_node[i]' represents the nodes which i-th 3322 1.1 christos destination state contains, and `dests_ch[i]' represents the 3323 1.1 christos characters which i-th destination state accepts. */ 3324 1.2 christos #ifndef __SSP__ 3325 1.1 christos if (__libc_use_alloca (sizeof (struct dests_alloc))) 3326 1.1 christos dests_alloc = (struct dests_alloc *) alloca (sizeof dests_alloc[0]); 3327 1.1 christos else 3328 1.1 christos #endif 3329 1.1 christos { 3330 1.1 christos dests_alloc = re_malloc (struct dests_alloc, 1); 3331 1.1 christos if (BE (dests_alloc == NULL, 0)) 3332 1.1 christos return false; 3333 1.1 christos dests_node_malloced = true; 3334 1.1 christos } 3335 1.1 christos dests_node = dests_alloc->dests_node; 3336 1.1 christos dests_ch = dests_alloc->dests_ch; 3337 1.1 christos 3338 1.1 christos /* Initialize transiton table. */ 3339 1.1 christos state->word_trtable = state->trtable = NULL; 3340 1.1 christos 3341 1.1 christos /* At first, group all nodes belonging to `state' into several 3342 1.1 christos destinations. */ 3343 1.1 christos ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch); 3344 1.1 christos if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0)) 3345 1.1 christos { 3346 1.1 christos if (dests_node_malloced) 3347 1.1 christos free (dests_alloc); 3348 1.1 christos if (ndests == 0) 3349 1.1 christos { 3350 1.1 christos state->trtable = re_calloc (re_dfastate_t *, SBC_MAX); 3351 1.1 christos return true; 3352 1.1 christos } 3353 1.1 christos return false; 3354 1.1 christos } 3355 1.1 christos 3356 1.1 christos err = re_node_set_alloc (&follows, ndests + 1); 3357 1.1 christos if (BE (err != REG_NOERROR, 0)) 3358 1.1 christos goto out_free; 3359 1.1 christos 3360 1.1 christos /* Avoid arithmetic overflow in size calculation. */ 3361 1.1 christos if (BE (((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX) 3362 1.2 christos / (3 * sizeof (re_dfastate_t *))) 3363 1.1 christos < ndests, 0)) 3364 1.1 christos goto out_free; 3365 1.1 christos 3366 1.1 christos #ifndef __SSP__ 3367 1.1 christos if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX 3368 1.2 christos + ndests * 3 * sizeof (re_dfastate_t *))) 3369 1.1 christos dest_states = (re_dfastate_t **) 3370 1.1 christos alloca (ndests * 3 * sizeof (re_dfastate_t *)); 3371 1.1 christos else 3372 1.1 christos #endif 3373 1.1 christos { 3374 1.1 christos dest_states = (re_dfastate_t **) 3375 1.1 christos malloc (ndests * 3 * sizeof (re_dfastate_t *)); 3376 1.1 christos if (BE (dest_states == NULL, 0)) 3377 1.1 christos { 3378 1.1 christos out_free: 3379 1.1 christos if (dest_states_malloced) 3380 1.1 christos free (dest_states); 3381 1.1 christos re_node_set_free (&follows); 3382 1.1 christos for (i = 0; i < ndests; ++i) 3383 1.1 christos re_node_set_free (dests_node + i); 3384 1.1 christos if (dests_node_malloced) 3385 1.1 christos free (dests_alloc); 3386 1.1 christos return false; 3387 1.1 christos } 3388 1.1 christos dest_states_malloced = true; 3389 1.1 christos } 3390 1.1 christos dest_states_word = dest_states + ndests; 3391 1.1 christos dest_states_nl = dest_states_word + ndests; 3392 1.1 christos bitset_empty (acceptable); 3393 1.1 christos 3394 1.1 christos /* Then build the states for all destinations. */ 3395 1.1 christos for (i = 0; i < ndests; ++i) 3396 1.1 christos { 3397 1.1 christos Idx next_node; 3398 1.1 christos re_node_set_empty (&follows); 3399 1.1 christos /* Merge the follows of this destination states. */ 3400 1.1 christos for (j = 0; j < dests_node[i].nelem; ++j) 3401 1.1 christos { 3402 1.1 christos next_node = dfa->nexts[dests_node[i].elems[j]]; 3403 1.1 christos if (next_node != REG_MISSING) 3404 1.1 christos { 3405 1.1 christos err = re_node_set_merge (&follows, dfa->eclosures + next_node); 3406 1.1 christos if (BE (err != REG_NOERROR, 0)) 3407 1.1 christos goto out_free; 3408 1.1 christos } 3409 1.1 christos } 3410 1.1 christos dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0); 3411 1.1 christos if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0)) 3412 1.1 christos goto out_free; 3413 1.1 christos /* If the new state has context constraint, 3414 1.1 christos build appropriate states for these contexts. */ 3415 1.1 christos if (dest_states[i]->has_constraint) 3416 1.1 christos { 3417 1.1 christos dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows, 3418 1.1 christos CONTEXT_WORD); 3419 1.1 christos if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0)) 3420 1.1 christos goto out_free; 3421 1.1 christos 3422 1.1 christos if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1) 3423 1.1 christos need_word_trtable = true; 3424 1.1 christos 3425 1.1 christos dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows, 3426 1.1 christos CONTEXT_NEWLINE); 3427 1.1 christos if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0)) 3428 1.1 christos goto out_free; 3429 1.1 christos } 3430 1.1 christos else 3431 1.1 christos { 3432 1.1 christos dest_states_word[i] = dest_states[i]; 3433 1.1 christos dest_states_nl[i] = dest_states[i]; 3434 1.1 christos } 3435 1.1 christos bitset_merge (acceptable, dests_ch[i]); 3436 1.1 christos } 3437 1.1 christos 3438 1.1 christos if (!BE (need_word_trtable, 0)) 3439 1.1 christos { 3440 1.1 christos /* We don't care about whether the following character is a word 3441 1.1 christos character, or we are in a single-byte character set so we can 3442 1.1 christos discern by looking at the character code: allocate a 3443 1.1 christos 256-entry transition table. */ 3444 1.1 christos trtable = state->trtable = re_calloc (re_dfastate_t *, SBC_MAX); 3445 1.1 christos if (BE (trtable == NULL, 0)) 3446 1.1 christos goto out_free; 3447 1.1 christos 3448 1.1 christos /* For all characters ch...: */ 3449 1.1 christos for (i = 0; i < BITSET_WORDS; ++i) 3450 1.1 christos for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; 3451 1.1 christos elem; 3452 1.1 christos mask <<= 1, elem >>= 1, ++ch) 3453 1.1 christos if (BE (elem & 1, 0)) 3454 1.1 christos { 3455 1.1 christos /* There must be exactly one destination which accepts 3456 1.1 christos character ch. See group_nodes_into_DFAstates. */ 3457 1.1 christos for (j = 0; (dests_ch[j][i] & mask) == 0; ++j) 3458 1.1 christos ; 3459 1.1 christos 3460 1.1 christos /* j-th destination accepts the word character ch. */ 3461 1.1 christos if (dfa->word_char[i] & mask) 3462 1.1 christos trtable[ch] = dest_states_word[j]; 3463 1.1 christos else 3464 1.1 christos trtable[ch] = dest_states[j]; 3465 1.1 christos } 3466 1.1 christos } 3467 1.1 christos else 3468 1.1 christos { 3469 1.1 christos /* We care about whether the following character is a word 3470 1.1 christos character, and we are in a multi-byte character set: discern 3471 1.1 christos by looking at the character code: build two 256-entry 3472 1.1 christos transition tables, one starting at trtable[0] and one 3473 1.1 christos starting at trtable[SBC_MAX]. */ 3474 1.1 christos trtable = state->word_trtable = re_calloc (re_dfastate_t *, 2 * SBC_MAX); 3475 1.1 christos if (BE (trtable == NULL, 0)) 3476 1.1 christos goto out_free; 3477 1.1 christos 3478 1.1 christos /* For all characters ch...: */ 3479 1.1 christos for (i = 0; i < BITSET_WORDS; ++i) 3480 1.1 christos for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; 3481 1.1 christos elem; 3482 1.1 christos mask <<= 1, elem >>= 1, ++ch) 3483 1.1 christos if (BE (elem & 1, 0)) 3484 1.1 christos { 3485 1.1 christos /* There must be exactly one destination which accepts 3486 1.1 christos character ch. See group_nodes_into_DFAstates. */ 3487 1.1 christos for (j = 0; (dests_ch[j][i] & mask) == 0; ++j) 3488 1.1 christos ; 3489 1.1 christos 3490 1.1 christos /* j-th destination accepts the word character ch. */ 3491 1.1 christos trtable[ch] = dest_states[j]; 3492 1.1 christos trtable[ch + SBC_MAX] = dest_states_word[j]; 3493 1.1 christos } 3494 1.1 christos } 3495 1.1 christos 3496 1.1 christos /* new line */ 3497 1.1 christos if (bitset_contain (acceptable, NEWLINE_CHAR)) 3498 1.1 christos { 3499 1.1 christos /* The current state accepts newline character. */ 3500 1.1 christos for (j = 0; j < ndests; ++j) 3501 1.1 christos if (bitset_contain (dests_ch[j], NEWLINE_CHAR)) 3502 1.1 christos { 3503 1.1 christos /* k-th destination accepts newline character. */ 3504 1.1 christos trtable[NEWLINE_CHAR] = dest_states_nl[j]; 3505 1.1 christos if (need_word_trtable) 3506 1.1 christos trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j]; 3507 1.1 christos /* There must be only one destination which accepts 3508 1.1 christos newline. See group_nodes_into_DFAstates. */ 3509 1.1 christos break; 3510 1.1 christos } 3511 1.1 christos } 3512 1.1 christos 3513 1.1 christos if (dest_states_malloced) 3514 1.1 christos free (dest_states); 3515 1.1 christos 3516 1.1 christos re_node_set_free (&follows); 3517 1.1 christos for (i = 0; i < ndests; ++i) 3518 1.1 christos re_node_set_free (dests_node + i); 3519 1.1 christos 3520 1.1 christos if (dests_node_malloced) 3521 1.1 christos free (dests_alloc); 3522 1.1 christos 3523 1.1 christos return true; 3524 1.1 christos } 3525 1.1 christos 3526 1.1 christos /* Group all nodes belonging to STATE into several destinations. 3527 1.1 christos Then for all destinations, set the nodes belonging to the destination 3528 1.1 christos to DESTS_NODE[i] and set the characters accepted by the destination 3529 1.1 christos to DEST_CH[i]. This function return the number of destinations. */ 3530 1.1 christos 3531 1.1 christos static Idx 3532 1.1 christos internal_function 3533 1.1 christos group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, 3534 1.1 christos re_node_set *dests_node, bitset *dests_ch) 3535 1.1 christos { 3536 1.1 christos reg_errcode_t err; 3537 1.1 christos bool ok; 3538 1.1 christos Idx i, j, k; 3539 1.1 christos Idx ndests; /* Number of the destinations from `state'. */ 3540 1.1 christos bitset accepts; /* Characters a node can accept. */ 3541 1.1 christos const re_node_set *cur_nodes = &state->nodes; 3542 1.1 christos bitset_empty (accepts); 3543 1.1 christos ndests = 0; 3544 1.1 christos 3545 1.1 christos /* For all the nodes belonging to `state', */ 3546 1.1 christos for (i = 0; i < cur_nodes->nelem; ++i) 3547 1.1 christos { 3548 1.1 christos re_token_t *node = &dfa->nodes[cur_nodes->elems[i]]; 3549 1.1 christos re_token_type_t type = node->type; 3550 1.1 christos unsigned int constraint = node->constraint; 3551 1.1 christos 3552 1.1 christos /* Enumerate all single byte character this node can accept. */ 3553 1.1 christos if (type == CHARACTER) 3554 1.1 christos bitset_set (accepts, node->opr.c); 3555 1.1 christos else if (type == SIMPLE_BRACKET) 3556 1.1 christos { 3557 1.1 christos bitset_merge (accepts, node->opr.sbcset); 3558 1.1 christos } 3559 1.1 christos else if (type == OP_PERIOD) 3560 1.1 christos { 3561 1.1 christos #ifdef RE_ENABLE_I18N 3562 1.1 christos if (dfa->mb_cur_max > 1) 3563 1.1 christos bitset_merge (accepts, dfa->sb_char); 3564 1.1 christos else 3565 1.1 christos #endif 3566 1.1 christos bitset_set_all (accepts); 3567 1.1 christos if (!(dfa->syntax & REG_DOT_NEWLINE)) 3568 1.1 christos bitset_clear (accepts, '\n'); 3569 1.1 christos if (dfa->syntax & REG_DOT_NOT_NULL) 3570 1.1 christos bitset_clear (accepts, '\0'); 3571 1.1 christos } 3572 1.1 christos #ifdef RE_ENABLE_I18N 3573 1.1 christos else if (type == OP_UTF8_PERIOD) 3574 1.1 christos { 3575 1.1 christos if (SBC_MAX / 2 % BITSET_WORD_BITS == 0) 3576 1.1 christos memset (accepts, -1, sizeof accepts / 2); 3577 1.1 christos else 3578 1.1 christos bitset_merge (accepts, utf8_sb_map); 3579 1.1 christos if (!(dfa->syntax & REG_DOT_NEWLINE)) 3580 1.1 christos bitset_clear (accepts, '\n'); 3581 1.1 christos if (dfa->syntax & REG_DOT_NOT_NULL) 3582 1.1 christos bitset_clear (accepts, '\0'); 3583 1.1 christos } 3584 1.1 christos #endif 3585 1.1 christos else 3586 1.1 christos continue; 3587 1.1 christos 3588 1.1 christos /* Check the `accepts' and sift the characters which are not 3589 1.1 christos match it the context. */ 3590 1.1 christos if (constraint) 3591 1.1 christos { 3592 1.1 christos if (constraint & NEXT_NEWLINE_CONSTRAINT) 3593 1.1 christos { 3594 1.1 christos bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR); 3595 1.1 christos bitset_empty (accepts); 3596 1.1 christos if (accepts_newline) 3597 1.1 christos bitset_set (accepts, NEWLINE_CHAR); 3598 1.1 christos else 3599 1.1 christos continue; 3600 1.1 christos } 3601 1.1 christos if (constraint & NEXT_ENDBUF_CONSTRAINT) 3602 1.1 christos { 3603 1.1 christos bitset_empty (accepts); 3604 1.1 christos continue; 3605 1.1 christos } 3606 1.1 christos 3607 1.1 christos if (constraint & NEXT_WORD_CONSTRAINT) 3608 1.1 christos { 3609 1.1 christos bitset_word any_set = 0; 3610 1.1 christos if (type == CHARACTER && !node->word_char) 3611 1.1 christos { 3612 1.1 christos bitset_empty (accepts); 3613 1.1 christos continue; 3614 1.1 christos } 3615 1.1 christos #ifdef RE_ENABLE_I18N 3616 1.1 christos if (dfa->mb_cur_max > 1) 3617 1.1 christos for (j = 0; j < BITSET_WORDS; ++j) 3618 1.1 christos any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); 3619 1.1 christos else 3620 1.1 christos #endif 3621 1.1 christos for (j = 0; j < BITSET_WORDS; ++j) 3622 1.1 christos any_set |= (accepts[j] &= dfa->word_char[j]); 3623 1.1 christos if (!any_set) 3624 1.1 christos continue; 3625 1.1 christos } 3626 1.1 christos if (constraint & NEXT_NOTWORD_CONSTRAINT) 3627 1.1 christos { 3628 1.1 christos bitset_word any_set = 0; 3629 1.1 christos if (type == CHARACTER && node->word_char) 3630 1.1 christos { 3631 1.1 christos bitset_empty (accepts); 3632 1.1 christos continue; 3633 1.1 christos } 3634 1.1 christos #ifdef RE_ENABLE_I18N 3635 1.1 christos if (dfa->mb_cur_max > 1) 3636 1.1 christos for (j = 0; j < BITSET_WORDS; ++j) 3637 1.1 christos any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); 3638 1.1 christos else 3639 1.1 christos #endif 3640 1.1 christos for (j = 0; j < BITSET_WORDS; ++j) 3641 1.1 christos any_set |= (accepts[j] &= ~dfa->word_char[j]); 3642 1.1 christos if (!any_set) 3643 1.1 christos continue; 3644 1.1 christos } 3645 1.1 christos } 3646 1.1 christos 3647 1.1 christos /* Then divide `accepts' into DFA states, or create a new 3648 1.1 christos state. Above, we make sure that accepts is not empty. */ 3649 1.1 christos for (j = 0; j < ndests; ++j) 3650 1.1 christos { 3651 1.1 christos bitset intersec; /* Intersection sets, see below. */ 3652 1.1 christos bitset remains; 3653 1.1 christos /* Flags, see below. */ 3654 1.1 christos bitset_word has_intersec, not_subset, not_consumed; 3655 1.1 christos 3656 1.1 christos /* Optimization, skip if this state doesn't accept the character. */ 3657 1.1 christos if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) 3658 1.1 christos continue; 3659 1.1 christos 3660 1.1 christos /* Enumerate the intersection set of this state and `accepts'. */ 3661 1.1 christos has_intersec = 0; 3662 1.1 christos for (k = 0; k < BITSET_WORDS; ++k) 3663 1.1 christos has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; 3664 1.1 christos /* And skip if the intersection set is empty. */ 3665 1.1 christos if (!has_intersec) 3666 1.1 christos continue; 3667 1.1 christos 3668 1.1 christos /* Then check if this state is a subset of `accepts'. */ 3669 1.1 christos not_subset = not_consumed = 0; 3670 1.1 christos for (k = 0; k < BITSET_WORDS; ++k) 3671 1.1 christos { 3672 1.1 christos not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k]; 3673 1.1 christos not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; 3674 1.1 christos } 3675 1.1 christos 3676 1.1 christos /* If this state isn't a subset of `accepts', create a 3677 1.1 christos new group state, which has the `remains'. */ 3678 1.1 christos if (not_subset) 3679 1.1 christos { 3680 1.1 christos bitset_copy (dests_ch[ndests], remains); 3681 1.1 christos bitset_copy (dests_ch[j], intersec); 3682 1.1 christos err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]); 3683 1.1 christos if (BE (err != REG_NOERROR, 0)) 3684 1.1 christos goto error_return; 3685 1.1 christos ++ndests; 3686 1.1 christos } 3687 1.1 christos 3688 1.1 christos /* Put the position in the current group. */ 3689 1.1 christos ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); 3690 1.1 christos if (BE (! ok, 0)) 3691 1.1 christos goto error_return; 3692 1.1 christos 3693 1.1 christos /* If all characters are consumed, go to next node. */ 3694 1.1 christos if (!not_consumed) 3695 1.1 christos break; 3696 1.1 christos } 3697 1.1 christos /* Some characters remain, create a new group. */ 3698 1.1 christos if (j == ndests) 3699 1.1 christos { 3700 1.1 christos bitset_copy (dests_ch[ndests], accepts); 3701 1.1 christos err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]); 3702 1.1 christos if (BE (err != REG_NOERROR, 0)) 3703 1.1 christos goto error_return; 3704 1.1 christos ++ndests; 3705 1.1 christos bitset_empty (accepts); 3706 1.1 christos } 3707 1.1 christos } 3708 1.1 christos return ndests; 3709 1.1 christos error_return: 3710 1.1 christos for (j = 0; j < ndests; ++j) 3711 1.1 christos re_node_set_free (dests_node + j); 3712 1.1 christos return REG_MISSING; 3713 1.1 christos } 3714 1.1 christos 3715 1.1 christos #ifdef RE_ENABLE_I18N 3716 1.1 christos /* Check how many bytes the node `dfa->nodes[node_idx]' accepts. 3717 1.1 christos Return the number of the bytes the node accepts. 3718 1.1 christos STR_IDX is the current index of the input string. 3719 1.1 christos 3720 1.1 christos This function handles the nodes which can accept one character, or 3721 1.1 christos one collating element like '.', '[a-z]', opposite to the other nodes 3722 1.1 christos can only accept one byte. */ 3723 1.1 christos 3724 1.1 christos static int 3725 1.1 christos internal_function 3726 1.1 christos check_node_accept_bytes (re_dfa_t *dfa, Idx node_idx, 3727 1.1 christos const re_string_t *input, Idx str_idx) 3728 1.1 christos { 3729 1.1 christos const re_token_t *node = dfa->nodes + node_idx; 3730 1.1 christos int char_len, elem_len; 3731 1.1 christos Idx i; 3732 1.1 christos 3733 1.1 christos if (BE (node->type == OP_UTF8_PERIOD, 0)) 3734 1.1 christos { 3735 1.1 christos unsigned char c = re_string_byte_at (input, str_idx), d; 3736 1.1 christos if (BE (c < 0xc2, 1)) 3737 1.1 christos return 0; 3738 1.1 christos 3739 1.1 christos if (str_idx + 2 > input->len) 3740 1.1 christos return 0; 3741 1.1 christos 3742 1.1 christos d = re_string_byte_at (input, str_idx + 1); 3743 1.1 christos if (c < 0xe0) 3744 1.1 christos return (d < 0x80 || d > 0xbf) ? 0 : 2; 3745 1.1 christos else if (c < 0xf0) 3746 1.1 christos { 3747 1.1 christos char_len = 3; 3748 1.1 christos if (c == 0xe0 && d < 0xa0) 3749 1.1 christos return 0; 3750 1.1 christos } 3751 1.1 christos else if (c < 0xf8) 3752 1.1 christos { 3753 1.1 christos char_len = 4; 3754 1.1 christos if (c == 0xf0 && d < 0x90) 3755 1.1 christos return 0; 3756 1.1 christos } 3757 1.1 christos else if (c < 0xfc) 3758 1.1 christos { 3759 1.1 christos char_len = 5; 3760 1.1 christos if (c == 0xf8 && d < 0x88) 3761 1.1 christos return 0; 3762 1.1 christos } 3763 1.1 christos else if (c < 0xfe) 3764 1.1 christos { 3765 1.1 christos char_len = 6; 3766 1.1 christos if (c == 0xfc && d < 0x84) 3767 1.1 christos return 0; 3768 1.1 christos } 3769 1.1 christos else 3770 1.1 christos return 0; 3771 1.1 christos 3772 1.1 christos if (str_idx + char_len > input->len) 3773 1.1 christos return 0; 3774 1.1 christos 3775 1.1 christos for (i = 1; i < char_len; ++i) 3776 1.1 christos { 3777 1.1 christos d = re_string_byte_at (input, str_idx + i); 3778 1.1 christos if (d < 0x80 || d > 0xbf) 3779 1.1 christos return 0; 3780 1.1 christos } 3781 1.1 christos return char_len; 3782 1.1 christos } 3783 1.1 christos 3784 1.1 christos char_len = re_string_char_size_at (input, str_idx); 3785 1.1 christos if (node->type == OP_PERIOD) 3786 1.1 christos { 3787 1.1 christos if (char_len <= 1) 3788 1.1 christos return 0; 3789 1.1 christos /* FIXME: I don't think this if is needed, as both '\n' 3790 1.1 christos and '\0' are char_len == 1. */ 3791 1.1 christos /* '.' accepts any one character except the following two cases. */ 3792 1.1 christos if ((!(dfa->syntax & REG_DOT_NEWLINE) && 3793 1.1 christos re_string_byte_at (input, str_idx) == '\n') || 3794 1.1 christos ((dfa->syntax & REG_DOT_NOT_NULL) && 3795 1.1 christos re_string_byte_at (input, str_idx) == '\0')) 3796 1.1 christos return 0; 3797 1.1 christos return char_len; 3798 1.1 christos } 3799 1.1 christos 3800 1.1 christos elem_len = re_string_elem_size_at (input, str_idx); 3801 1.1 christos if ((elem_len <= 1 && char_len <= 1) || char_len == 0) 3802 1.1 christos return 0; 3803 1.1 christos 3804 1.1 christos if (node->type == COMPLEX_BRACKET) 3805 1.1 christos { 3806 1.1 christos const re_charset_t *cset = node->opr.mbcset; 3807 1.1 christos # ifdef _LIBC 3808 1.1 christos const unsigned char *pin 3809 1.1 christos = ((const unsigned char *) re_string_get_buffer (input) + str_idx); 3810 1.1 christos Idx j; 3811 1.1 christos uint32_t nrules; 3812 1.1 christos # endif /* _LIBC */ 3813 1.1 christos int match_len = 0; 3814 1.1 christos wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) 3815 1.1 christos ? re_string_wchar_at (input, str_idx) : 0); 3816 1.1 christos 3817 1.1 christos /* match with multibyte character? */ 3818 1.1 christos for (i = 0; i < cset->nmbchars; ++i) 3819 1.1 christos if (wc == cset->mbchars[i]) 3820 1.1 christos { 3821 1.1 christos match_len = char_len; 3822 1.1 christos goto check_node_accept_bytes_match; 3823 1.1 christos } 3824 1.1 christos /* match with character_class? */ 3825 1.1 christos for (i = 0; i < cset->nchar_classes; ++i) 3826 1.1 christos { 3827 1.1 christos wctype_t wt = cset->char_classes[i]; 3828 1.1 christos if (__iswctype (wc, wt)) 3829 1.1 christos { 3830 1.1 christos match_len = char_len; 3831 1.1 christos goto check_node_accept_bytes_match; 3832 1.1 christos } 3833 1.1 christos } 3834 1.1 christos 3835 1.1 christos # ifdef _LIBC 3836 1.1 christos nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 3837 1.1 christos if (nrules != 0) 3838 1.1 christos { 3839 1.1 christos unsigned int in_collseq = 0; 3840 1.1 christos const int32_t *table, *indirect; 3841 1.1 christos const unsigned char *weights, *extra; 3842 1.1 christos const char *collseqwc; 3843 1.1 christos int32_t idx; 3844 1.1 christos /* This #include defines a local function! */ 3845 1.1 christos # include <locale/weight.h> 3846 1.1 christos 3847 1.1 christos /* match with collating_symbol? */ 3848 1.1 christos if (cset->ncoll_syms) 3849 1.1 christos extra = (const unsigned char *) 3850 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); 3851 1.1 christos for (i = 0; i < cset->ncoll_syms; ++i) 3852 1.1 christos { 3853 1.1 christos const unsigned char *coll_sym = extra + cset->coll_syms[i]; 3854 1.1 christos /* Compare the length of input collating element and 3855 1.1 christos the length of current collating element. */ 3856 1.1 christos if (*coll_sym != elem_len) 3857 1.1 christos continue; 3858 1.1 christos /* Compare each bytes. */ 3859 1.1 christos for (j = 0; j < *coll_sym; j++) 3860 1.1 christos if (pin[j] != coll_sym[1 + j]) 3861 1.1 christos break; 3862 1.1 christos if (j == *coll_sym) 3863 1.1 christos { 3864 1.1 christos /* Match if every bytes is equal. */ 3865 1.1 christos match_len = j; 3866 1.1 christos goto check_node_accept_bytes_match; 3867 1.1 christos } 3868 1.1 christos } 3869 1.1 christos 3870 1.1 christos if (cset->nranges) 3871 1.1 christos { 3872 1.1 christos if (elem_len <= char_len) 3873 1.1 christos { 3874 1.1 christos collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); 3875 1.1 christos in_collseq = __collseq_table_lookup (collseqwc, wc); 3876 1.1 christos } 3877 1.1 christos else 3878 1.1 christos in_collseq = find_collation_sequence_value (pin, elem_len); 3879 1.1 christos } 3880 1.1 christos /* match with range expression? */ 3881 1.1 christos for (i = 0; i < cset->nranges; ++i) 3882 1.1 christos if (cset->range_starts[i] <= in_collseq 3883 1.1 christos && in_collseq <= cset->range_ends[i]) 3884 1.1 christos { 3885 1.1 christos match_len = elem_len; 3886 1.1 christos goto check_node_accept_bytes_match; 3887 1.1 christos } 3888 1.1 christos 3889 1.1 christos /* match with equivalence_class? */ 3890 1.1 christos if (cset->nequiv_classes) 3891 1.1 christos { 3892 1.1 christos const unsigned char *cp = pin; 3893 1.1 christos table = (const int32_t *) 3894 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); 3895 1.1 christos weights = (const unsigned char *) 3896 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); 3897 1.1 christos extra = (const unsigned char *) 3898 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); 3899 1.1 christos indirect = (const int32_t *) 3900 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); 3901 1.1 christos idx = findidx (&cp); 3902 1.1 christos if (idx > 0) 3903 1.1 christos for (i = 0; i < cset->nequiv_classes; ++i) 3904 1.1 christos { 3905 1.1 christos int32_t equiv_class_idx = cset->equiv_classes[i]; 3906 1.1 christos size_t weight_len = weights[idx]; 3907 1.1 christos if (weight_len == weights[equiv_class_idx]) 3908 1.1 christos { 3909 1.1 christos Idx cnt = 0; 3910 1.1 christos while (cnt <= weight_len 3911 1.1 christos && (weights[equiv_class_idx + 1 + cnt] 3912 1.1 christos == weights[idx + 1 + cnt])) 3913 1.1 christos ++cnt; 3914 1.1 christos if (cnt > weight_len) 3915 1.1 christos { 3916 1.1 christos match_len = elem_len; 3917 1.1 christos goto check_node_accept_bytes_match; 3918 1.1 christos } 3919 1.1 christos } 3920 1.1 christos } 3921 1.1 christos } 3922 1.1 christos } 3923 1.1 christos else 3924 1.1 christos # endif /* _LIBC */ 3925 1.1 christos { 3926 1.1 christos /* match with range expression? */ 3927 1.1 christos #if __GNUC__ >= 2 3928 1.1 christos wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; 3929 1.1 christos #else 3930 1.1 christos wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; 3931 1.1 christos cmp_buf[2] = wc; 3932 1.1 christos #endif 3933 1.1 christos for (i = 0; i < cset->nranges; ++i) 3934 1.1 christos { 3935 1.1 christos cmp_buf[0] = cset->range_starts[i]; 3936 1.1 christos cmp_buf[4] = cset->range_ends[i]; 3937 1.1 christos if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 3938 1.1 christos && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) 3939 1.1 christos { 3940 1.1 christos match_len = char_len; 3941 1.1 christos goto check_node_accept_bytes_match; 3942 1.1 christos } 3943 1.1 christos } 3944 1.1 christos } 3945 1.1 christos check_node_accept_bytes_match: 3946 1.1 christos if (!cset->non_match) 3947 1.1 christos return match_len; 3948 1.1 christos else 3949 1.1 christos { 3950 1.1 christos if (match_len > 0) 3951 1.1 christos return 0; 3952 1.1 christos else 3953 1.1 christos return (elem_len > char_len) ? elem_len : char_len; 3954 1.1 christos } 3955 1.1 christos } 3956 1.1 christos return 0; 3957 1.1 christos } 3958 1.1 christos 3959 1.1 christos # ifdef _LIBC 3960 1.1 christos static unsigned int 3961 1.1 christos find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) 3962 1.1 christos { 3963 1.1 christos uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 3964 1.1 christos if (nrules == 0) 3965 1.1 christos { 3966 1.1 christos if (mbs_len == 1) 3967 1.1 christos { 3968 1.1 christos /* No valid character. Match it as a single byte character. */ 3969 1.1 christos const unsigned char *collseq = (const unsigned char *) 3970 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); 3971 1.1 christos return collseq[mbs[0]]; 3972 1.1 christos } 3973 1.1 christos return UINT_MAX; 3974 1.1 christos } 3975 1.1 christos else 3976 1.1 christos { 3977 1.1 christos int32_t idx; 3978 1.1 christos const unsigned char *extra = (const unsigned char *) 3979 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); 3980 1.1 christos int32_t extrasize = (const unsigned char *) 3981 1.1 christos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra; 3982 1.1 christos 3983 1.1 christos for (idx = 0; idx < extrasize;) 3984 1.1 christos { 3985 1.1 christos int mbs_cnt; 3986 1.1 christos bool found = false; 3987 1.1 christos int32_t elem_mbs_len; 3988 1.1 christos /* Skip the name of collating element name. */ 3989 1.1 christos idx = idx + extra[idx] + 1; 3990 1.1 christos elem_mbs_len = extra[idx++]; 3991 1.1 christos if (mbs_len == elem_mbs_len) 3992 1.1 christos { 3993 1.1 christos for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt) 3994 1.1 christos if (extra[idx + mbs_cnt] != mbs[mbs_cnt]) 3995 1.1 christos break; 3996 1.1 christos if (mbs_cnt == elem_mbs_len) 3997 1.1 christos /* Found the entry. */ 3998 1.1 christos found = true; 3999 1.1 christos } 4000 1.1 christos /* Skip the byte sequence of the collating element. */ 4001 1.1 christos idx += elem_mbs_len; 4002 1.1 christos /* Adjust for the alignment. */ 4003 1.1 christos idx = (idx + 3) & ~3; 4004 1.1 christos /* Skip the collation sequence value. */ 4005 1.1 christos idx += sizeof (uint32_t); 4006 1.1 christos /* Skip the wide char sequence of the collating element. */ 4007 1.1 christos idx = idx + sizeof (uint32_t) * (extra[idx] + 1); 4008 1.1 christos /* If we found the entry, return the sequence value. */ 4009 1.1 christos if (found) 4010 1.1 christos return *(uint32_t *) (extra + idx); 4011 1.1 christos /* Skip the collation sequence value. */ 4012 1.1 christos idx += sizeof (uint32_t); 4013 1.1 christos } 4014 1.1 christos return UINT_MAX; 4015 1.1 christos } 4016 1.1 christos } 4017 1.1 christos # endif /* _LIBC */ 4018 1.1 christos #endif /* RE_ENABLE_I18N */ 4019 1.1 christos 4020 1.1 christos /* Check whether the node accepts the byte which is IDX-th 4021 1.1 christos byte of the INPUT. */ 4022 1.1 christos 4023 1.1 christos static bool 4024 1.1 christos internal_function 4025 1.1 christos check_node_accept (const re_match_context_t *mctx, const re_token_t *node, 4026 1.1 christos Idx idx) 4027 1.1 christos { 4028 1.1 christos unsigned char ch; 4029 1.1 christos ch = re_string_byte_at (&mctx->input, idx); 4030 1.1 christos switch (node->type) 4031 1.1 christos { 4032 1.1 christos case CHARACTER: 4033 1.1 christos if (node->opr.c != ch) 4034 1.1 christos return false; 4035 1.1 christos break; 4036 1.1 christos 4037 1.1 christos case SIMPLE_BRACKET: 4038 1.1 christos if (!bitset_contain (node->opr.sbcset, ch)) 4039 1.1 christos return false; 4040 1.1 christos break; 4041 1.1 christos 4042 1.1 christos #ifdef RE_ENABLE_I18N 4043 1.1 christos case OP_UTF8_PERIOD: 4044 1.1 christos if (ch >= 0x80) 4045 1.1 christos return false; 4046 1.1 christos /* FALLTHROUGH */ 4047 1.1 christos #endif 4048 1.1 christos case OP_PERIOD: 4049 1.1 christos if ((ch == '\n' && !(mctx->dfa->syntax & REG_DOT_NEWLINE)) 4050 1.1 christos || (ch == '\0' && (mctx->dfa->syntax & REG_DOT_NOT_NULL))) 4051 1.1 christos return false; 4052 1.1 christos break; 4053 1.1 christos 4054 1.1 christos default: 4055 1.1 christos return false; 4056 1.1 christos } 4057 1.1 christos 4058 1.1 christos if (node->constraint) 4059 1.1 christos { 4060 1.1 christos /* The node has constraints. Check whether the current context 4061 1.1 christos satisfies the constraints. */ 4062 1.1 christos unsigned int context = re_string_context_at (&mctx->input, idx, 4063 1.1 christos mctx->eflags); 4064 1.1 christos if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) 4065 1.1 christos return false; 4066 1.1 christos } 4067 1.1 christos 4068 1.1 christos return true; 4069 1.1 christos } 4070 1.1 christos 4071 1.1 christos /* Extend the buffers, if the buffers have run out. */ 4072 1.1 christos 4073 1.1 christos static reg_errcode_t 4074 1.1 christos internal_function 4075 1.1 christos extend_buffers (re_match_context_t *mctx) 4076 1.1 christos { 4077 1.1 christos reg_errcode_t ret; 4078 1.1 christos re_string_t *pstr = &mctx->input; 4079 1.1 christos 4080 1.1 christos /* Double the lengthes of the buffers. */ 4081 1.1 christos ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); 4082 1.1 christos if (BE (ret != REG_NOERROR, 0)) 4083 1.1 christos return ret; 4084 1.1 christos 4085 1.1 christos if (mctx->state_log != NULL) 4086 1.1 christos { 4087 1.1 christos /* And double the length of state_log. */ 4088 1.1 christos /* XXX We have no indication of the size of this buffer. If this 4089 1.1 christos allocation fail we have no indication that the state_log array 4090 1.1 christos does not have the right size. */ 4091 1.1 christos re_dfastate_t **new_array = re_xrealloc (mctx->state_log, re_dfastate_t *, 4092 1.1 christos pstr->bufs_len + 1); 4093 1.1 christos if (BE (new_array == NULL, 0)) 4094 1.1 christos return REG_ESPACE; 4095 1.1 christos mctx->state_log = new_array; 4096 1.1 christos } 4097 1.1 christos 4098 1.1 christos /* Then reconstruct the buffers. */ 4099 1.1 christos if (pstr->icase) 4100 1.1 christos { 4101 1.1 christos #ifdef RE_ENABLE_I18N 4102 1.1 christos if (pstr->mb_cur_max > 1) 4103 1.1 christos { 4104 1.1 christos ret = build_wcs_upper_buffer (pstr); 4105 1.1 christos if (BE (ret != REG_NOERROR, 0)) 4106 1.1 christos return ret; 4107 1.1 christos } 4108 1.1 christos else 4109 1.1 christos #endif /* RE_ENABLE_I18N */ 4110 1.1 christos build_upper_buffer (pstr); 4111 1.1 christos } 4112 1.1 christos else 4113 1.1 christos { 4114 1.1 christos #ifdef RE_ENABLE_I18N 4115 1.1 christos if (pstr->mb_cur_max > 1) 4116 1.1 christos build_wcs_buffer (pstr); 4117 1.1 christos else 4118 1.1 christos #endif /* RE_ENABLE_I18N */ 4119 1.1 christos { 4120 1.1 christos if (pstr->trans != NULL) 4121 1.1 christos re_string_translate_buffer (pstr); 4122 1.1 christos } 4123 1.1 christos } 4124 1.1 christos return REG_NOERROR; 4125 1.1 christos } 4126 1.1 christos 4127 1.1 christos 4128 1.1 christos /* Functions for matching context. */ 4130 1.1 christos 4131 1.1 christos /* Initialize MCTX. */ 4132 1.1 christos 4133 1.1 christos static reg_errcode_t 4134 1.1 christos internal_function 4135 1.1 christos match_ctx_init (re_match_context_t *mctx, int eflags, Idx n) 4136 1.1 christos { 4137 1.1 christos mctx->eflags = eflags; 4138 1.1 christos mctx->match_last = REG_MISSING; 4139 1.1 christos if (n > 0) 4140 1.1 christos { 4141 1.1 christos mctx->bkref_ents = re_xmalloc (struct re_backref_cache_entry, n); 4142 1.1 christos mctx->sub_tops = re_xmalloc (re_sub_match_top_t *, n); 4143 1.1 christos if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0)) 4144 1.1 christos return REG_ESPACE; 4145 1.1 christos } 4146 1.1 christos /* Already zero-ed by the caller. 4147 1.1 christos else 4148 1.1 christos mctx->bkref_ents = NULL; 4149 1.1 christos mctx->nbkref_ents = 0; 4150 1.1 christos mctx->nsub_tops = 0; */ 4151 1.1 christos mctx->abkref_ents = n; 4152 1.1 christos mctx->max_mb_elem_len = 1; 4153 1.1 christos mctx->asub_tops = n; 4154 1.1 christos return REG_NOERROR; 4155 1.1 christos } 4156 1.1 christos 4157 1.1 christos /* Clean the entries which depend on the current input in MCTX. 4158 1.1 christos This function must be invoked when the matcher changes the start index 4159 1.1 christos of the input, or changes the input string. */ 4160 1.1 christos 4161 1.1 christos static void 4162 1.1 christos internal_function 4163 1.1 christos match_ctx_clean (re_match_context_t *mctx) 4164 1.1 christos { 4165 1.1 christos Idx st_idx; 4166 1.1 christos for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx) 4167 1.1 christos { 4168 1.1 christos Idx sl_idx; 4169 1.1 christos re_sub_match_top_t *top = mctx->sub_tops[st_idx]; 4170 1.1 christos for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx) 4171 1.1 christos { 4172 1.1 christos re_sub_match_last_t *last = top->lasts[sl_idx]; 4173 1.1 christos re_free (last->path.array); 4174 1.1 christos re_free (last); 4175 1.1 christos } 4176 1.1 christos re_free (top->lasts); 4177 1.1 christos if (top->path) 4178 1.1 christos { 4179 1.1 christos re_free (top->path->array); 4180 1.1 christos re_free (top->path); 4181 1.1 christos } 4182 1.1 christos free (top); 4183 1.1 christos } 4184 1.1 christos 4185 1.1 christos mctx->nsub_tops = 0; 4186 1.1 christos mctx->nbkref_ents = 0; 4187 1.1 christos } 4188 1.1 christos 4189 1.1 christos /* Free all the memory associated with MCTX. */ 4190 1.1 christos 4191 1.1 christos static void 4192 1.1 christos internal_function 4193 1.1 christos match_ctx_free (re_match_context_t *mctx) 4194 1.1 christos { 4195 1.1 christos /* First, free all the memory associated with MCTX->SUB_TOPS. */ 4196 1.1 christos match_ctx_clean (mctx); 4197 1.1 christos re_free (mctx->sub_tops); 4198 1.1 christos re_free (mctx->bkref_ents); 4199 1.1 christos } 4200 1.1 christos 4201 1.1 christos /* Add a new backreference entry to MCTX. 4202 1.1 christos Note that we assume that caller never call this function with duplicate 4203 1.1 christos entry, and call with STR_IDX which isn't smaller than any existing entry. 4204 1.1 christos */ 4205 1.1 christos 4206 1.1 christos static reg_errcode_t 4207 1.1 christos internal_function 4208 1.1 christos match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, 4209 1.1 christos Idx from, Idx to) 4210 1.1 christos { 4211 1.1 christos if (mctx->nbkref_ents >= mctx->abkref_ents) 4212 1.1 christos { 4213 1.1 christos struct re_backref_cache_entry* new_entry; 4214 1.1 christos new_entry = re_x2realloc (mctx->bkref_ents, struct re_backref_cache_entry, 4215 1.1 christos &mctx->abkref_ents); 4216 1.1 christos if (BE (new_entry == NULL, 0)) 4217 1.1 christos { 4218 1.1 christos re_free (mctx->bkref_ents); 4219 1.1 christos return REG_ESPACE; 4220 1.1 christos } 4221 1.1 christos mctx->bkref_ents = new_entry; 4222 1.1 christos memset (mctx->bkref_ents + mctx->nbkref_ents, '\0', 4223 1.1 christos (sizeof (struct re_backref_cache_entry) 4224 1.1 christos * (mctx->abkref_ents - mctx->nbkref_ents))); 4225 1.1 christos } 4226 1.1 christos if (mctx->nbkref_ents > 0 4227 1.1 christos && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx) 4228 1.1 christos mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1; 4229 1.1 christos 4230 1.1 christos mctx->bkref_ents[mctx->nbkref_ents].node = node; 4231 1.1 christos mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx; 4232 1.1 christos mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from; 4233 1.1 christos mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to; 4234 1.1 christos 4235 1.1 christos /* This is a cache that saves negative results of check_dst_limits_calc_pos. 4236 1.1 christos If bit N is clear, means that this entry won't epsilon-transition to 4237 1.1 christos an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If 4238 1.1 christos it is set, check_dst_limits_calc_pos_1 will recurse and try to find one 4239 1.1 christos such node. 4240 1.1 christos 4241 1.1 christos A backreference does not epsilon-transition unless it is empty, so set 4242 1.1 christos to all zeros if FROM != TO. */ 4243 1.1 christos mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map 4244 1.1 christos = (from == to ? -1 : 0); 4245 1.1 christos 4246 1.1 christos mctx->bkref_ents[mctx->nbkref_ents++].more = 0; 4247 1.1 christos if (mctx->max_mb_elem_len < to - from) 4248 1.1 christos mctx->max_mb_elem_len = to - from; 4249 1.1 christos return REG_NOERROR; 4250 1.1 christos } 4251 1.1 christos 4252 1.1 christos /* Return the first entry with the same str_idx, or REG_MISSING if none is 4253 1.1 christos found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */ 4254 1.1 christos 4255 1.1 christos static Idx 4256 1.1 christos internal_function 4257 1.1 christos search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx) 4258 1.1 christos { 4259 1.1 christos Idx left, right, mid, last; 4260 1.1 christos last = right = mctx->nbkref_ents; 4261 1.1 christos for (left = 0; left < right;) 4262 1.1 christos { 4263 1.1 christos mid = (left + right) / 2; 4264 1.1 christos if (mctx->bkref_ents[mid].str_idx < str_idx) 4265 1.1 christos left = mid + 1; 4266 1.1 christos else 4267 1.1 christos right = mid; 4268 1.1 christos } 4269 1.1 christos if (left < last && mctx->bkref_ents[left].str_idx == str_idx) 4270 1.1 christos return left; 4271 1.1 christos else 4272 1.1 christos return REG_MISSING; 4273 1.1 christos } 4274 1.1 christos 4275 1.1 christos /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches 4276 1.1 christos at STR_IDX. */ 4277 1.1 christos 4278 1.1 christos static reg_errcode_t 4279 1.1 christos internal_function 4280 1.1 christos match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx) 4281 1.1 christos { 4282 1.1 christos #ifdef DEBUG 4283 1.1 christos assert (mctx->sub_tops != NULL); 4284 1.1 christos assert (mctx->asub_tops > 0); 4285 1.1 christos #endif 4286 1.1 christos if (BE (mctx->nsub_tops == mctx->asub_tops, 0)) 4287 1.1 christos { 4288 1.1 christos Idx new_asub_tops = mctx->asub_tops; 4289 1.1 christos re_sub_match_top_t **new_array = re_x2realloc (mctx->sub_tops, 4290 1.1 christos re_sub_match_top_t *, 4291 1.1 christos &new_asub_tops); 4292 1.1 christos if (BE (new_array == NULL, 0)) 4293 1.1 christos return REG_ESPACE; 4294 1.1 christos mctx->sub_tops = new_array; 4295 1.1 christos mctx->asub_tops = new_asub_tops; 4296 1.1 christos } 4297 1.1 christos mctx->sub_tops[mctx->nsub_tops] = re_calloc (re_sub_match_top_t, 1); 4298 1.1 christos if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0)) 4299 1.1 christos return REG_ESPACE; 4300 1.1 christos mctx->sub_tops[mctx->nsub_tops]->node = node; 4301 1.1 christos mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx; 4302 1.1 christos return REG_NOERROR; 4303 1.1 christos } 4304 1.1 christos 4305 1.1 christos /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches 4306 1.1 christos at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */ 4307 1.1 christos 4308 1.1 christos static re_sub_match_last_t * 4309 1.1 christos internal_function 4310 1.1 christos match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx) 4311 1.1 christos { 4312 1.1 christos re_sub_match_last_t *new_entry; 4313 1.1 christos if (BE (subtop->nlasts == subtop->alasts, 0)) 4314 1.1 christos { 4315 1.1 christos Idx new_alasts = subtop->alasts; 4316 1.1 christos re_sub_match_last_t **new_array = re_x2realloc (subtop->lasts, 4317 1.1 christos re_sub_match_last_t *, 4318 1.1 christos &new_alasts); 4319 1.1 christos if (BE (new_array == NULL, 0)) 4320 1.1 christos return NULL; 4321 1.1 christos subtop->lasts = new_array; 4322 1.1 christos subtop->alasts = new_alasts; 4323 1.1 christos } 4324 1.1 christos new_entry = re_calloc (re_sub_match_last_t, 1); 4325 1.1 christos if (BE (new_entry != NULL, 1)) 4326 1.1 christos { 4327 1.1 christos subtop->lasts[subtop->nlasts] = new_entry; 4328 1.1 christos new_entry->node = node; 4329 1.1 christos new_entry->str_idx = str_idx; 4330 1.1 christos ++subtop->nlasts; 4331 1.1 christos } 4332 1.1 christos return new_entry; 4333 1.1 christos } 4334 1.1 christos 4335 1.1 christos static void 4336 1.1 christos internal_function 4337 1.1 christos sift_ctx_init (re_sift_context_t *sctx, 4338 1.1 christos re_dfastate_t **sifted_sts, 4339 1.1 christos re_dfastate_t **limited_sts, 4340 1.1 christos Idx last_node, Idx last_str_idx) 4341 1.1 christos { 4342 1.1 christos sctx->sifted_states = sifted_sts; 4343 sctx->limited_states = limited_sts; 4344 sctx->last_node = last_node; 4345 sctx->last_str_idx = last_str_idx; 4346 re_node_set_init_empty (&sctx->limits); 4347 } 4348