Home | History | Annotate | Line # | Download | only in lib
      1  1.1  christos /* Extended regular expression matching and search library.
      2  1.1  christos    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
      3  1.1  christos    This file is part of the GNU C Library.
      4  1.1  christos    Contributed by Isamu Hasegawa <isamu (at) yamato.ibm.com>.
      5  1.1  christos 
      6  1.1  christos    This program is free software; you can redistribute it and/or modify
      7  1.1  christos    it under the terms of the GNU General Public License as published by
      8  1.1  christos    the Free Software Foundation; either version 2, or (at your option)
      9  1.1  christos    any later version.
     10  1.1  christos 
     11  1.1  christos    This program is distributed in the hope that it will be useful,
     12  1.1  christos    but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  1.1  christos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14  1.1  christos    GNU General Public License for more details.
     15  1.1  christos 
     16  1.1  christos    You should have received a copy of the GNU General Public License along
     17  1.1  christos    with this program; if not, write to the Free Software Foundation,
     18  1.1  christos    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
     19  1.3  christos #include <sys/cdefs.h>
     20  1.3  christos __RCSID("$NetBSD: regexec.c,v 1.3 2016/05/17 14:00:09 christos Exp $");
     21  1.3  christos 
     22  1.1  christos 
     23  1.1  christos static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
     24  1.1  christos 				     Idx n) internal_function;
     25  1.1  christos static void match_ctx_clean (re_match_context_t *mctx) internal_function;
     26  1.1  christos static void match_ctx_free (re_match_context_t *cache) internal_function;
     27  1.1  christos static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
     28  1.1  christos 					  Idx str_idx, Idx from, Idx to)
     29  1.1  christos      internal_function;
     30  1.1  christos static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
     31  1.1  christos      internal_function;
     32  1.1  christos static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
     33  1.1  christos 					   Idx str_idx) internal_function;
     34  1.1  christos static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
     35  1.1  christos 						    Idx node, Idx str_idx)
     36  1.1  christos      internal_function;
     37  1.1  christos static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
     38  1.1  christos 			   re_dfastate_t **limited_sts, Idx last_node,
     39  1.1  christos 			   Idx last_str_idx)
     40  1.1  christos      internal_function;
     41  1.1  christos static reg_errcode_t re_search_internal (const regex_t *preg,
     42  1.1  christos 					 const char *string, Idx length,
     43  1.1  christos 					 Idx start, Idx last_start, Idx stop,
     44  1.1  christos 					 size_t nmatch, regmatch_t pmatch[],
     45  1.1  christos 					 int eflags) internal_function;
     46  1.1  christos static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
     47  1.1  christos 				  const char *string1, Idx length1,
     48  1.1  christos 				  const char *string2, Idx length2,
     49  1.1  christos 				  Idx start, regoff_t range,
     50  1.1  christos 				  struct re_registers *regs,
     51  1.1  christos 				  Idx stop, bool ret_len) internal_function;
     52  1.1  christos static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
     53  1.1  christos 				const char *string, Idx length, Idx start,
     54  1.1  christos 				regoff_t range, Idx stop,
     55  1.1  christos 				struct re_registers *regs,
     56  1.1  christos 				bool ret_len) internal_function;
     57  1.1  christos static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
     58  1.1  christos 			      Idx nregs, int regs_allocated) internal_function;
     59  1.1  christos static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
     60  1.1  christos      internal_function;
     61  1.1  christos static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
     62  1.1  christos 			   Idx *p_match_first)
     63  1.1  christos      internal_function;
     64  1.1  christos static Idx check_halt_state_context (const re_match_context_t *mctx,
     65  1.1  christos 				     const re_dfastate_t *state, Idx idx)
     66  1.1  christos      internal_function;
     67  1.1  christos static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch,
     68  1.1  christos 			 regmatch_t *prev_idx_match, Idx cur_node,
     69  1.1  christos 			 Idx cur_idx, Idx nmatch) internal_function;
     70  1.1  christos static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
     71  1.1  christos 				      Idx str_idx, Idx dest_node, Idx nregs,
     72  1.1  christos 				      regmatch_t *regs,
     73  1.1  christos 				      re_node_set *eps_via_nodes) internal_function;
     74  1.1  christos static reg_errcode_t set_regs (const regex_t *preg,
     75  1.1  christos 			       const re_match_context_t *mctx,
     76  1.1  christos 			       size_t nmatch, regmatch_t *pmatch,
     77  1.1  christos 			       bool fl_backtrack) internal_function;
     78  1.1  christos static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) internal_function;
     79  1.1  christos 
     80  1.1  christos #ifdef RE_ENABLE_I18N
     81  1.1  christos static int sift_states_iter_mb (const re_match_context_t *mctx,
     82  1.1  christos 				re_sift_context_t *sctx,
     83  1.1  christos 				Idx node_idx, Idx str_idx, Idx max_str_idx) internal_function;
     84  1.1  christos #endif /* RE_ENABLE_I18N */
     85  1.1  christos static reg_errcode_t sift_states_backward (re_match_context_t *mctx,
     86  1.1  christos 					   re_sift_context_t *sctx) internal_function;
     87  1.1  christos static reg_errcode_t build_sifted_states (re_match_context_t *mctx,
     88  1.1  christos 					  re_sift_context_t *sctx, Idx str_idx,
     89  1.1  christos 					  re_node_set *cur_dest) internal_function;
     90  1.1  christos static reg_errcode_t update_cur_sifted_state (re_match_context_t *mctx,
     91  1.1  christos 					      re_sift_context_t *sctx,
     92  1.1  christos 					      Idx str_idx,
     93  1.1  christos 					      re_node_set *dest_nodes) internal_function;
     94  1.1  christos static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
     95  1.1  christos 					    re_node_set *dest_nodes,
     96  1.1  christos 					    const re_node_set *candidates) internal_function;
     97  1.1  christos static bool check_dst_limits (const re_match_context_t *mctx,
     98  1.1  christos 			      const re_node_set *limits,
     99  1.1  christos 			      Idx dst_node, Idx dst_idx, Idx src_node,
    100  1.1  christos 			      Idx src_idx) internal_function;
    101  1.1  christos static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
    102  1.1  christos 					int boundaries, Idx subexp_idx,
    103  1.1  christos 					Idx from_node, Idx bkref_idx) internal_function;
    104  1.1  christos static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
    105  1.1  christos 				      Idx limit, Idx subexp_idx,
    106  1.1  christos 				      Idx node, Idx str_idx,
    107  1.1  christos 				      Idx bkref_idx) internal_function;
    108  1.1  christos static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
    109  1.1  christos 					  re_node_set *dest_nodes,
    110  1.1  christos 					  const re_node_set *candidates,
    111  1.1  christos 					  re_node_set *limits,
    112  1.1  christos 					  struct re_backref_cache_entry *bkref_ents,
    113  1.1  christos 					  Idx str_idx) internal_function;
    114  1.1  christos static reg_errcode_t sift_states_bkref (re_match_context_t *mctx,
    115  1.1  christos 					re_sift_context_t *sctx,
    116  1.1  christos 					Idx str_idx, const re_node_set *candidates) internal_function;
    117  1.1  christos static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
    118  1.1  christos 					re_dfastate_t **src, Idx num) internal_function;
    119  1.1  christos static re_dfastate_t *find_recover_state (reg_errcode_t *err,
    120  1.1  christos 					 re_match_context_t *mctx) internal_function;
    121  1.1  christos static re_dfastate_t *transit_state (reg_errcode_t *err,
    122  1.1  christos 				     re_match_context_t *mctx,
    123  1.1  christos 				     re_dfastate_t *state) internal_function;
    124  1.1  christos static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
    125  1.1  christos 					    re_match_context_t *mctx,
    126  1.1  christos 					    re_dfastate_t *next_state) internal_function;
    127  1.1  christos static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
    128  1.1  christos 						re_node_set *cur_nodes,
    129  1.1  christos 						Idx str_idx) internal_function;
    130  1.1  christos #if 0
    131  1.1  christos static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
    132  1.1  christos 					re_match_context_t *mctx,
    133  1.1  christos 					re_dfastate_t *pstate) internal_function;
    134  1.1  christos #endif
    135  1.1  christos #ifdef RE_ENABLE_I18N
    136  1.1  christos static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
    137  1.1  christos 				       re_dfastate_t *pstate) internal_function;
    138  1.1  christos #endif /* RE_ENABLE_I18N */
    139  1.1  christos static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
    140  1.1  christos 					  const re_node_set *nodes) internal_function;
    141  1.1  christos static reg_errcode_t get_subexp (re_match_context_t *mctx,
    142  1.1  christos 				 Idx bkref_node, Idx bkref_str_idx) internal_function;
    143  1.1  christos static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
    144  1.1  christos 				     const re_sub_match_top_t *sub_top,
    145  1.1  christos 				     re_sub_match_last_t *sub_last,
    146  1.1  christos 				     Idx bkref_node, Idx bkref_str) internal_function;
    147  1.1  christos static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
    148  1.1  christos 			     Idx subexp_idx, int type) internal_function;
    149  1.1  christos static reg_errcode_t check_arrival (re_match_context_t *mctx,
    150  1.1  christos 				    state_array_t *path, Idx top_node,
    151  1.1  christos 				    Idx top_str, Idx last_node, Idx last_str,
    152  1.1  christos 				    int type) internal_function;
    153  1.1  christos static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
    154  1.1  christos 						   Idx str_idx,
    155  1.1  christos 						   re_node_set *cur_nodes,
    156  1.1  christos 						   re_node_set *next_nodes) internal_function;
    157  1.1  christos static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
    158  1.1  christos 					       re_node_set *cur_nodes,
    159  1.1  christos 					       Idx ex_subexp, int type) internal_function;
    160  1.1  christos static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
    161  1.1  christos 						   re_node_set *dst_nodes,
    162  1.1  christos 						   Idx target, Idx ex_subexp,
    163  1.1  christos 						   int type) internal_function;
    164  1.1  christos static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
    165  1.1  christos 					 re_node_set *cur_nodes, Idx cur_str,
    166  1.1  christos 					 Idx subexp_num, int type) internal_function;
    167  1.1  christos static bool build_trtable (re_dfa_t *dfa,
    168  1.1  christos 			   re_dfastate_t *state) internal_function;
    169  1.1  christos #ifdef RE_ENABLE_I18N
    170  1.1  christos static int check_node_accept_bytes (re_dfa_t *dfa, Idx node_idx,
    171  1.1  christos 				    const re_string_t *input, Idx idx) internal_function;
    172  1.1  christos # ifdef _LIBC
    173  1.1  christos static unsigned int find_collation_sequence_value (const unsigned char *mbs,
    174  1.1  christos 						   size_t name_len) internal_function;
    175  1.1  christos # endif /* _LIBC */
    176  1.1  christos #endif /* RE_ENABLE_I18N */
    177  1.1  christos static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
    178  1.1  christos 				       const re_dfastate_t *state,
    179  1.1  christos 				       re_node_set *states_node,
    180  1.1  christos 				       bitset *states_ch) internal_function;
    181  1.1  christos static bool check_node_accept (const re_match_context_t *mctx,
    182  1.1  christos 			       const re_token_t *node, Idx idx)
    183  1.1  christos      internal_function;
    184  1.1  christos static reg_errcode_t extend_buffers (re_match_context_t *mctx) internal_function;
    185  1.1  christos 
    186  1.1  christos /* Entry point for POSIX code.  */
    188  1.1  christos 
    189  1.1  christos /* regexec searches for a given pattern, specified by PREG, in the
    190  1.1  christos    string STRING.
    191  1.1  christos 
    192  1.1  christos    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
    193  1.1  christos    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
    194  1.1  christos    least NMATCH elements, and we set them to the offsets of the
    195  1.1  christos    corresponding matched substrings.
    196  1.1  christos 
    197  1.1  christos    EFLAGS specifies `execution flags' which affect matching: if
    198  1.1  christos    REG_NOTBOL is set, then ^ does not match at the beginning of the
    199  1.1  christos    string; if REG_NOTEOL is set, then $ does not match at the end.
    200  1.1  christos 
    201  1.1  christos    We return 0 if we find a match and REG_NOMATCH if not.  */
    202  1.1  christos 
    203  1.1  christos int
    204  1.1  christos regexec (const regex_t *__restrict preg, const char *__restrict string,
    205  1.1  christos 	 size_t nmatch, regmatch_t pmatch[], int eflags)
    206  1.1  christos {
    207  1.1  christos   reg_errcode_t err;
    208  1.1  christos   Idx start, length;
    209  1.1  christos #ifdef _LIBC
    210  1.1  christos   re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
    211  1.1  christos #endif
    212  1.1  christos 
    213  1.1  christos   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
    214  1.1  christos     return REG_BADPAT;
    215  1.1  christos 
    216  1.1  christos   if (eflags & REG_STARTEND)
    217  1.1  christos     {
    218  1.1  christos       start = pmatch[0].rm_so;
    219  1.1  christos       length = pmatch[0].rm_eo;
    220  1.1  christos     }
    221  1.1  christos   else
    222  1.1  christos     {
    223  1.1  christos       start = 0;
    224  1.1  christos       length = strlen (string);
    225  1.1  christos     }
    226  1.1  christos 
    227  1.1  christos   __libc_lock_lock (dfa->lock);
    228  1.1  christos   if (preg->re_no_sub)
    229  1.1  christos     err = re_search_internal (preg, string, length, start, length,
    230  1.1  christos 			      length, 0, NULL, eflags);
    231  1.1  christos   else
    232  1.1  christos     err = re_search_internal (preg, string, length, start, length,
    233  1.1  christos 			      length, nmatch, pmatch, eflags);
    234  1.1  christos   __libc_lock_unlock (dfa->lock);
    235  1.1  christos   return err != REG_NOERROR;
    236  1.1  christos }
    237  1.1  christos 
    238  1.1  christos #ifdef _LIBC
    239  1.1  christos # include <shlib-compat.h>
    240  1.1  christos versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
    241  1.1  christos 
    242  1.1  christos # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
    243  1.1  christos __typeof__ (__regexec) __compat_regexec;
    244  1.1  christos 
    245  1.1  christos int
    246  1.1  christos attribute_compat_text_section
    247  1.1  christos __compat_regexec (const regex_t *__restrict preg,
    248  1.1  christos 		  const char *__restrict string, size_t nmatch,
    249  1.1  christos 		  regmatch_t pmatch[], int eflags)
    250  1.1  christos {
    251  1.1  christos   return regexec (preg, string, nmatch, pmatch,
    252  1.1  christos 		  eflags & (REG_NOTBOL | REG_NOTEOL));
    253  1.1  christos }
    254  1.1  christos compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
    255  1.1  christos # endif
    256  1.1  christos #endif
    257  1.1  christos 
    258  1.1  christos /* Entry points for GNU code.  */
    259  1.1  christos 
    260  1.1  christos /* re_match, re_search, re_match_2, re_search_2
    261  1.1  christos 
    262  1.1  christos    The former two functions operate on STRING with length LENGTH,
    263  1.1  christos    while the later two operate on concatenation of STRING1 and STRING2
    264  1.1  christos    with lengths LENGTH1 and LENGTH2, respectively.
    265  1.1  christos 
    266  1.1  christos    re_match() matches the compiled pattern in BUFP against the string,
    267  1.1  christos    starting at index START.
    268  1.1  christos 
    269  1.1  christos    re_search() first tries matching at index START, then it tries to match
    270  1.1  christos    starting from index START + 1, and so on.  The last start position tried
    271  1.1  christos    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
    272  1.1  christos    way as re_match().)
    273  1.1  christos 
    274  1.1  christos    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
    275  1.1  christos    the first STOP characters of the concatenation of the strings should be
    276  1.1  christos    concerned.
    277  1.1  christos 
    278  1.1  christos    If REGS is not NULL, and BUFP->re_no_sub is not set, the offsets of the match
    279  1.1  christos    and all groups is stroed in REGS.  (For the "_2" variants, the offsets are
    280  1.1  christos    computed relative to the concatenation, not relative to the individual
    281  1.1  christos    strings.)
    282  1.1  christos 
    283  1.1  christos    On success, re_match* functions return the length of the match, re_search*
    284  1.1  christos    return the position of the start of the match.  Return value -1 means no
    285  1.1  christos    match was found and -2 indicates an internal error.  */
    286  1.1  christos 
    287  1.1  christos regoff_t
    288  1.1  christos re_match (struct re_pattern_buffer *bufp, const char *string,
    289  1.1  christos 	  Idx length, Idx start, struct re_registers *regs)
    290  1.1  christos {
    291  1.1  christos   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
    292  1.1  christos }
    293  1.1  christos #ifdef _LIBC
    294  1.1  christos weak_alias (__re_match, re_match)
    295  1.1  christos #endif
    296  1.1  christos 
    297  1.1  christos regoff_t
    298  1.1  christos re_search (struct re_pattern_buffer *bufp, const char *string,
    299  1.1  christos 	   Idx length, Idx start, regoff_t range, struct re_registers *regs)
    300  1.1  christos {
    301  1.1  christos   return re_search_stub (bufp, string, length, start, range, length, regs,
    302  1.1  christos 			 false);
    303  1.1  christos }
    304  1.1  christos #ifdef _LIBC
    305  1.1  christos weak_alias (__re_search, re_search)
    306  1.1  christos #endif
    307  1.1  christos 
    308  1.1  christos regoff_t
    309  1.1  christos re_match_2 (struct re_pattern_buffer *bufp,
    310  1.1  christos 	    const char *string1, Idx length1,
    311  1.1  christos 	    const char *string2, Idx length2,
    312  1.1  christos 	    Idx start, struct re_registers *regs, Idx stop)
    313  1.1  christos {
    314  1.1  christos   return re_search_2_stub (bufp, string1, length1, string2, length2,
    315  1.1  christos 			   start, 0, regs, stop, true);
    316  1.1  christos }
    317  1.1  christos #ifdef _LIBC
    318  1.1  christos weak_alias (__re_match_2, re_match_2)
    319  1.1  christos #endif
    320  1.1  christos 
    321  1.1  christos regoff_t
    322  1.1  christos re_search_2 (struct re_pattern_buffer *bufp,
    323  1.1  christos 	     const char *string1, Idx length1,
    324  1.1  christos 	     const char *string2, Idx length2,
    325  1.1  christos 	     Idx start, regoff_t range, struct re_registers *regs, Idx stop)
    326  1.1  christos {
    327  1.1  christos   return re_search_2_stub (bufp, string1, length1, string2, length2,
    328  1.1  christos 			   start, range, regs, stop, false);
    329  1.1  christos }
    330  1.1  christos #ifdef _LIBC
    331  1.1  christos weak_alias (__re_search_2, re_search_2)
    332  1.1  christos #endif
    333  1.1  christos 
    334  1.1  christos static regoff_t
    335  1.1  christos internal_function
    336  1.1  christos re_search_2_stub (struct re_pattern_buffer *bufp,
    337  1.1  christos 		  const char *string1, Idx length1,
    338  1.1  christos 		  const char *string2, Idx length2,
    339  1.1  christos 		  Idx start, regoff_t range, struct re_registers *regs,
    340  1.1  christos 		  Idx stop, bool ret_len)
    341  1.1  christos {
    342  1.1  christos   const char *str;
    343  1.1  christos   regoff_t rval;
    344  1.1  christos   Idx len = length1 + length2;
    345  1.1  christos   char *s = NULL;
    346  1.1  christos 
    347  1.1  christos   if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))
    348  1.1  christos     return -2;
    349  1.1  christos 
    350  1.1  christos   /* Concatenate the strings.  */
    351  1.1  christos   if (length2 > 0)
    352  1.1  christos     if (length1 > 0)
    353  1.1  christos       {
    354  1.1  christos 	s = re_malloc (char, len);
    355  1.1  christos 
    356  1.1  christos 	if (BE (s == NULL, 0))
    357  1.1  christos 	  return -2;
    358  1.1  christos 	memcpy (s, string1, length1);
    359  1.1  christos 	memcpy (s + length1, string2, length2);
    360  1.1  christos 	str = s;
    361  1.1  christos       }
    362  1.1  christos     else
    363  1.1  christos       str = string2;
    364  1.1  christos   else
    365  1.1  christos     str = string1;
    366  1.1  christos 
    367  1.1  christos   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
    368  1.1  christos 			 ret_len);
    369  1.1  christos   re_free (s);
    370  1.1  christos   return rval;
    371  1.1  christos }
    372  1.1  christos 
    373  1.1  christos /* The parameters have the same meaning as those of re_search.
    374  1.1  christos    Additional parameters:
    375  1.1  christos    If RET_LEN is true the length of the match is returned (re_match style);
    376  1.1  christos    otherwise the position of the match is returned.  */
    377  1.1  christos 
    378  1.1  christos static regoff_t
    379  1.1  christos internal_function
    380  1.1  christos re_search_stub (struct re_pattern_buffer *bufp,
    381  1.1  christos 		const char *string, Idx length,
    382  1.1  christos 		Idx start, regoff_t range, Idx stop, struct re_registers *regs,
    383  1.1  christos 		bool ret_len)
    384  1.1  christos {
    385  1.1  christos   reg_errcode_t result;
    386  1.1  christos   regmatch_t *pmatch;
    387  1.1  christos   Idx nregs;
    388  1.1  christos   regoff_t rval;
    389  1.1  christos   int eflags = 0;
    390  1.1  christos #ifdef _LIBC
    391  1.1  christos   re_dfa_t *dfa = (re_dfa_t *) bufp->re_buffer;
    392  1.1  christos #endif
    393  1.1  christos   Idx last_start = start + range;
    394  1.1  christos 
    395  1.1  christos   /* Check for out-of-range.  */
    396  1.1  christos   if (BE (start < 0 || start > length, 0))
    397  1.1  christos     return -1;
    398  1.1  christos   if (sizeof start < sizeof range)
    399  1.1  christos     {
    400  1.1  christos       regoff_t length_offset = length;
    401  1.1  christos       regoff_t start_offset = start;
    402  1.1  christos       if (BE (length_offset - start_offset < range, 0))
    403  1.1  christos 	last_start = length;
    404  1.1  christos       else if (BE (range < - start_offset, 0))
    405  1.1  christos 	last_start = 0;
    406  1.1  christos     }
    407  1.1  christos   else
    408  1.1  christos     {
    409  1.1  christos       if (BE ((last_start < start) != (range < 0), 0))
    410  1.1  christos 	{
    411  1.1  christos 	  /* Overflow occurred when computing last_start; substitute
    412  1.1  christos 	     the extreme value.  */
    413  1.1  christos 	  last_start = range < 0 ? 0 : length;
    414  1.1  christos 	}
    415  1.1  christos       else
    416  1.1  christos 	{
    417  1.1  christos 	  if (BE (length < last_start, 0))
    418  1.1  christos 	    last_start = length;
    419  1.1  christos 	  else if (BE (last_start < 0, 0))
    420  1.1  christos 	    last_start = 0;
    421  1.1  christos 	}
    422  1.1  christos     }
    423  1.1  christos 
    424  1.1  christos   __libc_lock_lock (dfa->lock);
    425  1.1  christos 
    426  1.1  christos   eflags |= (bufp->re_not_bol) ? REG_NOTBOL : 0;
    427  1.1  christos   eflags |= (bufp->re_not_eol) ? REG_NOTEOL : 0;
    428  1.1  christos 
    429  1.1  christos   /* Compile fastmap if we haven't yet.  */
    430  1.1  christos   if (start < last_start && bufp->re_fastmap != NULL
    431  1.1  christos       && !bufp->re_fastmap_accurate)
    432  1.1  christos     re_compile_fastmap (bufp);
    433  1.1  christos 
    434  1.1  christos   if (BE (bufp->re_no_sub, 0))
    435  1.1  christos     regs = NULL;
    436  1.1  christos 
    437  1.1  christos   /* We need at least 1 register.  */
    438  1.1  christos   if (regs == NULL)
    439  1.1  christos     nregs = 1;
    440  1.1  christos   else if (BE (bufp->re_regs_allocated == REG_FIXED
    441  1.1  christos 	       && regs->rm_num_regs <= bufp->re_nsub, 0))
    442  1.1  christos     {
    443  1.1  christos       nregs = regs->rm_num_regs;
    444  1.1  christos       if (BE (nregs < 1, 0))
    445  1.1  christos 	{
    446  1.1  christos 	  /* Nothing can be copied to regs.  */
    447  1.1  christos 	  regs = NULL;
    448  1.1  christos 	  nregs = 1;
    449  1.1  christos 	}
    450  1.1  christos     }
    451  1.1  christos   else
    452  1.1  christos     nregs = bufp->re_nsub + 1;
    453  1.1  christos   pmatch = re_xmalloc (regmatch_t, nregs);
    454  1.1  christos   if (BE (pmatch == NULL, 0))
    455  1.1  christos     {
    456  1.1  christos       rval = -2;
    457  1.1  christos       goto out;
    458  1.1  christos     }
    459  1.1  christos 
    460  1.1  christos   result = re_search_internal (bufp, string, length, start, last_start, stop,
    461  1.1  christos 			       nregs, pmatch, eflags);
    462  1.1  christos 
    463  1.1  christos   rval = 0;
    464  1.1  christos 
    465  1.1  christos   /* I hope we needn't fill ther regs with -1's when no match was found.  */
    466  1.1  christos   if (result != REG_NOERROR)
    467  1.1  christos     rval = -1;
    468  1.1  christos   else if (regs != NULL)
    469  1.1  christos     {
    470  1.1  christos       /* If caller wants register contents data back, copy them.  */
    471  1.1  christos       bufp->re_regs_allocated = re_copy_regs (regs, pmatch, nregs,
    472  1.1  christos 					      bufp->re_regs_allocated);
    473  1.1  christos       if (BE (bufp->re_regs_allocated == REG_UNALLOCATED, 0))
    474  1.1  christos 	rval = -2;
    475  1.1  christos     }
    476  1.1  christos 
    477  1.1  christos   if (BE (rval == 0, 1))
    478  1.1  christos     {
    479  1.1  christos       if (ret_len)
    480  1.1  christos 	{
    481  1.1  christos 	  assert (pmatch[0].rm_so == start);
    482  1.1  christos 	  rval = pmatch[0].rm_eo - start;
    483  1.1  christos 	}
    484  1.1  christos       else
    485  1.1  christos 	rval = pmatch[0].rm_so;
    486  1.1  christos     }
    487  1.1  christos   re_free (pmatch);
    488  1.1  christos  out:
    489  1.1  christos   __libc_lock_unlock (dfa->lock);
    490  1.1  christos   return rval;
    491  1.1  christos }
    492  1.1  christos 
    493  1.1  christos static unsigned
    494  1.1  christos internal_function
    495  1.1  christos re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
    496  1.1  christos 	      int regs_allocated)
    497  1.1  christos {
    498  1.1  christos   int rval = REG_REALLOCATE;
    499  1.1  christos   Idx i;
    500  1.1  christos   Idx need_regs = nregs + 1;
    501  1.1  christos   /* We need one extra element beyond `rm_num_regs' for the `-1' marker GNU code
    502  1.1  christos      uses.  */
    503  1.1  christos 
    504  1.1  christos   /* Have the register data arrays been allocated?  */
    505  1.1  christos   if (regs_allocated == REG_UNALLOCATED)
    506  1.1  christos     { /* No.  So allocate them with malloc.  */
    507  1.1  christos       regs->rm_start = re_xmalloc (regoff_t, need_regs);
    508  1.1  christos       regs->rm_end = re_malloc (regoff_t, need_regs);
    509  1.1  christos       if (BE (regs->rm_start == NULL, 0) || BE (regs->rm_end == NULL, 0))
    510  1.1  christos 	return REG_UNALLOCATED;
    511  1.1  christos       regs->rm_num_regs = need_regs;
    512  1.1  christos     }
    513  1.1  christos   else if (regs_allocated == REG_REALLOCATE)
    514  1.1  christos     { /* Yes.  If we need more elements than were already
    515  1.1  christos 	 allocated, reallocate them.  If we need fewer, just
    516  1.1  christos 	 leave it alone.  */
    517  1.1  christos       if (BE (need_regs > regs->rm_num_regs, 0))
    518  1.1  christos 	{
    519  1.1  christos 	  regoff_t *new_start =
    520  1.1  christos 	    re_xrealloc (regs->rm_start, regoff_t, need_regs);
    521  1.1  christos 	  regoff_t *new_end = re_realloc (regs->rm_end, regoff_t, need_regs);
    522  1.1  christos 	  if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0))
    523  1.1  christos 	    return REG_UNALLOCATED;
    524  1.1  christos 	  regs->rm_start = new_start;
    525  1.1  christos 	  regs->rm_end = new_end;
    526  1.1  christos 	  regs->rm_num_regs = need_regs;
    527  1.1  christos 	}
    528  1.1  christos     }
    529  1.1  christos   else
    530  1.1  christos     {
    531  1.1  christos       assert (regs_allocated == REG_FIXED);
    532  1.1  christos       /* This function may not be called with REG_FIXED and nregs too big.  */
    533  1.1  christos       assert (regs->rm_num_regs >= nregs);
    534  1.1  christos       rval = REG_FIXED;
    535  1.1  christos     }
    536  1.1  christos 
    537  1.1  christos   /* Copy the regs.  */
    538  1.1  christos   for (i = 0; i < nregs; ++i)
    539  1.1  christos     {
    540  1.1  christos       regs->rm_start[i] = pmatch[i].rm_so;
    541  1.1  christos       regs->rm_end[i] = pmatch[i].rm_eo;
    542  1.1  christos     }
    543  1.1  christos   for ( ; i < regs->rm_num_regs; ++i)
    544  1.1  christos     regs->rm_start[i] = regs->rm_end[i] = -1;
    545  1.1  christos 
    546  1.1  christos   return rval;
    547  1.1  christos }
    548  1.1  christos 
    549  1.1  christos /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
    550  1.1  christos    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
    551  1.1  christos    this memory for recording register information.  STARTS and ENDS
    552  1.1  christos    must be allocated using the malloc library routine, and must each
    553  1.1  christos    be at least NUM_REGS * sizeof (regoff_t) bytes long.
    554  1.1  christos 
    555  1.1  christos    If NUM_REGS == 0, then subsequent matches should allocate their own
    556  1.1  christos    register data.
    557  1.1  christos 
    558  1.1  christos    Unless this function is called, the first search or match using
    559  1.1  christos    PATTERN_BUFFER will allocate its own register data, without
    560  1.1  christos    freeing the old data.  */
    561  1.1  christos 
    562  1.1  christos void
    563  1.1  christos re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
    564  1.1  christos 		  __re_size_t num_regs, regoff_t *starts, regoff_t *ends)
    565  1.1  christos {
    566  1.1  christos   if (num_regs)
    567  1.1  christos     {
    568  1.1  christos       bufp->re_regs_allocated = REG_REALLOCATE;
    569  1.1  christos       regs->rm_num_regs = num_regs;
    570  1.1  christos       regs->rm_start = starts;
    571  1.1  christos       regs->rm_end = ends;
    572  1.1  christos     }
    573  1.1  christos   else
    574  1.1  christos     {
    575  1.1  christos       bufp->re_regs_allocated = REG_UNALLOCATED;
    576  1.1  christos       regs->rm_num_regs = 0;
    577  1.1  christos       regs->rm_start = regs->rm_end = NULL;
    578  1.1  christos     }
    579  1.1  christos }
    580  1.1  christos #ifdef _LIBC
    581  1.1  christos weak_alias (__re_set_registers, re_set_registers)
    582  1.1  christos #endif
    583  1.1  christos 
    584  1.1  christos /* Entry points compatible with 4.2 BSD regex library.  We don't define
    586  1.1  christos    them unless specifically requested.  */
    587  1.1  christos 
    588  1.1  christos #if defined _REGEX_RE_COMP || defined _LIBC
    589  1.1  christos int
    590  1.1  christos # ifdef _LIBC
    591  1.1  christos weak_function
    592  1.1  christos # endif
    593  1.1  christos re_exec (const char *s)
    594  1.1  christos {
    595  1.1  christos   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
    596  1.1  christos }
    597  1.1  christos #endif /* _REGEX_RE_COMP */
    598  1.1  christos 
    599  1.1  christos /* Internal entry point.  */
    601  1.1  christos 
    602  1.1  christos /* Searches for a compiled pattern PREG in the string STRING, whose
    603  1.1  christos    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
    604  1.1  christos    meaning as with regexec.  LAST_START is START + RANGE, where
    605  1.1  christos    START and RANGE have the same meaning as with re_search.
    606  1.1  christos    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
    607  1.1  christos    otherwise return the error code.
    608  1.1  christos    Note: We assume front end functions already check ranges.
    609  1.1  christos    (0 <= LAST_START && LAST_START <= LENGTH)  */
    610  1.1  christos 
    611  1.1  christos static reg_errcode_t
    612  1.1  christos internal_function
    613  1.1  christos re_search_internal (const regex_t *preg,
    614  1.1  christos 		    const char *string, Idx length,
    615  1.1  christos 		    Idx start, Idx last_start, Idx stop,
    616  1.1  christos 		    size_t nmatch, regmatch_t pmatch[],
    617  1.1  christos 		    int eflags)
    618  1.1  christos {
    619  1.1  christos   reg_errcode_t err;
    620  1.1  christos   re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
    621  1.1  christos   Idx left_lim, right_lim;
    622  1.1  christos   int incr;
    623  1.1  christos   bool fl_longest_match;
    624  1.1  christos   int match_kind;
    625  1.1  christos   Idx match_first, match_last = REG_MISSING;
    626  1.1  christos   Idx extra_nmatch;
    627  1.1  christos   bool sb;
    628  1.1  christos   int ch;
    629  1.1  christos #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
    630  1.1  christos   re_match_context_t mctx = { .dfa = dfa };
    631  1.1  christos #else
    632  1.1  christos   re_match_context_t mctx;
    633  1.1  christos #endif
    634  1.1  christos   char *fastmap = ((preg->re_fastmap != NULL && preg->re_fastmap_accurate
    635  1.1  christos 		    && start != last_start && !preg->re_can_be_null)
    636  1.1  christos 		   ? preg->re_fastmap : NULL);
    637  1.1  christos   unsigned REG_TRANSLATE_TYPE t =
    638  1.1  christos     (unsigned REG_TRANSLATE_TYPE) preg->re_translate;
    639  1.1  christos 
    640  1.1  christos #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
    641  1.1  christos   memset (&mctx, '\0', sizeof (re_match_context_t));
    642  1.1  christos   mctx.dfa = dfa;
    643  1.1  christos #endif
    644  1.1  christos 
    645  1.1  christos   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
    646  1.1  christos   nmatch -= extra_nmatch;
    647  1.1  christos 
    648  1.1  christos   /* Check if the DFA haven't been compiled.  */
    649  1.1  christos   if (BE (preg->re_used == 0 || dfa->init_state == NULL
    650  1.1  christos 	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
    651  1.1  christos 	  || dfa->init_state_begbuf == NULL, 0))
    652  1.1  christos     return REG_NOMATCH;
    653  1.1  christos 
    654  1.1  christos #ifdef DEBUG
    655  1.1  christos   /* We assume front-end functions already check them.  */
    656  1.1  christos   assert (0 <= last_start && last_start <= length);
    657  1.1  christos #endif
    658  1.1  christos 
    659  1.1  christos   /* If initial states with non-begbuf contexts have no elements,
    660  1.1  christos      the regex must be anchored.  If preg->re_newline_anchor is set,
    661  1.1  christos      we'll never use init_state_nl, so do not check it.  */
    662  1.1  christos   if (dfa->init_state->nodes.nelem == 0
    663  1.1  christos       && dfa->init_state_word->nodes.nelem == 0
    664  1.1  christos       && (dfa->init_state_nl->nodes.nelem == 0
    665  1.1  christos 	  || !preg->re_newline_anchor))
    666  1.1  christos     {
    667  1.1  christos       if (start != 0 && last_start != 0)
    668  1.1  christos         return REG_NOMATCH;
    669  1.1  christos       start = last_start = 0;
    670  1.1  christos     }
    671  1.1  christos 
    672  1.1  christos   /* We must check the longest matching, if nmatch > 0.  */
    673  1.1  christos   fl_longest_match = (nmatch != 0 || dfa->nbackref);
    674  1.1  christos 
    675  1.1  christos   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
    676  1.1  christos 			    preg->re_translate,
    677  1.1  christos 			    preg->re_syntax & REG_IGNORE_CASE, dfa);
    678  1.1  christos   if (BE (err != REG_NOERROR, 0))
    679  1.1  christos     goto free_return;
    680  1.1  christos   mctx.input.stop = stop;
    681  1.1  christos   mctx.input.raw_stop = stop;
    682  1.1  christos   mctx.input.newline_anchor = preg->re_newline_anchor;
    683  1.1  christos 
    684  1.1  christos   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
    685  1.1  christos   if (BE (err != REG_NOERROR, 0))
    686  1.1  christos     goto free_return;
    687  1.1  christos 
    688  1.1  christos   /* We will log all the DFA states through which the dfa pass,
    689  1.1  christos      if nmatch > 1, or this dfa has "multibyte node", which is a
    690  1.1  christos      back-reference or a node which can accept multibyte character or
    691  1.1  christos      multi character collating element.  */
    692  1.1  christos   if (nmatch > 1 || dfa->has_mb_node)
    693  1.1  christos     {
    694  1.1  christos       mctx.state_log = re_xmalloc (re_dfastate_t *, mctx.input.bufs_len + 1);
    695  1.1  christos       if (BE (mctx.state_log == NULL, 0))
    696  1.1  christos 	{
    697  1.1  christos 	  err = REG_ESPACE;
    698  1.1  christos 	  goto free_return;
    699  1.1  christos 	}
    700  1.1  christos     }
    701  1.1  christos   else
    702  1.1  christos     mctx.state_log = NULL;
    703  1.1  christos 
    704  1.1  christos   match_first = start;
    705  1.1  christos   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
    706  1.1  christos 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
    707  1.1  christos 
    708  1.1  christos   /* Check incrementally whether of not the input string match.  */
    709  1.1  christos   incr = (last_start < start) ? -1 : 1;
    710  1.1  christos   left_lim = (last_start < start) ? last_start : start;
    711  1.1  christos   right_lim = (last_start < start) ? start : last_start;
    712  1.1  christos   sb = dfa->mb_cur_max == 1;
    713  1.1  christos   match_kind =
    714  1.1  christos     (fastmap
    715  1.1  christos      ? ((sb || !(preg->re_syntax & REG_IGNORE_CASE || t) ? 4 : 0)
    716  1.1  christos 	| (start <= last_start ? 2 : 0)
    717  1.1  christos 	| (t != NULL ? 1 : 0))
    718  1.1  christos      : 8);
    719  1.1  christos 
    720  1.1  christos   for (;; match_first += incr)
    721  1.1  christos     {
    722  1.1  christos       err = REG_NOMATCH;
    723  1.1  christos       if (match_first < left_lim || right_lim < match_first)
    724  1.1  christos 	goto free_return;
    725  1.1  christos 
    726  1.1  christos       /* Advance as rapidly as possible through the string, until we
    727  1.1  christos 	 find a plausible place to start matching.  This may be done
    728  1.1  christos 	 with varying efficiency, so there are various possibilities:
    729  1.1  christos 	 only the most common of them are specialized, in order to
    730  1.1  christos 	 save on code size.  We use a switch statement for speed.  */
    731  1.1  christos       switch (match_kind)
    732  1.1  christos 	{
    733  1.1  christos 	case 8:
    734  1.1  christos 	  /* No fastmap.  */
    735  1.1  christos 	  break;
    736  1.1  christos 
    737  1.1  christos 	case 7:
    738  1.1  christos 	  /* Fastmap with single-byte translation, match forward.  */
    739  1.1  christos 	  while (BE (match_first < right_lim, 1)
    740  1.1  christos 		 && !fastmap[t[(unsigned char) string[match_first]]])
    741  1.1  christos 	    ++match_first;
    742  1.1  christos 	  goto forward_match_found_start_or_reached_end;
    743  1.1  christos 
    744  1.1  christos 	case 6:
    745  1.1  christos 	  /* Fastmap without translation, match forward.  */
    746  1.1  christos 	  while (BE (match_first < right_lim, 1)
    747  1.1  christos 		 && !fastmap[(unsigned char) string[match_first]])
    748  1.1  christos 	    ++match_first;
    749  1.1  christos 
    750  1.1  christos 	forward_match_found_start_or_reached_end:
    751  1.1  christos 	  if (BE (match_first == right_lim, 0))
    752  1.1  christos 	    {
    753  1.1  christos 	      ch = match_first >= length
    754  1.1  christos 		       ? 0 : (unsigned char) string[match_first];
    755  1.1  christos 	      if (!fastmap[t ? t[ch] : ch])
    756  1.1  christos 		goto free_return;
    757  1.1  christos 	    }
    758  1.1  christos 	  break;
    759  1.1  christos 
    760  1.1  christos 	case 4:
    761  1.1  christos 	case 5:
    762  1.1  christos 	  /* Fastmap without multi-byte translation, match backwards.  */
    763  1.1  christos 	  while (match_first >= left_lim)
    764  1.1  christos 	    {
    765  1.1  christos 	      ch = match_first >= length
    766  1.1  christos 		       ? 0 : (unsigned char) string[match_first];
    767  1.1  christos 	      if (fastmap[t ? t[ch] : ch])
    768  1.1  christos 		break;
    769  1.1  christos 	      --match_first;
    770  1.1  christos 	    }
    771  1.1  christos 	  if (match_first < left_lim)
    772  1.1  christos 	    goto free_return;
    773  1.1  christos 	  break;
    774  1.1  christos 
    775  1.1  christos 	default:
    776  1.1  christos 	  /* In this case, we can't determine easily the current byte,
    777  1.1  christos 	     since it might be a component byte of a multibyte
    778  1.1  christos 	     character.  Then we use the constructed buffer instead.  */
    779  1.1  christos 	  for (;;)
    780  1.1  christos 	    {
    781  1.1  christos 	      /* If MATCH_FIRST is out of the valid range, reconstruct the
    782  1.1  christos 		 buffers.  */
    783  1.1  christos 	      __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
    784  1.1  christos 	      if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0))
    785  1.1  christos 		{
    786  1.1  christos 		  err = re_string_reconstruct (&mctx.input, match_first,
    787  1.1  christos 					       eflags);
    788  1.1  christos 		  if (BE (err != REG_NOERROR, 0))
    789  1.1  christos 		    goto free_return;
    790  1.1  christos 
    791  1.1  christos 		  offset = match_first - mctx.input.raw_mbs_idx;
    792  1.1  christos 		}
    793  1.1  christos 	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
    794  1.1  christos 		 Note that MATCH_FIRST must not be smaller than 0.  */
    795  1.1  christos 	      ch = (match_first >= length
    796  1.1  christos 		    ? 0 : re_string_byte_at (&mctx.input, offset));
    797  1.1  christos 	      if (fastmap[ch])
    798  1.1  christos 		break;
    799  1.1  christos 	      match_first += incr;
    800  1.1  christos 	      if (match_first < left_lim || match_first > right_lim)
    801  1.1  christos 	        {
    802  1.1  christos 	          err = REG_NOMATCH;
    803  1.1  christos 	          goto free_return;
    804  1.1  christos 	        }
    805  1.1  christos 	    }
    806  1.1  christos 	  break;
    807  1.1  christos 	}
    808  1.1  christos 
    809  1.1  christos       /* Reconstruct the buffers so that the matcher can assume that
    810  1.1  christos 	 the matching starts from the beginning of the buffer.  */
    811  1.1  christos       err = re_string_reconstruct (&mctx.input, match_first, eflags);
    812  1.1  christos       if (BE (err != REG_NOERROR, 0))
    813  1.1  christos 	goto free_return;
    814  1.1  christos 
    815  1.1  christos #ifdef RE_ENABLE_I18N
    816  1.1  christos      /* Don't consider this char as a possible match start if it part,
    817  1.1  christos 	yet isn't the head, of a multibyte character.  */
    818  1.1  christos       if (!sb && !re_string_first_byte (&mctx.input, 0))
    819  1.1  christos 	continue;
    820  1.1  christos #endif
    821  1.1  christos 
    822  1.1  christos       /* It seems to be appropriate one, then use the matcher.  */
    823  1.1  christos       /* We assume that the matching starts from 0.  */
    824  1.1  christos       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
    825  1.1  christos       match_last = check_matching (&mctx, fl_longest_match,
    826  1.1  christos 				   start <= last_start ? &match_first : NULL);
    827  1.1  christos       if (match_last != REG_MISSING)
    828  1.1  christos 	{
    829  1.1  christos 	  if (BE (match_last == REG_ERROR, 0))
    830  1.1  christos 	    {
    831  1.1  christos 	      err = REG_ESPACE;
    832  1.1  christos 	      goto free_return;
    833  1.1  christos 	    }
    834  1.1  christos 	  else
    835  1.1  christos 	    {
    836  1.1  christos 	      mctx.match_last = match_last;
    837  1.1  christos 	      if ((!preg->re_no_sub && nmatch > 1) || dfa->nbackref)
    838  1.1  christos 		{
    839  1.1  christos 		  re_dfastate_t *pstate = mctx.state_log[match_last];
    840  1.1  christos 		  mctx.last_node = check_halt_state_context (&mctx, pstate,
    841  1.1  christos 							     match_last);
    842  1.1  christos 		}
    843  1.1  christos 	      if ((!preg->re_no_sub && nmatch > 1 && dfa->has_plural_match)
    844  1.1  christos 		  || dfa->nbackref)
    845  1.1  christos 		{
    846  1.1  christos 		  err = prune_impossible_nodes (&mctx);
    847  1.1  christos 		  if (err == REG_NOERROR)
    848  1.1  christos 		    break;
    849  1.1  christos 		  if (BE (err != REG_NOMATCH, 0))
    850  1.1  christos 		    goto free_return;
    851  1.1  christos 		  match_last = REG_MISSING;
    852  1.1  christos 		}
    853  1.1  christos 	      else
    854  1.1  christos 		break; /* We found a match.  */
    855  1.1  christos 	    }
    856  1.1  christos 	}
    857  1.1  christos 
    858  1.1  christos       match_ctx_clean (&mctx);
    859  1.1  christos     }
    860  1.1  christos 
    861  1.1  christos #ifdef DEBUG
    862  1.1  christos   assert (match_last != REG_MISSING);
    863  1.1  christos   assert (err == REG_NOERROR);
    864  1.1  christos #endif
    865  1.1  christos 
    866  1.1  christos   /* Set pmatch[] if we need.  */
    867  1.1  christos   if (nmatch > 0)
    868  1.1  christos     {
    869  1.1  christos       Idx reg_idx;
    870  1.1  christos 
    871  1.1  christos       /* Initialize registers.  */
    872  1.1  christos       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
    873  1.1  christos 	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
    874  1.1  christos 
    875  1.1  christos       /* Set the points where matching start/end.  */
    876  1.1  christos       pmatch[0].rm_so = 0;
    877  1.1  christos       pmatch[0].rm_eo = mctx.match_last;
    878  1.1  christos       /* FIXME: This function should fail if mctx.match_last exceeds
    879  1.1  christos 	 the maximum possible regoff_t value.  We need a new error
    880  1.1  christos 	 code REG_OVERFLOW.  */
    881  1.1  christos 
    882  1.1  christos       if (!preg->re_no_sub && nmatch > 1)
    883  1.1  christos 	{
    884  1.1  christos 	  err = set_regs (preg, &mctx, nmatch, pmatch,
    885  1.1  christos 			  dfa->has_plural_match && dfa->nbackref > 0);
    886  1.1  christos 	  if (BE (err != REG_NOERROR, 0))
    887  1.1  christos 	    goto free_return;
    888  1.1  christos 	}
    889  1.1  christos 
    890  1.1  christos       /* At last, add the offset to the each registers, since we slided
    891  1.1  christos 	 the buffers so that we could assume that the matching starts
    892  1.1  christos 	 from 0.  */
    893  1.1  christos       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
    894  1.1  christos 	if (pmatch[reg_idx].rm_so != -1)
    895  1.1  christos 	  {
    896  1.1  christos #ifdef RE_ENABLE_I18N
    897  1.1  christos 	    if (BE (mctx.input.offsets_needed != 0, 0))
    898  1.1  christos 	      {
    899  1.1  christos 		pmatch[reg_idx].rm_so =
    900  1.1  christos 		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
    901  1.1  christos 		   ? mctx.input.valid_raw_len
    902  1.1  christos 		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
    903  1.1  christos 		pmatch[reg_idx].rm_eo =
    904  1.1  christos 		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
    905  1.1  christos 		   ? mctx.input.valid_raw_len
    906  1.1  christos 		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
    907  1.1  christos 	      }
    908  1.1  christos #else
    909  1.1  christos 	    assert (mctx.input.offsets_needed == 0);
    910  1.1  christos #endif
    911  1.1  christos 	    pmatch[reg_idx].rm_so += match_first;
    912  1.1  christos 	    pmatch[reg_idx].rm_eo += match_first;
    913  1.1  christos 	  }
    914  1.1  christos       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
    915  1.1  christos 	{
    916  1.1  christos 	  pmatch[nmatch + reg_idx].rm_so = -1;
    917  1.1  christos 	  pmatch[nmatch + reg_idx].rm_eo = -1;
    918  1.1  christos 	}
    919  1.1  christos 
    920  1.1  christos       if (dfa->subexp_map)
    921  1.1  christos         for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
    922  1.1  christos           if (dfa->subexp_map[reg_idx] != reg_idx)
    923  1.1  christos             {
    924  1.1  christos               pmatch[reg_idx + 1].rm_so
    925  1.1  christos                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
    926  1.1  christos               pmatch[reg_idx + 1].rm_eo
    927  1.1  christos                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
    928  1.1  christos             }
    929  1.1  christos     }
    930  1.1  christos 
    931  1.1  christos  free_return:
    932  1.1  christos   re_free (mctx.state_log);
    933  1.1  christos   if (dfa->nbackref)
    934  1.1  christos     match_ctx_free (&mctx);
    935  1.1  christos   re_string_destruct (&mctx.input);
    936  1.1  christos   return err;
    937  1.1  christos }
    938  1.1  christos 
    939  1.1  christos static reg_errcode_t
    940  1.1  christos internal_function
    941  1.1  christos prune_impossible_nodes (re_match_context_t *mctx)
    942  1.1  christos {
    943  1.1  christos   re_dfa_t *const dfa = mctx->dfa;
    944  1.1  christos   Idx halt_node, match_last;
    945  1.1  christos   reg_errcode_t ret;
    946  1.1  christos   re_dfastate_t **sifted_states;
    947  1.1  christos   re_dfastate_t **lim_states = NULL;
    948  1.1  christos   re_sift_context_t sctx;
    949  1.1  christos #ifdef DEBUG
    950  1.1  christos   assert (mctx->state_log != NULL);
    951  1.1  christos #endif
    952  1.1  christos   match_last = mctx->match_last;
    953  1.1  christos   halt_node = mctx->last_node;
    954  1.1  christos   sifted_states = re_xmalloc (re_dfastate_t *, match_last + 1);
    955  1.1  christos   if (BE (sifted_states == NULL, 0))
    956  1.1  christos     {
    957  1.1  christos       ret = REG_ESPACE;
    958  1.1  christos       goto free_return;
    959  1.1  christos     }
    960  1.1  christos   if (dfa->nbackref)
    961  1.1  christos     {
    962  1.1  christos       lim_states = re_xmalloc (re_dfastate_t *, match_last + 1);
    963  1.1  christos       if (BE (lim_states == NULL, 0))
    964  1.1  christos 	{
    965  1.1  christos 	  ret = REG_ESPACE;
    966  1.1  christos 	  goto free_return;
    967  1.1  christos 	}
    968  1.1  christos       while (1)
    969  1.1  christos 	{
    970  1.1  christos 	  memset (lim_states, '\0',
    971  1.1  christos 		  sizeof (re_dfastate_t *) * (match_last + 1));
    972  1.1  christos 	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
    973  1.1  christos 			 match_last);
    974  1.1  christos 	  ret = sift_states_backward (mctx, &sctx);
    975  1.1  christos 	  re_node_set_free (&sctx.limits);
    976  1.1  christos 	  if (BE (ret != REG_NOERROR, 0))
    977  1.1  christos 	      goto free_return;
    978  1.1  christos 	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
    979  1.1  christos 	    break;
    980  1.1  christos 	  do
    981  1.1  christos 	    {
    982  1.1  christos 	      --match_last;
    983  1.1  christos 	      if (! REG_VALID_INDEX (match_last))
    984  1.1  christos 		{
    985  1.1  christos 		  ret = REG_NOMATCH;
    986  1.1  christos 		  goto free_return;
    987  1.1  christos 		}
    988  1.1  christos 	    } while (mctx->state_log[match_last] == NULL
    989  1.1  christos 		     || !mctx->state_log[match_last]->halt);
    990  1.1  christos 	  halt_node = check_halt_state_context (mctx,
    991  1.1  christos 						mctx->state_log[match_last],
    992  1.1  christos 						match_last);
    993  1.1  christos 	}
    994  1.1  christos       ret = merge_state_array (dfa, sifted_states, lim_states,
    995  1.1  christos 			       match_last + 1);
    996  1.1  christos       re_free (lim_states);
    997  1.1  christos       lim_states = NULL;
    998  1.1  christos       if (BE (ret != REG_NOERROR, 0))
    999  1.1  christos 	goto free_return;
   1000  1.1  christos     }
   1001  1.1  christos   else
   1002  1.1  christos     {
   1003  1.1  christos       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
   1004  1.1  christos       ret = sift_states_backward (mctx, &sctx);
   1005  1.1  christos       re_node_set_free (&sctx.limits);
   1006  1.1  christos       if (BE (ret != REG_NOERROR, 0))
   1007  1.1  christos 	goto free_return;
   1008  1.1  christos     }
   1009  1.1  christos   re_free (mctx->state_log);
   1010  1.1  christos   mctx->state_log = sifted_states;
   1011  1.1  christos   sifted_states = NULL;
   1012  1.1  christos   mctx->last_node = halt_node;
   1013  1.1  christos   mctx->match_last = match_last;
   1014  1.1  christos   ret = REG_NOERROR;
   1015  1.1  christos  free_return:
   1016  1.1  christos   re_free (sifted_states);
   1017  1.1  christos   re_free (lim_states);
   1018  1.1  christos   return ret;
   1019  1.1  christos }
   1020  1.1  christos 
   1021  1.1  christos /* Acquire an initial state and return it.
   1022  1.1  christos    We must select appropriate initial state depending on the context,
   1023  1.1  christos    since initial states may have constraints like "\<", "^", etc..  */
   1024  1.1  christos 
   1025  1.1  christos static inline re_dfastate_t *
   1026  1.1  christos __attribute ((always_inline)) internal_function
   1027  1.1  christos acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
   1028  1.1  christos 			    Idx idx)
   1029  1.1  christos {
   1030  1.1  christos   re_dfa_t *const dfa = mctx->dfa;
   1031  1.1  christos   if (dfa->init_state->has_constraint)
   1032  1.1  christos     {
   1033  1.1  christos       unsigned int context;
   1034  1.1  christos       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
   1035  1.1  christos       if (IS_WORD_CONTEXT (context))
   1036  1.1  christos 	return dfa->init_state_word;
   1037  1.1  christos       else if (IS_ORDINARY_CONTEXT (context))
   1038  1.1  christos 	return dfa->init_state;
   1039  1.1  christos       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
   1040  1.1  christos 	return dfa->init_state_begbuf;
   1041  1.1  christos       else if (IS_NEWLINE_CONTEXT (context))
   1042  1.1  christos 	return dfa->init_state_nl;
   1043  1.1  christos       else if (IS_BEGBUF_CONTEXT (context))
   1044  1.1  christos 	{
   1045  1.1  christos 	  /* It is relatively rare case, then calculate on demand.  */
   1046  1.1  christos 	  return re_acquire_state_context (err, dfa,
   1047  1.1  christos 					   dfa->init_state->entrance_nodes,
   1048  1.1  christos 					   context);
   1049  1.1  christos 	}
   1050  1.1  christos       else
   1051  1.1  christos 	/* Must not happen?  */
   1052  1.1  christos 	return dfa->init_state;
   1053  1.1  christos     }
   1054  1.1  christos   else
   1055  1.1  christos     return dfa->init_state;
   1056  1.1  christos }
   1057  1.1  christos 
   1058  1.1  christos /* Check whether the regular expression match input string INPUT or not,
   1059  1.1  christos    and return the index where the matching end.  Return REG_MISSING if
   1060  1.1  christos    there is no match, and return REG_ERROR in case of an error.
   1061  1.1  christos    FL_LONGEST_MATCH means we want the POSIX longest matching.
   1062  1.1  christos    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
   1063  1.1  christos    next place where we may want to try matching.
   1064  1.1  christos    Note that the matcher assume that the maching starts from the current
   1065  1.1  christos    index of the buffer.  */
   1066  1.1  christos 
   1067  1.1  christos static Idx
   1068  1.1  christos internal_function
   1069  1.1  christos check_matching (re_match_context_t *mctx, bool fl_longest_match,
   1070  1.1  christos 		Idx *p_match_first)
   1071  1.1  christos {
   1072  1.1  christos   re_dfa_t *const dfa = mctx->dfa;
   1073  1.1  christos   reg_errcode_t err;
   1074  1.1  christos   Idx match = 0;
   1075  1.1  christos   Idx match_last = REG_MISSING;
   1076  1.1  christos   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
   1077  1.1  christos   re_dfastate_t *cur_state;
   1078  1.1  christos   bool at_init_state = p_match_first != NULL;
   1079  1.1  christos   Idx next_start_idx = cur_str_idx;
   1080  1.1  christos 
   1081  1.1  christos   err = REG_NOERROR;
   1082  1.1  christos   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
   1083  1.1  christos   /* An initial state must not be NULL (invalid).  */
   1084  1.1  christos   if (BE (cur_state == NULL, 0))
   1085  1.1  christos     {
   1086  1.1  christos       assert (err == REG_ESPACE);
   1087  1.1  christos       return REG_ERROR;
   1088  1.1  christos     }
   1089  1.1  christos 
   1090  1.1  christos   if (mctx->state_log != NULL)
   1091  1.1  christos     {
   1092  1.1  christos       mctx->state_log[cur_str_idx] = cur_state;
   1093  1.1  christos 
   1094  1.1  christos       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
   1095  1.1  christos 	 later.  E.g. Processing back references.  */
   1096  1.1  christos       if (BE (dfa->nbackref, 0))
   1097  1.1  christos 	{
   1098  1.1  christos 	  at_init_state = false;
   1099  1.1  christos 	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
   1100  1.1  christos 	  if (BE (err != REG_NOERROR, 0))
   1101  1.1  christos 	    return err;
   1102  1.1  christos 
   1103  1.1  christos 	  if (cur_state->has_backref)
   1104  1.1  christos 	    {
   1105  1.1  christos 	      err = transit_state_bkref (mctx, &cur_state->nodes);
   1106  1.1  christos 	      if (BE (err != REG_NOERROR, 0))
   1107  1.1  christos 	        return err;
   1108  1.1  christos 	    }
   1109  1.1  christos 	}
   1110  1.1  christos     }
   1111  1.1  christos 
   1112  1.1  christos   /* If the RE accepts NULL string.  */
   1113  1.1  christos   if (BE (cur_state->halt, 0))
   1114  1.1  christos     {
   1115  1.1  christos       if (!cur_state->has_constraint
   1116  1.1  christos 	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
   1117  1.1  christos 	{
   1118  1.1  christos 	  if (!fl_longest_match)
   1119  1.1  christos 	    return cur_str_idx;
   1120  1.1  christos 	  else
   1121  1.1  christos 	    {
   1122  1.1  christos 	      match_last = cur_str_idx;
   1123  1.1  christos 	      match = 1;
   1124  1.1  christos 	    }
   1125  1.1  christos 	}
   1126  1.1  christos     }
   1127  1.1  christos 
   1128  1.1  christos   while (!re_string_eoi (&mctx->input))
   1129  1.1  christos     {
   1130  1.1  christos       re_dfastate_t *old_state = cur_state;
   1131  1.1  christos       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
   1132  1.1  christos 
   1133  1.1  christos       if (BE (next_char_idx >= mctx->input.bufs_len, 0)
   1134  1.1  christos           || (BE (next_char_idx >= mctx->input.valid_len, 0)
   1135  1.1  christos               && mctx->input.valid_len < mctx->input.len))
   1136  1.1  christos         {
   1137  1.1  christos           err = extend_buffers (mctx);
   1138  1.1  christos           if (BE (err != REG_NOERROR, 0))
   1139  1.1  christos 	    {
   1140  1.1  christos 	      assert (err == REG_ESPACE);
   1141  1.1  christos 	      return REG_ERROR;
   1142  1.1  christos 	    }
   1143  1.1  christos         }
   1144  1.1  christos 
   1145  1.1  christos       cur_state = transit_state (&err, mctx, cur_state);
   1146  1.1  christos       if (mctx->state_log != NULL)
   1147  1.1  christos 	cur_state = merge_state_with_log (&err, mctx, cur_state);
   1148  1.1  christos 
   1149  1.1  christos       if (cur_state == NULL)
   1150  1.1  christos 	{
   1151  1.1  christos 	  /* Reached the invalid state or an error.  Try to recover a valid
   1152  1.1  christos 	     state using the state log, if available and if we have not
   1153  1.1  christos 	     already found a valid (even if not the longest) match.  */
   1154  1.1  christos 	  if (BE (err != REG_NOERROR, 0))
   1155  1.1  christos 	    return REG_ERROR;
   1156  1.1  christos 
   1157  1.1  christos 	  if (mctx->state_log == NULL
   1158  1.1  christos 	      || (match && !fl_longest_match)
   1159  1.1  christos 	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
   1160  1.1  christos 	    break;
   1161  1.1  christos 	}
   1162  1.1  christos 
   1163  1.1  christos       if (BE (at_init_state, 0))
   1164  1.1  christos 	{
   1165  1.1  christos 	  if (old_state == cur_state)
   1166  1.1  christos 	    next_start_idx = next_char_idx;
   1167  1.1  christos 	  else
   1168  1.1  christos 	    at_init_state = false;
   1169  1.1  christos 	}
   1170  1.1  christos 
   1171  1.1  christos       if (cur_state->halt)
   1172  1.1  christos 	{
   1173  1.1  christos 	  /* Reached a halt state.
   1174  1.1  christos 	     Check the halt state can satisfy the current context.  */
   1175  1.1  christos 	  if (!cur_state->has_constraint
   1176  1.1  christos 	      || check_halt_state_context (mctx, cur_state,
   1177  1.1  christos 					   re_string_cur_idx (&mctx->input)))
   1178  1.1  christos 	    {
   1179  1.1  christos 	      /* We found an appropriate halt state.  */
   1180  1.1  christos 	      match_last = re_string_cur_idx (&mctx->input);
   1181  1.1  christos 	      match = 1;
   1182  1.1  christos 
   1183  1.1  christos 	      /* We found a match, do not modify match_first below.  */
   1184  1.1  christos 	      p_match_first = NULL;
   1185  1.1  christos 	      if (!fl_longest_match)
   1186  1.1  christos 		break;
   1187  1.1  christos 	    }
   1188  1.1  christos 	}
   1189  1.1  christos     }
   1190  1.1  christos 
   1191  1.1  christos   if (p_match_first)
   1192  1.1  christos     *p_match_first += next_start_idx;
   1193  1.1  christos 
   1194  1.1  christos   return match_last;
   1195  1.1  christos }
   1196  1.1  christos 
   1197  1.1  christos /* Check NODE match the current context.  */
   1198  1.1  christos 
   1199  1.1  christos static bool
   1200  1.1  christos internal_function
   1201  1.1  christos check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
   1202  1.1  christos {
   1203  1.1  christos   re_token_type_t type = dfa->nodes[node].type;
   1204  1.1  christos   unsigned int constraint = dfa->nodes[node].constraint;
   1205  1.1  christos   if (type != END_OF_RE)
   1206  1.1  christos     return false;
   1207  1.1  christos   if (!constraint)
   1208  1.1  christos     return true;
   1209  1.1  christos   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
   1210  1.1  christos     return false;
   1211  1.1  christos   return true;
   1212  1.1  christos }
   1213  1.1  christos 
   1214  1.1  christos /* Check the halt state STATE match the current context.
   1215  1.1  christos    Return 0 if not match, if the node, STATE has, is a halt node and
   1216  1.1  christos    match the context, return the node.  */
   1217  1.1  christos 
   1218  1.1  christos static Idx
   1219  1.1  christos internal_function
   1220  1.1  christos check_halt_state_context (const re_match_context_t *mctx,
   1221  1.1  christos 			  const re_dfastate_t *state, Idx idx)
   1222  1.1  christos {
   1223  1.1  christos   Idx i;
   1224  1.1  christos   unsigned int context;
   1225  1.1  christos #ifdef DEBUG
   1226  1.1  christos   assert (state->halt);
   1227  1.1  christos #endif
   1228  1.1  christos   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
   1229  1.1  christos   for (i = 0; i < state->nodes.nelem; ++i)
   1230  1.1  christos     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
   1231  1.1  christos       return state->nodes.elems[i];
   1232  1.1  christos   return 0;
   1233  1.1  christos }
   1234  1.1  christos 
   1235  1.1  christos /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
   1236  1.1  christos    corresponding to the DFA).
   1237  1.1  christos    Return the destination node, and update EPS_VIA_NODES;
   1238  1.1  christos    return REG_MISSING in case of errors.  */
   1239  1.1  christos 
   1240  1.1  christos static Idx
   1241  1.1  christos internal_function
   1242  1.1  christos proceed_next_node (const re_match_context_t *mctx,
   1243  1.1  christos 		   Idx nregs, regmatch_t *regs, Idx *pidx, Idx node,
   1244  1.1  christos 		   re_node_set *eps_via_nodes, struct re_fail_stack_t *fs)
   1245  1.1  christos {
   1246  1.1  christos   re_dfa_t *const dfa = mctx->dfa;
   1247  1.1  christos   Idx i;
   1248  1.1  christos   bool ok;
   1249  1.1  christos   if (IS_EPSILON_NODE (dfa->nodes[node].type))
   1250  1.1  christos     {
   1251  1.1  christos       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
   1252  1.1  christos       re_node_set *edests = &dfa->edests[node];
   1253  1.1  christos       Idx dest_node;
   1254  1.1  christos       ok = re_node_set_insert (eps_via_nodes, node);
   1255  1.1  christos       if (BE (! ok, 0))
   1256  1.1  christos 	return REG_ERROR;
   1257  1.1  christos       /* Pick up a valid destination, or return REG_MISSING if none
   1258  1.1  christos 	 is found.  */
   1259  1.1  christos       for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)
   1260  1.1  christos 	{
   1261  1.1  christos 	  Idx candidate = edests->elems[i];
   1262  1.1  christos 	  if (!re_node_set_contains (cur_nodes, candidate))
   1263  1.1  christos 	    continue;
   1264  1.1  christos           if (dest_node == REG_MISSING)
   1265  1.1  christos 	    dest_node = candidate;
   1266  1.1  christos 
   1267  1.1  christos           else
   1268  1.1  christos 	    {
   1269  1.1  christos 	      /* In order to avoid infinite loop like "(a*)*", return the second
   1270  1.1  christos 	         epsilon-transition if the first was already considered.  */
   1271  1.1  christos 	      if (re_node_set_contains (eps_via_nodes, dest_node))
   1272  1.1  christos 	        return candidate;
   1273  1.1  christos 
   1274  1.1  christos 	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
   1275  1.1  christos 	      else if (fs != NULL
   1276  1.1  christos 		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
   1277  1.1  christos 				           eps_via_nodes))
   1278  1.1  christos 		return REG_ERROR;
   1279  1.1  christos 
   1280  1.1  christos 	      /* We know we are going to exit.  */
   1281  1.1  christos 	      break;
   1282  1.1  christos 	    }
   1283  1.1  christos 	}
   1284  1.1  christos       return dest_node;
   1285  1.1  christos     }
   1286  1.1  christos   else
   1287  1.1  christos     {
   1288  1.1  christos       Idx naccepted = 0;
   1289  1.1  christos       re_token_type_t type = dfa->nodes[node].type;
   1290  1.1  christos 
   1291  1.1  christos #ifdef RE_ENABLE_I18N
   1292  1.1  christos       if (dfa->nodes[node].accept_mb)
   1293  1.1  christos 	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
   1294  1.1  christos       else
   1295  1.1  christos #endif /* RE_ENABLE_I18N */
   1296  1.1  christos       if (type == OP_BACK_REF)
   1297  1.1  christos 	{
   1298  1.1  christos 	  Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
   1299  1.1  christos 	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
   1300  1.1  christos 	  if (fs != NULL)
   1301  1.1  christos 	    {
   1302  1.1  christos 	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
   1303  1.1  christos 		return REG_MISSING;
   1304  1.1  christos 	      else if (naccepted)
   1305  1.1  christos 		{
   1306  1.1  christos 		  char *buf = (char *) re_string_get_buffer (&mctx->input);
   1307  1.1  christos 		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
   1308  1.1  christos 			      naccepted) != 0)
   1309  1.1  christos 		    return REG_MISSING;
   1310  1.1  christos 		}
   1311  1.1  christos 	    }
   1312  1.1  christos 
   1313  1.1  christos 	  if (naccepted == 0)
   1314  1.1  christos 	    {
   1315  1.1  christos 	      Idx dest_node;
   1316  1.1  christos 	      ok = re_node_set_insert (eps_via_nodes, node);
   1317  1.1  christos 	      if (BE (! ok, 0))
   1318  1.1  christos 		return REG_ERROR;
   1319  1.1  christos 	      dest_node = dfa->edests[node].elems[0];
   1320  1.1  christos 	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
   1321  1.1  christos 					dest_node))
   1322  1.1  christos 		return dest_node;
   1323  1.1  christos 	    }
   1324  1.1  christos 	}
   1325  1.1  christos 
   1326  1.1  christos       if (naccepted != 0
   1327  1.1  christos 	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
   1328  1.1  christos 	{
   1329  1.1  christos 	  Idx dest_node = dfa->nexts[node];
   1330  1.1  christos 	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
   1331  1.1  christos 	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
   1332  1.1  christos 		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
   1333  1.1  christos 					       dest_node)))
   1334  1.1  christos 	    return REG_MISSING;
   1335  1.1  christos 	  re_node_set_empty (eps_via_nodes);
   1336  1.1  christos 	  return dest_node;
   1337  1.1  christos 	}
   1338  1.1  christos     }
   1339  1.1  christos   return REG_MISSING;
   1340  1.1  christos }
   1341  1.1  christos 
   1342  1.1  christos static reg_errcode_t
   1343  1.1  christos internal_function
   1344  1.1  christos push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
   1345  1.1  christos 		 Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
   1346  1.1  christos {
   1347  1.1  christos   reg_errcode_t err;
   1348  1.1  christos   Idx num = fs->num++;
   1349  1.1  christos   if (fs->num == fs->alloc)
   1350  1.1  christos     {
   1351  1.1  christos       struct re_fail_stack_ent_t *new_array =
   1352  1.1  christos 	re_x2realloc (fs->stack, struct re_fail_stack_ent_t, &fs->alloc);
   1353  1.1  christos       if (new_array == NULL)
   1354  1.1  christos 	return REG_ESPACE;
   1355  1.1  christos       fs->stack = new_array;
   1356  1.1  christos     }
   1357  1.1  christos   fs->stack[num].idx = str_idx;
   1358  1.1  christos   fs->stack[num].node = dest_node;
   1359  1.1  christos   fs->stack[num].regs = re_xmalloc (regmatch_t, nregs);
   1360  1.1  christos   if (fs->stack[num].regs == NULL)
   1361  1.1  christos     return REG_ESPACE;
   1362  1.1  christos   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
   1363  1.1  christos   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
   1364  1.1  christos   return err;
   1365  1.1  christos }
   1366  1.1  christos 
   1367  1.1  christos static Idx
   1368  1.1  christos internal_function
   1369  1.1  christos pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx,
   1370  1.1  christos 		Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
   1371  1.1  christos {
   1372  1.1  christos   Idx num = --fs->num;
   1373  1.1  christos   assert (REG_VALID_INDEX (num));
   1374  1.1  christos   *pidx = fs->stack[num].idx;
   1375  1.1  christos   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
   1376  1.1  christos   re_node_set_free (eps_via_nodes);
   1377  1.1  christos   re_free (fs->stack[num].regs);
   1378  1.1  christos   *eps_via_nodes = fs->stack[num].eps_via_nodes;
   1379  1.1  christos   return fs->stack[num].node;
   1380  1.1  christos }
   1381  1.1  christos 
   1382  1.1  christos /* Set the positions where the subexpressions are starts/ends to registers
   1383  1.1  christos    PMATCH.
   1384  1.1  christos    Note: We assume that pmatch[0] is already set, and
   1385  1.1  christos    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
   1386  1.1  christos 
   1387  1.1  christos static reg_errcode_t
   1388  1.1  christos internal_function
   1389  1.1  christos set_regs (const regex_t *preg, const re_match_context_t *mctx,
   1390  1.1  christos 	  size_t nmatch, regmatch_t *pmatch, bool fl_backtrack)
   1391  1.1  christos {
   1392  1.1  christos   re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
   1393  1.1  christos   Idx idx, cur_node;
   1394  1.1  christos   re_node_set eps_via_nodes;
   1395  1.1  christos   struct re_fail_stack_t *fs;
   1396  1.1  christos   struct re_fail_stack_t fs_body = { 0, 2, NULL };
   1397  1.1  christos   regmatch_t *prev_idx_match;
   1398  1.1  christos   bool prev_idx_match_malloced = false;
   1399  1.1  christos 
   1400  1.1  christos #ifdef DEBUG
   1401  1.1  christos   assert (nmatch > 1);
   1402  1.1  christos   assert (mctx->state_log != NULL);
   1403  1.1  christos #endif
   1404  1.1  christos   if (fl_backtrack)
   1405  1.1  christos     {
   1406  1.1  christos       fs = &fs_body;
   1407  1.1  christos       fs->stack = re_xmalloc (struct re_fail_stack_ent_t, fs->alloc);
   1408  1.1  christos       if (fs->stack == NULL)
   1409  1.1  christos 	return REG_ESPACE;
   1410  1.1  christos     }
   1411  1.1  christos   else
   1412  1.1  christos     fs = NULL;
   1413  1.1  christos 
   1414  1.1  christos   cur_node = dfa->init_node;
   1415  1.1  christos   re_node_set_init_empty (&eps_via_nodes);
   1416  1.1  christos 
   1417  1.1  christos   if (re_alloc_oversized (nmatch, sizeof (regmatch_t)))
   1418  1.1  christos     {
   1419  1.2  christos       free_fail_stack_return (fs);
   1420  1.1  christos       return REG_ESPACE;
   1421  1.1  christos     }
   1422  1.1  christos #ifndef __SSP__
   1423  1.2  christos   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
   1424  1.1  christos     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
   1425  1.1  christos   else
   1426  1.1  christos #endif
   1427  1.1  christos     {
   1428  1.1  christos       prev_idx_match = re_malloc (regmatch_t, nmatch);
   1429  1.1  christos       if (prev_idx_match == NULL)
   1430  1.1  christos 	{
   1431  1.1  christos 	  free_fail_stack_return (fs);
   1432  1.1  christos 	  return REG_ESPACE;
   1433  1.1  christos 	}
   1434  1.1  christos       prev_idx_match_malloced = true;
   1435  1.1  christos     }
   1436  1.1  christos   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
   1437  1.1  christos 
   1438  1.1  christos   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
   1439  1.1  christos     {
   1440  1.1  christos       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
   1441  1.1  christos 
   1442  1.1  christos       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
   1443  1.1  christos 	{
   1444  1.1  christos 	  Idx reg_idx;
   1445  1.1  christos 	  if (fs)
   1446  1.1  christos 	    {
   1447  1.1  christos 	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
   1448  1.1  christos 		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
   1449  1.1  christos 		  break;
   1450  1.1  christos 	      if (reg_idx == nmatch)
   1451  1.1  christos 		{
   1452  1.1  christos 		  re_node_set_free (&eps_via_nodes);
   1453  1.1  christos 		  if (prev_idx_match_malloced)
   1454  1.1  christos 		    re_free (prev_idx_match);
   1455  1.1  christos 		  return free_fail_stack_return (fs);
   1456  1.1  christos 		}
   1457  1.1  christos 	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
   1458  1.1  christos 					 &eps_via_nodes);
   1459  1.1  christos 	    }
   1460  1.1  christos 	  else
   1461  1.1  christos 	    {
   1462  1.1  christos 	      re_node_set_free (&eps_via_nodes);
   1463  1.1  christos 	      if (prev_idx_match_malloced)
   1464  1.1  christos 		re_free (prev_idx_match);
   1465  1.1  christos 	      return REG_NOERROR;
   1466  1.1  christos 	    }
   1467  1.1  christos 	}
   1468  1.1  christos 
   1469  1.1  christos       /* Proceed to next node.  */
   1470  1.1  christos       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
   1471  1.1  christos 				    &eps_via_nodes, fs);
   1472  1.1  christos 
   1473  1.1  christos       if (BE (! REG_VALID_INDEX (cur_node), 0))
   1474  1.1  christos 	{
   1475  1.1  christos 	  if (BE (cur_node == REG_ERROR, 0))
   1476  1.1  christos 	    {
   1477  1.1  christos 	      re_node_set_free (&eps_via_nodes);
   1478  1.1  christos 	      if (prev_idx_match_malloced)
   1479  1.1  christos 		re_free (prev_idx_match);
   1480  1.1  christos 	      free_fail_stack_return (fs);
   1481  1.1  christos 	      return REG_ESPACE;
   1482  1.1  christos 	    }
   1483  1.1  christos 	  if (fs)
   1484  1.1  christos 	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
   1485  1.1  christos 				       &eps_via_nodes);
   1486  1.1  christos 	  else
   1487  1.1  christos 	    {
   1488  1.1  christos 	      re_node_set_free (&eps_via_nodes);
   1489  1.1  christos 	      if (prev_idx_match_malloced)
   1490  1.1  christos 		re_free (prev_idx_match);
   1491  1.1  christos 	      return REG_NOMATCH;
   1492  1.1  christos 	    }
   1493  1.1  christos 	}
   1494  1.1  christos     }
   1495  1.1  christos   re_node_set_free (&eps_via_nodes);
   1496  1.1  christos   if (prev_idx_match_malloced)
   1497  1.1  christos     re_free (prev_idx_match);
   1498  1.1  christos   return free_fail_stack_return (fs);
   1499  1.1  christos }
   1500  1.1  christos 
   1501  1.1  christos static reg_errcode_t
   1502  1.1  christos internal_function
   1503  1.1  christos free_fail_stack_return (struct re_fail_stack_t *fs)
   1504  1.1  christos {
   1505  1.1  christos   if (fs)
   1506  1.1  christos     {
   1507  1.1  christos       Idx fs_idx;
   1508  1.1  christos       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
   1509  1.1  christos 	{
   1510  1.1  christos 	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
   1511  1.1  christos 	  re_free (fs->stack[fs_idx].regs);
   1512  1.1  christos 	}
   1513  1.1  christos       re_free (fs->stack);
   1514  1.1  christos     }
   1515  1.1  christos   return REG_NOERROR;
   1516  1.1  christos }
   1517  1.1  christos 
   1518  1.1  christos static void
   1519  1.1  christos internal_function
   1520  1.1  christos update_regs (re_dfa_t *dfa, regmatch_t *pmatch, regmatch_t *prev_idx_match,
   1521  1.1  christos 	     Idx cur_node, Idx cur_idx, Idx nmatch)
   1522  1.1  christos {
   1523  1.1  christos   int type = dfa->nodes[cur_node].type;
   1524  1.1  christos   if (type == OP_OPEN_SUBEXP)
   1525  1.1  christos     {
   1526  1.1  christos       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
   1527  1.1  christos 
   1528  1.1  christos       /* We are at the first node of this sub expression.  */
   1529  1.1  christos       if (reg_num < nmatch)
   1530  1.1  christos 	{
   1531  1.1  christos 	  pmatch[reg_num].rm_so = cur_idx;
   1532  1.1  christos 	  pmatch[reg_num].rm_eo = -1;
   1533  1.1  christos 	}
   1534  1.1  christos     }
   1535  1.1  christos   else if (type == OP_CLOSE_SUBEXP)
   1536  1.1  christos     {
   1537  1.1  christos       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
   1538  1.1  christos       if (reg_num < nmatch)
   1539  1.1  christos 	{
   1540  1.1  christos 	  /* We are at the last node of this sub expression.  */
   1541  1.1  christos 	  if (pmatch[reg_num].rm_so < cur_idx)
   1542  1.1  christos 	    {
   1543  1.1  christos 	      pmatch[reg_num].rm_eo = cur_idx;
   1544  1.1  christos 	      /* This is a non-empty match or we are not inside an optional
   1545  1.1  christos 		 subexpression.  Accept this right away.  */
   1546  1.1  christos 	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
   1547  1.1  christos 	    }
   1548  1.1  christos 	  else
   1549  1.1  christos 	    {
   1550  1.1  christos 	      if (dfa->nodes[cur_node].opt_subexp
   1551  1.1  christos 		  && prev_idx_match[reg_num].rm_so != -1)
   1552  1.1  christos 		/* We transited through an empty match for an optional
   1553  1.1  christos 		   subexpression, like (a?)*, and this is not the subexp's
   1554  1.1  christos 		   first match.  Copy back the old content of the registers
   1555  1.1  christos 		   so that matches of an inner subexpression are undone as
   1556  1.1  christos 		   well, like in ((a?))*.  */
   1557  1.1  christos 		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
   1558  1.1  christos 	      else
   1559  1.1  christos 		/* We completed a subexpression, but it may be part of
   1560  1.1  christos 		   an optional one, so do not update PREV_IDX_MATCH.  */
   1561  1.1  christos 		pmatch[reg_num].rm_eo = cur_idx;
   1562  1.1  christos 	    }
   1563  1.1  christos 	}
   1564  1.1  christos     }
   1565  1.1  christos }
   1566  1.1  christos 
   1567  1.1  christos /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
   1568  1.1  christos    and sift the nodes in each states according to the following rules.
   1569  1.1  christos    Updated state_log will be wrote to STATE_LOG.
   1570  1.1  christos 
   1571  1.1  christos    Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
   1572  1.1  christos      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
   1573  1.1  christos 	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
   1574  1.1  christos 	the LAST_NODE, we throw away the node `a'.
   1575  1.1  christos      2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
   1576  1.1  christos 	string `s' and transit to `b':
   1577  1.1  christos 	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
   1578  1.1  christos 	   away the node `a'.
   1579  1.1  christos 	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
   1580  1.1  christos 	    thrown away, we throw away the node `a'.
   1581  1.1  christos      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
   1582  1.1  christos 	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
   1583  1.1  christos 	   node `a'.
   1584  1.1  christos 	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
   1585  1.1  christos 	    we throw away the node `a'.  */
   1586  1.1  christos 
   1587  1.1  christos #define STATE_NODE_CONTAINS(state,node) \
   1588  1.1  christos   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
   1589  1.1  christos 
   1590  1.1  christos static reg_errcode_t
   1591  1.1  christos internal_function
   1592  1.1  christos sift_states_backward (re_match_context_t *mctx, re_sift_context_t *sctx)
   1593  1.1  christos {
   1594  1.1  christos   reg_errcode_t err;
   1595  1.1  christos   int null_cnt = 0;
   1596  1.1  christos   Idx str_idx = sctx->last_str_idx;
   1597  1.1  christos   re_node_set cur_dest;
   1598  1.1  christos 
   1599  1.1  christos #ifdef DEBUG
   1600  1.1  christos   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
   1601  1.1  christos #endif
   1602  1.1  christos 
   1603  1.1  christos   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
   1604  1.1  christos      transit to the last_node and the last_node itself.  */
   1605  1.1  christos   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
   1606  1.1  christos   if (BE (err != REG_NOERROR, 0))
   1607  1.1  christos     return err;
   1608  1.1  christos   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
   1609  1.1  christos   if (BE (err != REG_NOERROR, 0))
   1610  1.1  christos     goto free_return;
   1611  1.1  christos 
   1612  1.1  christos   /* Then check each states in the state_log.  */
   1613  1.1  christos   while (str_idx > 0)
   1614  1.1  christos     {
   1615  1.1  christos       /* Update counters.  */
   1616  1.1  christos       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
   1617  1.1  christos       if (null_cnt > mctx->max_mb_elem_len)
   1618  1.1  christos 	{
   1619  1.1  christos 	  memset (sctx->sifted_states, '\0',
   1620  1.1  christos 		  sizeof (re_dfastate_t *) * str_idx);
   1621  1.1  christos 	  re_node_set_free (&cur_dest);
   1622  1.1  christos 	  return REG_NOERROR;
   1623  1.1  christos 	}
   1624  1.1  christos       re_node_set_empty (&cur_dest);
   1625  1.1  christos       --str_idx;
   1626  1.1  christos 
   1627  1.1  christos       if (mctx->state_log[str_idx])
   1628  1.1  christos 	{
   1629  1.1  christos 	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
   1630  1.1  christos           if (BE (err != REG_NOERROR, 0))
   1631  1.1  christos 	    goto free_return;
   1632  1.1  christos 	}
   1633  1.1  christos 
   1634  1.1  christos       /* Add all the nodes which satisfy the following conditions:
   1635  1.1  christos 	 - It can epsilon transit to a node in CUR_DEST.
   1636  1.1  christos 	 - It is in CUR_SRC.
   1637  1.1  christos 	 And update state_log.  */
   1638  1.1  christos       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
   1639  1.1  christos       if (BE (err != REG_NOERROR, 0))
   1640  1.1  christos 	goto free_return;
   1641  1.1  christos     }
   1642  1.1  christos   err = REG_NOERROR;
   1643  1.1  christos  free_return:
   1644  1.1  christos   re_node_set_free (&cur_dest);
   1645  1.1  christos   return err;
   1646  1.1  christos }
   1647  1.1  christos 
   1648  1.1  christos static reg_errcode_t
   1649  1.1  christos internal_function
   1650  1.1  christos build_sifted_states (re_match_context_t *mctx, re_sift_context_t *sctx,
   1651  1.1  christos 		     Idx str_idx, re_node_set *cur_dest)
   1652  1.1  christos {
   1653  1.1  christos   re_dfa_t *const dfa = mctx->dfa;
   1654  1.1  christos   re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
   1655  1.1  christos   Idx i;
   1656  1.1  christos 
   1657  1.1  christos   /* Then build the next sifted state.
   1658  1.1  christos      We build the next sifted state on `cur_dest', and update
   1659  1.1  christos      `sifted_states[str_idx]' with `cur_dest'.
   1660  1.1  christos      Note:
   1661  1.1  christos      `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
   1662  1.1  christos      `cur_src' points the node_set of the old `state_log[str_idx]'
   1663  1.1  christos      (with the epsilon nodes pre-filtered out).  */
   1664  1.1  christos   for (i = 0; i < cur_src->nelem; i++)
   1665  1.1  christos     {
   1666  1.1  christos       Idx prev_node = cur_src->elems[i];
   1667  1.1  christos       int naccepted = 0;
   1668  1.1  christos       bool ok;
   1669  1.1  christos 
   1670  1.1  christos #ifdef DEBUG
   1671  1.1  christos       re_token_type_t type = dfa->nodes[prev_node].type;
   1672  1.1  christos       assert (!IS_EPSILON_NODE (type));
   1673  1.1  christos #endif
   1674  1.1  christos #ifdef RE_ENABLE_I18N
   1675  1.1  christos       /* If the node may accept `multi byte'.  */
   1676  1.1  christos       if (dfa->nodes[prev_node].accept_mb)
   1677  1.1  christos 	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
   1678  1.1  christos 					 str_idx, sctx->last_str_idx);
   1679  1.1  christos #endif /* RE_ENABLE_I18N */
   1680  1.1  christos 
   1681  1.1  christos       /* We don't check backreferences here.
   1682  1.1  christos 	 See update_cur_sifted_state().  */
   1683  1.1  christos       if (!naccepted
   1684  1.1  christos 	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
   1685  1.1  christos 	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
   1686  1.1  christos 				  dfa->nexts[prev_node]))
   1687  1.1  christos 	naccepted = 1;
   1688  1.1  christos 
   1689  1.1  christos       if (naccepted == 0)
   1690  1.1  christos 	continue;
   1691  1.1  christos 
   1692  1.1  christos       if (sctx->limits.nelem)
   1693  1.1  christos 	{
   1694  1.1  christos 	  Idx to_idx = str_idx + naccepted;
   1695  1.1  christos 	  if (check_dst_limits (mctx, &sctx->limits,
   1696  1.1  christos 				dfa->nexts[prev_node], to_idx,
   1697  1.1  christos 				prev_node, str_idx))
   1698  1.1  christos 	    continue;
   1699  1.1  christos 	}
   1700  1.1  christos       ok = re_node_set_insert (cur_dest, prev_node);
   1701  1.1  christos       if (BE (! ok, 0))
   1702  1.1  christos 	return REG_ESPACE;
   1703  1.1  christos     }
   1704  1.1  christos 
   1705  1.1  christos   return REG_NOERROR;
   1706  1.1  christos }
   1707  1.1  christos 
   1708  1.1  christos /* Helper functions.  */
   1709  1.1  christos 
   1710  1.1  christos static reg_errcode_t
   1711  1.1  christos internal_function
   1712  1.1  christos clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
   1713  1.1  christos {
   1714  1.1  christos   Idx top = mctx->state_log_top;
   1715  1.1  christos 
   1716  1.1  christos   if (next_state_log_idx >= mctx->input.bufs_len
   1717  1.1  christos       || (next_state_log_idx >= mctx->input.valid_len
   1718  1.1  christos 	  && mctx->input.valid_len < mctx->input.len))
   1719  1.1  christos     {
   1720  1.1  christos       reg_errcode_t err;
   1721  1.1  christos       err = extend_buffers (mctx);
   1722  1.1  christos       if (BE (err != REG_NOERROR, 0))
   1723  1.1  christos 	return err;
   1724  1.1  christos     }
   1725  1.1  christos 
   1726  1.1  christos   if (top < next_state_log_idx)
   1727  1.1  christos     {
   1728  1.1  christos       memset (mctx->state_log + top + 1, '\0',
   1729  1.1  christos 	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
   1730  1.1  christos       mctx->state_log_top = next_state_log_idx;
   1731  1.1  christos     }
   1732  1.1  christos   return REG_NOERROR;
   1733  1.1  christos }
   1734  1.1  christos 
   1735  1.1  christos static reg_errcode_t
   1736  1.1  christos internal_function
   1737  1.1  christos merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst, re_dfastate_t **src,
   1738  1.1  christos 		   Idx num)
   1739  1.1  christos {
   1740  1.1  christos   Idx st_idx;
   1741  1.1  christos   reg_errcode_t err;
   1742  1.1  christos   for (st_idx = 0; st_idx < num; ++st_idx)
   1743  1.1  christos     {
   1744  1.1  christos       if (dst[st_idx] == NULL)
   1745  1.1  christos 	dst[st_idx] = src[st_idx];
   1746  1.1  christos       else if (src[st_idx] != NULL)
   1747  1.1  christos 	{
   1748  1.1  christos 	  re_node_set merged_set;
   1749  1.1  christos 	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
   1750  1.1  christos 					&src[st_idx]->nodes);
   1751  1.1  christos 	  if (BE (err != REG_NOERROR, 0))
   1752  1.1  christos 	    return err;
   1753  1.1  christos 	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
   1754  1.1  christos 	  re_node_set_free (&merged_set);
   1755  1.1  christos 	  if (BE (err != REG_NOERROR, 0))
   1756  1.1  christos 	    return err;
   1757  1.1  christos 	}
   1758  1.1  christos     }
   1759  1.1  christos   return REG_NOERROR;
   1760  1.1  christos }
   1761  1.1  christos 
   1762  1.1  christos static reg_errcode_t
   1763  1.1  christos internal_function
   1764  1.1  christos update_cur_sifted_state (re_match_context_t *mctx, re_sift_context_t *sctx,
   1765  1.1  christos 			 Idx str_idx, re_node_set *dest_nodes)
   1766  1.1  christos {
   1767  1.1  christos   re_dfa_t *const dfa = mctx->dfa;
   1768  1.1  christos   reg_errcode_t err;
   1769  1.1  christos   const re_node_set *candidates;
   1770  1.1  christos   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
   1771  1.1  christos 		: &mctx->state_log[str_idx]->nodes);
   1772  1.1  christos 
   1773  1.1  christos   if (dest_nodes->nelem == 0)
   1774  1.1  christos     sctx->sifted_states[str_idx] = NULL;
   1775  1.1  christos   else
   1776  1.1  christos     {
   1777  1.1  christos       if (candidates)
   1778  1.1  christos 	{
   1779  1.1  christos 	  /* At first, add the nodes which can epsilon transit to a node in
   1780  1.1  christos 	     DEST_NODE.  */
   1781  1.1  christos 	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
   1782  1.1  christos 	  if (BE (err != REG_NOERROR, 0))
   1783  1.1  christos 	    return err;
   1784  1.1  christos 
   1785  1.1  christos 	  /* Then, check the limitations in the current sift_context.  */
   1786  1.1  christos 	  if (sctx->limits.nelem)
   1787  1.1  christos 	    {
   1788  1.1  christos 	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
   1789  1.1  christos 					 mctx->bkref_ents, str_idx);
   1790  1.1  christos 	      if (BE (err != REG_NOERROR, 0))
   1791  1.1  christos 		return err;
   1792  1.1  christos 	    }
   1793  1.1  christos 	}
   1794  1.1  christos 
   1795  1.1  christos       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
   1796  1.1  christos       if (BE (err != REG_NOERROR, 0))
   1797  1.1  christos 	return err;
   1798  1.1  christos     }
   1799  1.1  christos 
   1800  1.1  christos   if (candidates && mctx->state_log[str_idx]->has_backref)
   1801  1.1  christos     {
   1802  1.1  christos       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
   1803  1.1  christos       if (BE (err != REG_NOERROR, 0))
   1804  1.1  christos 	return err;
   1805  1.1  christos     }
   1806  1.1  christos   return REG_NOERROR;
   1807  1.1  christos }
   1808  1.1  christos 
   1809  1.1  christos static reg_errcode_t
   1810  1.1  christos internal_function
   1811  1.1  christos add_epsilon_src_nodes (re_dfa_t *dfa, re_node_set *dest_nodes,
   1812  1.1  christos 		       const re_node_set *candidates)
   1813  1.1  christos {
   1814  1.1  christos   reg_errcode_t err = REG_NOERROR;
   1815  1.1  christos   Idx i;
   1816  1.1  christos 
   1817  1.1  christos   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
   1818  1.1  christos   if (BE (err != REG_NOERROR, 0))
   1819  1.1  christos     return err;
   1820  1.1  christos 
   1821  1.1  christos   if (!state->inveclosure.alloc)
   1822  1.1  christos     {
   1823  1.1  christos       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
   1824  1.1  christos       if (BE (err != REG_NOERROR, 0))
   1825  1.1  christos         return REG_ESPACE;
   1826  1.1  christos       for (i = 0; i < dest_nodes->nelem; i++)
   1827  1.1  christos         re_node_set_merge (&state->inveclosure,
   1828  1.1  christos 			   dfa->inveclosures + dest_nodes->elems[i]);
   1829  1.1  christos     }
   1830  1.1  christos   return re_node_set_add_intersect (dest_nodes, candidates,
   1831  1.1  christos 				    &state->inveclosure);
   1832  1.1  christos }
   1833  1.1  christos 
   1834  1.1  christos static reg_errcode_t
   1835  1.1  christos internal_function
   1836  1.1  christos sub_epsilon_src_nodes (re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
   1837  1.1  christos 		       const re_node_set *candidates)
   1838  1.1  christos {
   1839  1.1  christos     Idx ecl_idx;
   1840  1.1  christos     reg_errcode_t err;
   1841  1.1  christos     re_node_set *inv_eclosure = dfa->inveclosures + node;
   1842  1.1  christos     re_node_set except_nodes;
   1843  1.1  christos     re_node_set_init_empty (&except_nodes);
   1844  1.1  christos     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
   1845  1.1  christos       {
   1846  1.1  christos 	Idx cur_node = inv_eclosure->elems[ecl_idx];
   1847  1.1  christos 	if (cur_node == node)
   1848  1.1  christos 	  continue;
   1849  1.1  christos 	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
   1850  1.1  christos 	  {
   1851  1.1  christos 	    Idx edst1 = dfa->edests[cur_node].elems[0];
   1852  1.1  christos 	    Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
   1853  1.1  christos 			 ? dfa->edests[cur_node].elems[1] : REG_MISSING);
   1854  1.1  christos 	    if ((!re_node_set_contains (inv_eclosure, edst1)
   1855  1.1  christos 		 && re_node_set_contains (dest_nodes, edst1))
   1856  1.1  christos 		|| (REG_VALID_NONZERO_INDEX (edst2)
   1857  1.1  christos 		    && !re_node_set_contains (inv_eclosure, edst2)
   1858  1.1  christos 		    && re_node_set_contains (dest_nodes, edst2)))
   1859  1.1  christos 	      {
   1860  1.1  christos 		err = re_node_set_add_intersect (&except_nodes, candidates,
   1861  1.1  christos 						 dfa->inveclosures + cur_node);
   1862  1.1  christos 		if (BE (err != REG_NOERROR, 0))
   1863  1.1  christos 		  {
   1864  1.1  christos 		    re_node_set_free (&except_nodes);
   1865  1.1  christos 		    return err;
   1866  1.1  christos 		  }
   1867  1.1  christos 	      }
   1868  1.1  christos 	  }
   1869  1.1  christos       }
   1870  1.1  christos     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
   1871  1.1  christos       {
   1872  1.1  christos 	Idx cur_node = inv_eclosure->elems[ecl_idx];
   1873  1.1  christos 	if (!re_node_set_contains (&except_nodes, cur_node))
   1874  1.1  christos 	  {
   1875  1.1  christos 	    Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
   1876  1.1  christos 	    re_node_set_remove_at (dest_nodes, idx);
   1877  1.1  christos 	  }
   1878  1.1  christos       }
   1879  1.1  christos     re_node_set_free (&except_nodes);
   1880  1.1  christos     return REG_NOERROR;
   1881  1.1  christos }
   1882  1.1  christos 
   1883  1.1  christos static bool
   1884  1.1  christos internal_function
   1885  1.1  christos check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
   1886  1.1  christos 		  Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
   1887  1.1  christos {
   1888  1.1  christos   re_dfa_t *const dfa = mctx->dfa;
   1889  1.1  christos   Idx lim_idx, src_pos, dst_pos;
   1890  1.1  christos 
   1891  1.1  christos   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
   1892  1.1  christos   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
   1893  1.1  christos   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
   1894  1.1  christos     {
   1895  1.1  christos       Idx subexp_idx;
   1896  1.1  christos       struct re_backref_cache_entry *ent;
   1897  1.1  christos       ent = mctx->bkref_ents + limits->elems[lim_idx];
   1898  1.1  christos       subexp_idx = dfa->nodes[ent->node].opr.idx;
   1899  1.1  christos 
   1900  1.1  christos       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
   1901  1.1  christos 					   subexp_idx, dst_node, dst_idx,
   1902  1.1  christos 					   dst_bkref_idx);
   1903  1.1  christos       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
   1904  1.1  christos 					   subexp_idx, src_node, src_idx,
   1905  1.1  christos 					   src_bkref_idx);
   1906  1.1  christos 
   1907  1.1  christos       /* In case of:
   1908  1.1  christos 	 <src> <dst> ( <subexp> )
   1909  1.1  christos 	 ( <subexp> ) <src> <dst>
   1910  1.1  christos 	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
   1911  1.1  christos       if (src_pos == dst_pos)
   1912  1.1  christos 	continue; /* This is unrelated limitation.  */
   1913  1.1  christos       else
   1914  1.1  christos 	return true;
   1915  1.1  christos     }
   1916  1.1  christos   return false;
   1917  1.1  christos }
   1918  1.1  christos 
   1919  1.1  christos static int
   1920  1.1  christos internal_function
   1921  1.1  christos check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
   1922  1.1  christos 			     Idx subexp_idx, Idx from_node, Idx bkref_idx)
   1923  1.1  christos {
   1924  1.1  christos   re_dfa_t *const dfa = mctx->dfa;
   1925  1.1  christos   re_node_set *eclosures = dfa->eclosures + from_node;
   1926  1.1  christos   Idx node_idx;
   1927  1.1  christos 
   1928  1.1  christos   /* Else, we are on the boundary: examine the nodes on the epsilon
   1929  1.1  christos      closure.  */
   1930  1.1  christos   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
   1931  1.1  christos     {
   1932  1.1  christos       Idx node = eclosures->elems[node_idx];
   1933  1.1  christos       switch (dfa->nodes[node].type)
   1934  1.1  christos 	{
   1935  1.1  christos 	case OP_BACK_REF:
   1936  1.1  christos 	  if (bkref_idx != REG_MISSING)
   1937  1.1  christos 	    {
   1938  1.1  christos 	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
   1939  1.1  christos 	      do
   1940  1.1  christos 	        {
   1941  1.1  christos 		  Idx dst;
   1942  1.1  christos 		  int cpos;
   1943  1.1  christos 
   1944  1.1  christos 		  if (ent->node != node)
   1945  1.1  christos 		    continue;
   1946  1.1  christos 
   1947  1.1  christos 		  if (subexp_idx < BITSET_WORD_BITS
   1948  1.1  christos 		      && !(ent->eps_reachable_subexps_map
   1949  1.1  christos 			   & ((bitset_word) 1 << subexp_idx)))
   1950  1.1  christos 		    continue;
   1951  1.1  christos 
   1952  1.1  christos 		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
   1953  1.1  christos 		     OP_CLOSE_SUBEXP cases below.  But, if the
   1954  1.1  christos 		     destination node is the same node as the source
   1955  1.1  christos 		     node, don't recurse because it would cause an
   1956  1.1  christos 		     infinite loop: a regex that exhibits this behavior
   1957  1.1  christos 		     is ()\1*\1*  */
   1958  1.1  christos 		  dst = dfa->edests[node].elems[0];
   1959  1.1  christos 		  if (dst == from_node)
   1960  1.1  christos 		    {
   1961  1.1  christos 		      if (boundaries & 1)
   1962  1.1  christos 		        return -1;
   1963  1.1  christos 		      else /* if (boundaries & 2) */
   1964  1.1  christos 		        return 0;
   1965  1.1  christos 		    }
   1966  1.1  christos 
   1967  1.1  christos 		  cpos =
   1968  1.1  christos 		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
   1969  1.1  christos 						 dst, bkref_idx);
   1970  1.1  christos 		  if (cpos == -1 /* && (boundaries & 1) */)
   1971  1.1  christos 		    return -1;
   1972  1.1  christos 		  if (cpos == 0 && (boundaries & 2))
   1973  1.1  christos 		    return 0;
   1974  1.1  christos 
   1975  1.1  christos 		  if (subexp_idx < BITSET_WORD_BITS)
   1976  1.1  christos 		    ent->eps_reachable_subexps_map &=
   1977  1.1  christos 		      ~ ((bitset_word) 1 << subexp_idx);
   1978  1.1  christos 	        }
   1979  1.1  christos 	      while (ent++->more);
   1980  1.1  christos 	    }
   1981  1.1  christos 	  break;
   1982  1.1  christos 
   1983  1.1  christos 	case OP_OPEN_SUBEXP:
   1984  1.1  christos 	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
   1985  1.1  christos 	    return -1;
   1986  1.1  christos 	  break;
   1987  1.1  christos 
   1988  1.1  christos 	case OP_CLOSE_SUBEXP:
   1989  1.1  christos 	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
   1990  1.1  christos 	    return 0;
   1991  1.1  christos 	  break;
   1992  1.1  christos 
   1993  1.1  christos 	default:
   1994  1.1  christos 	    break;
   1995  1.1  christos 	}
   1996  1.1  christos     }
   1997  1.1  christos 
   1998  1.1  christos   return (boundaries & 2) ? 1 : 0;
   1999  1.1  christos }
   2000  1.1  christos 
   2001  1.1  christos static int
   2002  1.1  christos internal_function
   2003  1.1  christos check_dst_limits_calc_pos (const re_match_context_t *mctx,
   2004  1.1  christos 			   Idx limit, Idx subexp_idx,
   2005  1.1  christos 			   Idx from_node, Idx str_idx, Idx bkref_idx)
   2006  1.1  christos {
   2007  1.1  christos   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
   2008  1.1  christos   int boundaries;
   2009  1.1  christos 
   2010  1.1  christos   /* If we are outside the range of the subexpression, return -1 or 1.  */
   2011  1.1  christos   if (str_idx < lim->subexp_from)
   2012  1.1  christos     return -1;
   2013  1.1  christos 
   2014  1.1  christos   if (lim->subexp_to < str_idx)
   2015  1.1  christos     return 1;
   2016  1.1  christos 
   2017  1.1  christos   /* If we are within the subexpression, return 0.  */
   2018  1.1  christos   boundaries = (str_idx == lim->subexp_from);
   2019  1.1  christos   boundaries |= (str_idx == lim->subexp_to) << 1;
   2020  1.1  christos   if (boundaries == 0)
   2021  1.1  christos     return 0;
   2022  1.1  christos 
   2023  1.1  christos   /* Else, examine epsilon closure.  */
   2024  1.1  christos   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
   2025  1.1  christos 				      from_node, bkref_idx);
   2026  1.1  christos }
   2027  1.1  christos 
   2028  1.1  christos /* Check the limitations of sub expressions LIMITS, and remove the nodes
   2029  1.1  christos    which are against limitations from DEST_NODES. */
   2030  1.1  christos 
   2031  1.1  christos static reg_errcode_t
   2032  1.1  christos internal_function
   2033  1.1  christos check_subexp_limits (re_dfa_t *dfa, re_node_set *dest_nodes,
   2034  1.1  christos 		     const re_node_set *candidates, re_node_set *limits,
   2035  1.1  christos 		     struct re_backref_cache_entry *bkref_ents, Idx str_idx)
   2036  1.1  christos {
   2037  1.1  christos   reg_errcode_t err;
   2038  1.1  christos   Idx node_idx, lim_idx;
   2039  1.1  christos 
   2040  1.1  christos   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
   2041  1.1  christos     {
   2042  1.1  christos       Idx subexp_idx;
   2043  1.1  christos       struct re_backref_cache_entry *ent;
   2044  1.1  christos       ent = bkref_ents + limits->elems[lim_idx];
   2045  1.1  christos 
   2046  1.1  christos       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
   2047  1.1  christos 	continue; /* This is unrelated limitation.  */
   2048  1.1  christos 
   2049  1.1  christos       subexp_idx = dfa->nodes[ent->node].opr.idx;
   2050  1.1  christos       if (ent->subexp_to == str_idx)
   2051  1.1  christos 	{
   2052  1.1  christos 	  Idx ops_node = REG_MISSING;
   2053  1.1  christos 	  Idx cls_node = REG_MISSING;
   2054  1.1  christos 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
   2055  1.1  christos 	    {
   2056  1.1  christos 	      Idx node = dest_nodes->elems[node_idx];
   2057  1.1  christos 	      re_token_type_t type = dfa->nodes[node].type;
   2058  1.1  christos 	      if (type == OP_OPEN_SUBEXP
   2059  1.1  christos 		  && subexp_idx == dfa->nodes[node].opr.idx)
   2060  1.1  christos 		ops_node = node;
   2061  1.1  christos 	      else if (type == OP_CLOSE_SUBEXP
   2062  1.1  christos 		       && subexp_idx == dfa->nodes[node].opr.idx)
   2063  1.1  christos 		cls_node = node;
   2064  1.1  christos 	    }
   2065  1.1  christos 
   2066  1.1  christos 	  /* Check the limitation of the open subexpression.  */
   2067  1.1  christos 	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
   2068  1.1  christos 	  if (REG_VALID_INDEX (ops_node))
   2069  1.1  christos 	    {
   2070  1.1  christos 	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
   2071  1.1  christos 					   candidates);
   2072  1.1  christos 	      if (BE (err != REG_NOERROR, 0))
   2073  1.1  christos 		return err;
   2074  1.1  christos 	    }
   2075  1.1  christos 
   2076  1.1  christos 	  /* Check the limitation of the close subexpression.  */
   2077  1.1  christos 	  if (REG_VALID_INDEX (cls_node))
   2078  1.1  christos 	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
   2079  1.1  christos 	      {
   2080  1.1  christos 		Idx node = dest_nodes->elems[node_idx];
   2081  1.1  christos 		if (!re_node_set_contains (dfa->inveclosures + node,
   2082  1.1  christos 					   cls_node)
   2083  1.1  christos 		    && !re_node_set_contains (dfa->eclosures + node,
   2084  1.1  christos 					      cls_node))
   2085  1.1  christos 		  {
   2086  1.1  christos 		    /* It is against this limitation.
   2087  1.1  christos 		       Remove it form the current sifted state.  */
   2088  1.1  christos 		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
   2089  1.1  christos 						 candidates);
   2090  1.1  christos 		    if (BE (err != REG_NOERROR, 0))
   2091  1.1  christos 		      return err;
   2092  1.1  christos 		    --node_idx;
   2093  1.1  christos 		  }
   2094  1.1  christos 	      }
   2095  1.1  christos 	}
   2096  1.1  christos       else /* (ent->subexp_to != str_idx)  */
   2097  1.1  christos 	{
   2098  1.1  christos 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
   2099  1.1  christos 	    {
   2100  1.1  christos 	      Idx node = dest_nodes->elems[node_idx];
   2101  1.1  christos 	      re_token_type_t type = dfa->nodes[node].type;
   2102  1.1  christos 	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
   2103  1.1  christos 		{
   2104  1.1  christos 		  if (subexp_idx != dfa->nodes[node].opr.idx)
   2105  1.1  christos 		    continue;
   2106  1.1  christos 		  /* It is against this limitation.
   2107  1.1  christos 		     Remove it form the current sifted state.  */
   2108  1.1  christos 		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
   2109  1.1  christos 					       candidates);
   2110  1.1  christos 		  if (BE (err != REG_NOERROR, 0))
   2111  1.1  christos 		    return err;
   2112  1.1  christos 		}
   2113  1.1  christos 	    }
   2114  1.1  christos 	}
   2115  1.1  christos     }
   2116  1.1  christos   return REG_NOERROR;
   2117  1.1  christos }
   2118  1.1  christos 
   2119  1.1  christos static reg_errcode_t
   2120  1.1  christos internal_function
   2121  1.1  christos sift_states_bkref (re_match_context_t *mctx, re_sift_context_t *sctx,
   2122  1.1  christos 		   Idx str_idx, const re_node_set *candidates)
   2123  1.1  christos {
   2124  1.1  christos   re_dfa_t *const dfa = mctx->dfa;
   2125  1.1  christos   reg_errcode_t err;
   2126  1.1  christos   Idx node_idx, node;
   2127  1.1  christos   re_sift_context_t local_sctx;
   2128  1.1  christos   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
   2129  1.1  christos 
   2130  1.1  christos   if (first_idx == REG_MISSING)
   2131  1.1  christos     return REG_NOERROR;
   2132  1.1  christos 
   2133  1.1  christos   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
   2134  1.1  christos 
   2135  1.1  christos   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
   2136  1.1  christos     {
   2137  1.1  christos       Idx enabled_idx;
   2138  1.1  christos       re_token_type_t type;
   2139  1.1  christos       struct re_backref_cache_entry *entry;
   2140  1.1  christos       node = candidates->elems[node_idx];
   2141  1.1  christos       type = dfa->nodes[node].type;
   2142  1.1  christos       /* Avoid infinite loop for the REs like "()\1+".  */
   2143  1.1  christos       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
   2144  1.1  christos 	continue;
   2145  1.1  christos       if (type != OP_BACK_REF)
   2146  1.1  christos 	continue;
   2147  1.1  christos 
   2148  1.1  christos       entry = mctx->bkref_ents + first_idx;
   2149  1.1  christos       enabled_idx = first_idx;
   2150  1.1  christos       do
   2151  1.1  christos 	{
   2152  1.1  christos 	  bool ok;
   2153  1.1  christos 	  Idx subexp_len, to_idx, dst_node;
   2154  1.1  christos 	  re_dfastate_t *cur_state;
   2155  1.1  christos 
   2156  1.1  christos 	  if (entry->node != node)
   2157  1.1  christos 	    continue;
   2158  1.1  christos 	  subexp_len = entry->subexp_to - entry->subexp_from;
   2159  1.1  christos 	  to_idx = str_idx + subexp_len;
   2160  1.1  christos 	  dst_node = (subexp_len ? dfa->nexts[node]
   2161  1.1  christos 		      : dfa->edests[node].elems[0]);
   2162  1.1  christos 
   2163  1.1  christos 	  if (to_idx > sctx->last_str_idx
   2164  1.1  christos 	      || sctx->sifted_states[to_idx] == NULL
   2165  1.1  christos 	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
   2166  1.1  christos 	      || check_dst_limits (mctx, &sctx->limits, node,
   2167  1.1  christos 				   str_idx, dst_node, to_idx))
   2168  1.1  christos 	    continue;
   2169  1.1  christos 
   2170  1.1  christos 	  if (local_sctx.sifted_states == NULL)
   2171  1.1  christos 	    {
   2172  1.1  christos 	      local_sctx = *sctx;
   2173  1.1  christos 	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
   2174  1.1  christos 	      if (BE (err != REG_NOERROR, 0))
   2175  1.1  christos 		goto free_return;
   2176  1.1  christos 	    }
   2177  1.1  christos 	  local_sctx.last_node = node;
   2178  1.1  christos 	  local_sctx.last_str_idx = str_idx;
   2179  1.1  christos 	  ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
   2180  1.1  christos 	  if (BE (! ok, 0))
   2181  1.1  christos 	    {
   2182  1.1  christos 	      err = REG_ESPACE;
   2183  1.1  christos 	      goto free_return;
   2184  1.1  christos 	    }
   2185  1.1  christos 	  cur_state = local_sctx.sifted_states[str_idx];
   2186  1.1  christos 	  err = sift_states_backward (mctx, &local_sctx);
   2187  1.1  christos 	  if (BE (err != REG_NOERROR, 0))
   2188  1.1  christos 	    goto free_return;
   2189  1.1  christos 	  if (sctx->limited_states != NULL)
   2190  1.1  christos 	    {
   2191  1.1  christos 	      err = merge_state_array (dfa, sctx->limited_states,
   2192  1.1  christos 				       local_sctx.sifted_states,
   2193  1.1  christos 				       str_idx + 1);
   2194  1.1  christos 	      if (BE (err != REG_NOERROR, 0))
   2195  1.1  christos 		goto free_return;
   2196  1.1  christos 	    }
   2197  1.1  christos 	  local_sctx.sifted_states[str_idx] = cur_state;
   2198  1.1  christos 	  re_node_set_remove (&local_sctx.limits, enabled_idx);
   2199  1.1  christos 
   2200  1.1  christos 	  /* mctx->bkref_ents may have changed, reload the pointer.  */
   2201  1.1  christos           entry = mctx->bkref_ents + enabled_idx;
   2202  1.1  christos 	}
   2203  1.1  christos       while (enabled_idx++, entry++->more);
   2204  1.1  christos     }
   2205  1.1  christos   err = REG_NOERROR;
   2206  1.1  christos  free_return:
   2207  1.1  christos   if (local_sctx.sifted_states != NULL)
   2208  1.1  christos     {
   2209  1.1  christos       re_node_set_free (&local_sctx.limits);
   2210  1.1  christos     }
   2211  1.1  christos 
   2212  1.1  christos   return err;
   2213  1.1  christos }
   2214  1.1  christos 
   2215  1.1  christos 
   2216  1.1  christos #ifdef RE_ENABLE_I18N
   2217  1.1  christos static int
   2218  1.1  christos internal_function
   2219  1.1  christos sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
   2220  1.1  christos 		     Idx node_idx, Idx str_idx, Idx max_str_idx)
   2221  1.1  christos {
   2222  1.1  christos   re_dfa_t *const dfa = mctx->dfa;
   2223  1.1  christos   int naccepted;
   2224  1.1  christos   /* Check the node can accept `multi byte'.  */
   2225  1.1  christos   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
   2226  1.1  christos   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
   2227  1.1  christos       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
   2228  1.1  christos 			    dfa->nexts[node_idx]))
   2229  1.1  christos     /* The node can't accept the `multi byte', or the
   2230  1.1  christos        destination was already thrown away, then the node
   2231  1.1  christos        could't accept the current input `multi byte'.   */
   2232  1.1  christos     naccepted = 0;
   2233  1.1  christos   /* Otherwise, it is sure that the node could accept
   2234  1.1  christos      `naccepted' bytes input.  */
   2235  1.1  christos   return naccepted;
   2236  1.1  christos }
   2237  1.1  christos #endif /* RE_ENABLE_I18N */
   2238  1.1  christos 
   2239  1.1  christos 
   2240  1.1  christos /* Functions for state transition.  */
   2242  1.1  christos 
   2243  1.1  christos /* Return the next state to which the current state STATE will transit by
   2244  1.1  christos    accepting the current input byte, and update STATE_LOG if necessary.
   2245  1.1  christos    If STATE can accept a multibyte char/collating element/back reference
   2246  1.1  christos    update the destination of STATE_LOG.  */
   2247  1.1  christos 
   2248  1.1  christos static re_dfastate_t *
   2249  1.1  christos internal_function
   2250  1.1  christos transit_state (reg_errcode_t *err, re_match_context_t *mctx,
   2251  1.1  christos 	       re_dfastate_t *state)
   2252  1.1  christos {
   2253  1.1  christos   re_dfastate_t **trtable;
   2254  1.1  christos   unsigned char ch;
   2255  1.1  christos 
   2256  1.1  christos #ifdef RE_ENABLE_I18N
   2257  1.1  christos   /* If the current state can accept multibyte.  */
   2258  1.1  christos   if (BE (state->accept_mb, 0))
   2259  1.1  christos     {
   2260  1.1  christos       *err = transit_state_mb (mctx, state);
   2261  1.1  christos       if (BE (*err != REG_NOERROR, 0))
   2262  1.1  christos 	return NULL;
   2263  1.1  christos     }
   2264  1.1  christos #endif /* RE_ENABLE_I18N */
   2265  1.1  christos 
   2266  1.1  christos   /* Then decide the next state with the single byte.  */
   2267  1.1  christos #if 0
   2268  1.1  christos   if (0)
   2269  1.1  christos     /* don't use transition table  */
   2270  1.1  christos     return transit_state_sb (err, mctx, state);
   2271  1.1  christos #endif
   2272  1.1  christos 
   2273  1.1  christos   /* Use transition table  */
   2274  1.1  christos   ch = re_string_fetch_byte (&mctx->input);
   2275  1.1  christos   for (;;)
   2276  1.1  christos     {
   2277  1.1  christos       trtable = state->trtable;
   2278  1.1  christos       if (BE (trtable != NULL, 1))
   2279  1.1  christos 	return trtable[ch];
   2280  1.1  christos 
   2281  1.1  christos       trtable = state->word_trtable;
   2282  1.1  christos       if (BE (trtable != NULL, 1))
   2283  1.1  christos         {
   2284  1.1  christos 	  unsigned int context;
   2285  1.1  christos 	  context
   2286  1.1  christos 	    = re_string_context_at (&mctx->input,
   2287  1.1  christos 				    re_string_cur_idx (&mctx->input) - 1,
   2288  1.1  christos 				    mctx->eflags);
   2289  1.1  christos 	  if (IS_WORD_CONTEXT (context))
   2290  1.1  christos 	    return trtable[ch + SBC_MAX];
   2291  1.1  christos 	  else
   2292  1.1  christos 	    return trtable[ch];
   2293  1.1  christos 	}
   2294  1.1  christos 
   2295  1.1  christos       if (!build_trtable (mctx->dfa, state))
   2296  1.1  christos 	{
   2297  1.1  christos 	  *err = REG_ESPACE;
   2298  1.1  christos 	  return NULL;
   2299  1.1  christos 	}
   2300  1.1  christos 
   2301  1.1  christos       /* Retry, we now have a transition table.  */
   2302  1.1  christos     }
   2303  1.1  christos }
   2304  1.1  christos 
   2305  1.1  christos /* Update the state_log if we need */
   2306  1.1  christos re_dfastate_t *
   2307  1.1  christos internal_function
   2308  1.1  christos merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
   2309  1.1  christos 		      re_dfastate_t *next_state)
   2310  1.1  christos {
   2311  1.1  christos   re_dfa_t *const dfa = mctx->dfa;
   2312  1.1  christos   Idx cur_idx = re_string_cur_idx (&mctx->input);
   2313  1.1  christos 
   2314  1.1  christos   if (cur_idx > mctx->state_log_top)
   2315  1.1  christos     {
   2316  1.1  christos       mctx->state_log[cur_idx] = next_state;
   2317  1.1  christos       mctx->state_log_top = cur_idx;
   2318  1.1  christos     }
   2319  1.1  christos   else if (mctx->state_log[cur_idx] == 0)
   2320  1.1  christos     {
   2321  1.1  christos       mctx->state_log[cur_idx] = next_state;
   2322  1.1  christos     }
   2323  1.1  christos   else
   2324  1.1  christos     {
   2325  1.1  christos       re_dfastate_t *pstate;
   2326  1.1  christos       unsigned int context;
   2327  1.1  christos       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
   2328  1.1  christos       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
   2329  1.1  christos          the destination of a multibyte char/collating element/
   2330  1.1  christos          back reference.  Then the next state is the union set of
   2331  1.1  christos          these destinations and the results of the transition table.  */
   2332  1.1  christos       pstate = mctx->state_log[cur_idx];
   2333  1.1  christos       log_nodes = pstate->entrance_nodes;
   2334  1.1  christos       if (next_state != NULL)
   2335  1.1  christos         {
   2336  1.1  christos           table_nodes = next_state->entrance_nodes;
   2337  1.1  christos           *err = re_node_set_init_union (&next_nodes, table_nodes,
   2338  1.1  christos 					     log_nodes);
   2339  1.1  christos           if (BE (*err != REG_NOERROR, 0))
   2340  1.1  christos 	    return NULL;
   2341  1.1  christos         }
   2342  1.1  christos       else
   2343  1.1  christos         next_nodes = *log_nodes;
   2344  1.1  christos       /* Note: We already add the nodes of the initial state,
   2345  1.1  christos 	 then we don't need to add them here.  */
   2346  1.1  christos 
   2347  1.1  christos       context = re_string_context_at (&mctx->input,
   2348  1.1  christos 				      re_string_cur_idx (&mctx->input) - 1,
   2349  1.1  christos 				      mctx->eflags);
   2350  1.1  christos       next_state = mctx->state_log[cur_idx]
   2351  1.1  christos         = re_acquire_state_context (err, dfa, &next_nodes, context);
   2352  1.1  christos       /* We don't need to check errors here, since the return value of
   2353  1.1  christos          this function is next_state and ERR is already set.  */
   2354  1.1  christos 
   2355  1.1  christos       if (table_nodes != NULL)
   2356  1.1  christos         re_node_set_free (&next_nodes);
   2357  1.1  christos     }
   2358  1.1  christos 
   2359  1.1  christos   if (BE (dfa->nbackref, 0) && next_state != NULL)
   2360  1.1  christos     {
   2361  1.1  christos       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
   2362  1.1  christos 	 later.  We must check them here, since the back references in the
   2363  1.1  christos 	 next state might use them.  */
   2364  1.1  christos       *err = check_subexp_matching_top (mctx, &next_state->nodes,
   2365  1.1  christos 					cur_idx);
   2366  1.1  christos       if (BE (*err != REG_NOERROR, 0))
   2367  1.1  christos 	return NULL;
   2368  1.1  christos 
   2369  1.1  christos       /* If the next state has back references.  */
   2370  1.1  christos       if (next_state->has_backref)
   2371  1.1  christos 	{
   2372  1.1  christos 	  *err = transit_state_bkref (mctx, &next_state->nodes);
   2373  1.1  christos 	  if (BE (*err != REG_NOERROR, 0))
   2374  1.1  christos 	    return NULL;
   2375  1.1  christos 	  next_state = mctx->state_log[cur_idx];
   2376  1.1  christos 	}
   2377  1.1  christos     }
   2378  1.1  christos 
   2379  1.1  christos   return next_state;
   2380  1.1  christos }
   2381  1.1  christos 
   2382  1.1  christos /* Skip bytes in the input that correspond to part of a
   2383  1.1  christos    multi-byte match, then look in the log for a state
   2384  1.1  christos    from which to restart matching.  */
   2385  1.1  christos static re_dfastate_t *
   2386  1.1  christos internal_function
   2387  1.1  christos find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
   2388  1.1  christos {
   2389  1.1  christos   re_dfastate_t *cur_state = NULL;
   2390  1.1  christos   do
   2391  1.1  christos     {
   2392  1.1  christos       Idx max = mctx->state_log_top;
   2393  1.1  christos       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
   2394  1.1  christos 
   2395  1.1  christos       do
   2396  1.1  christos 	{
   2397  1.1  christos           if (++cur_str_idx > max)
   2398  1.1  christos             return NULL;
   2399  1.1  christos           re_string_skip_bytes (&mctx->input, 1);
   2400  1.1  christos 	}
   2401  1.1  christos       while (mctx->state_log[cur_str_idx] == NULL);
   2402  1.1  christos 
   2403  1.1  christos       cur_state = merge_state_with_log (err, mctx, NULL);
   2404  1.1  christos     }
   2405  1.1  christos   while (*err == REG_NOERROR && cur_state == NULL);
   2406  1.1  christos   return cur_state;
   2407  1.1  christos }
   2408  1.1  christos 
   2409  1.1  christos /* Helper functions for transit_state.  */
   2410  1.1  christos 
   2411  1.1  christos /* From the node set CUR_NODES, pick up the nodes whose types are
   2412  1.1  christos    OP_OPEN_SUBEXP and which have corresponding back references in the regular
   2413  1.1  christos    expression. And register them to use them later for evaluating the
   2414  1.1  christos    correspoding back references.  */
   2415  1.1  christos 
   2416  1.1  christos static reg_errcode_t
   2417  1.1  christos internal_function
   2418  1.1  christos check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
   2419  1.1  christos 			   Idx str_idx)
   2420  1.1  christos {
   2421  1.1  christos   re_dfa_t *const dfa = mctx->dfa;
   2422  1.1  christos   Idx node_idx;
   2423  1.1  christos   reg_errcode_t err;
   2424  1.1  christos 
   2425  1.1  christos   /* TODO: This isn't efficient.
   2426  1.1  christos 	   Because there might be more than one nodes whose types are
   2427  1.1  christos 	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
   2428  1.1  christos 	   nodes.
   2429  1.1  christos 	   E.g. RE: (a){2}  */
   2430  1.1  christos   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
   2431  1.1  christos     {
   2432  1.1  christos       Idx node = cur_nodes->elems[node_idx];
   2433  1.1  christos       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
   2434  1.1  christos 	  && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
   2435  1.1  christos 	  && (dfa->used_bkref_map
   2436  1.1  christos 	      & ((bitset_word) 1 << dfa->nodes[node].opr.idx)))
   2437  1.1  christos 	{
   2438  1.1  christos 	  err = match_ctx_add_subtop (mctx, node, str_idx);
   2439  1.1  christos 	  if (BE (err != REG_NOERROR, 0))
   2440  1.1  christos 	    return err;
   2441  1.1  christos 	}
   2442  1.1  christos     }
   2443  1.1  christos   return REG_NOERROR;
   2444  1.1  christos }
   2445  1.1  christos 
   2446  1.1  christos #if 0
   2447  1.1  christos /* Return the next state to which the current state STATE will transit by
   2448  1.1  christos    accepting the current input byte.  */
   2449  1.1  christos 
   2450  1.1  christos static re_dfastate_t *
   2451  1.1  christos transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
   2452  1.1  christos 		  re_dfastate_t *state)
   2453  1.1  christos {
   2454  1.1  christos   re_dfa_t *const dfa = mctx->dfa;
   2455  1.1  christos   re_node_set next_nodes;
   2456  1.1  christos   re_dfastate_t *next_state;
   2457  1.1  christos   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
   2458  1.1  christos   unsigned int context;
   2459  1.1  christos 
   2460  1.1  christos   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
   2461  1.1  christos   if (BE (*err != REG_NOERROR, 0))
   2462  1.1  christos     return NULL;
   2463  1.1  christos   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
   2464  1.1  christos     {
   2465  1.1  christos       Idx cur_node = state->nodes.elems[node_cnt];
   2466  1.1  christos       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
   2467  1.1  christos 	{
   2468  1.1  christos 	  *err = re_node_set_merge (&next_nodes,
   2469  1.1  christos 				    dfa->eclosures + dfa->nexts[cur_node]);
   2470  1.1  christos 	  if (BE (*err != REG_NOERROR, 0))
   2471  1.1  christos 	    {
   2472  1.1  christos 	      re_node_set_free (&next_nodes);
   2473  1.1  christos 	      return NULL;
   2474  1.1  christos 	    }
   2475  1.1  christos 	}
   2476  1.1  christos     }
   2477  1.1  christos   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
   2478  1.1  christos   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
   2479  1.1  christos   /* We don't need to check errors here, since the return value of
   2480  1.1  christos      this function is next_state and ERR is already set.  */
   2481  1.1  christos 
   2482  1.1  christos   re_node_set_free (&next_nodes);
   2483  1.1  christos   re_string_skip_bytes (&mctx->input, 1);
   2484  1.1  christos   return next_state;
   2485  1.1  christos }
   2486  1.1  christos #endif
   2487  1.1  christos 
   2488  1.1  christos #ifdef RE_ENABLE_I18N
   2489  1.1  christos static reg_errcode_t
   2490  1.1  christos internal_function
   2491  1.1  christos transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
   2492  1.1  christos {
   2493  1.1  christos   re_dfa_t *const dfa = mctx->dfa;
   2494  1.1  christos   reg_errcode_t err;
   2495  1.1  christos   Idx i;
   2496  1.1  christos 
   2497  1.1  christos   for (i = 0; i < pstate->nodes.nelem; ++i)
   2498  1.1  christos     {
   2499  1.1  christos       re_node_set dest_nodes, *new_nodes;
   2500  1.1  christos       Idx cur_node_idx = pstate->nodes.elems[i];
   2501  1.1  christos       int naccepted;
   2502  1.1  christos       Idx dest_idx;
   2503  1.1  christos       unsigned int context;
   2504  1.1  christos       re_dfastate_t *dest_state;
   2505  1.1  christos 
   2506  1.1  christos       if (!dfa->nodes[cur_node_idx].accept_mb)
   2507  1.1  christos         continue;
   2508  1.1  christos 
   2509  1.1  christos       if (dfa->nodes[cur_node_idx].constraint)
   2510  1.1  christos 	{
   2511  1.1  christos 	  context = re_string_context_at (&mctx->input,
   2512  1.1  christos 					  re_string_cur_idx (&mctx->input),
   2513  1.1  christos 					  mctx->eflags);
   2514  1.1  christos 	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
   2515  1.1  christos 					   context))
   2516  1.1  christos 	    continue;
   2517  1.1  christos 	}
   2518  1.1  christos 
   2519  1.1  christos       /* How many bytes the node can accept?  */
   2520  1.1  christos       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
   2521  1.1  christos 					   re_string_cur_idx (&mctx->input));
   2522  1.1  christos       if (naccepted == 0)
   2523  1.1  christos 	continue;
   2524  1.1  christos 
   2525  1.1  christos       /* The node can accepts `naccepted' bytes.  */
   2526  1.1  christos       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
   2527  1.1  christos       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
   2528  1.1  christos 			       : mctx->max_mb_elem_len);
   2529  1.1  christos       err = clean_state_log_if_needed (mctx, dest_idx);
   2530  1.1  christos       if (BE (err != REG_NOERROR, 0))
   2531  1.1  christos 	return err;
   2532  1.1  christos #ifdef DEBUG
   2533  1.1  christos       assert (dfa->nexts[cur_node_idx] != REG_MISSING);
   2534  1.1  christos #endif
   2535  1.1  christos       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
   2536  1.1  christos 
   2537  1.1  christos       dest_state = mctx->state_log[dest_idx];
   2538  1.1  christos       if (dest_state == NULL)
   2539  1.1  christos 	dest_nodes = *new_nodes;
   2540  1.1  christos       else
   2541  1.1  christos 	{
   2542  1.1  christos 	  err = re_node_set_init_union (&dest_nodes,
   2543  1.1  christos 					dest_state->entrance_nodes, new_nodes);
   2544  1.1  christos 	  if (BE (err != REG_NOERROR, 0))
   2545  1.1  christos 	    return err;
   2546  1.1  christos 	}
   2547  1.1  christos       context = re_string_context_at (&mctx->input, dest_idx - 1, mctx->eflags);
   2548  1.1  christos       mctx->state_log[dest_idx]
   2549  1.1  christos 	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
   2550  1.1  christos       if (dest_state != NULL)
   2551  1.1  christos 	re_node_set_free (&dest_nodes);
   2552  1.1  christos       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
   2553  1.1  christos 	return err;
   2554  1.1  christos     }
   2555  1.1  christos   return REG_NOERROR;
   2556  1.1  christos }
   2557  1.1  christos #endif /* RE_ENABLE_I18N */
   2558  1.1  christos 
   2559  1.1  christos static reg_errcode_t
   2560  1.1  christos internal_function
   2561  1.1  christos transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
   2562  1.1  christos {
   2563  1.1  christos   re_dfa_t *const dfa = mctx->dfa;
   2564  1.1  christos   reg_errcode_t err;
   2565  1.1  christos   Idx i;
   2566  1.1  christos   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
   2567  1.1  christos 
   2568  1.1  christos   for (i = 0; i < nodes->nelem; ++i)
   2569  1.1  christos     {
   2570  1.1  christos       Idx dest_str_idx, prev_nelem, bkc_idx;
   2571  1.1  christos       Idx node_idx = nodes->elems[i];
   2572  1.1  christos       unsigned int context;
   2573  1.1  christos       const re_token_t *node = dfa->nodes + node_idx;
   2574  1.1  christos       re_node_set *new_dest_nodes;
   2575  1.1  christos 
   2576  1.1  christos       /* Check whether `node' is a backreference or not.  */
   2577  1.1  christos       if (node->type != OP_BACK_REF)
   2578  1.1  christos 	continue;
   2579  1.1  christos 
   2580  1.1  christos       if (node->constraint)
   2581  1.1  christos 	{
   2582  1.1  christos 	  context = re_string_context_at (&mctx->input, cur_str_idx,
   2583  1.1  christos 					  mctx->eflags);
   2584  1.1  christos 	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
   2585  1.1  christos 	    continue;
   2586  1.1  christos 	}
   2587  1.1  christos 
   2588  1.1  christos       /* `node' is a backreference.
   2589  1.1  christos 	 Check the substring which the substring matched.  */
   2590  1.1  christos       bkc_idx = mctx->nbkref_ents;
   2591  1.1  christos       err = get_subexp (mctx, node_idx, cur_str_idx);
   2592  1.1  christos       if (BE (err != REG_NOERROR, 0))
   2593  1.1  christos 	goto free_return;
   2594  1.1  christos 
   2595  1.1  christos       /* And add the epsilon closures (which is `new_dest_nodes') of
   2596  1.1  christos 	 the backreference to appropriate state_log.  */
   2597  1.1  christos #ifdef DEBUG
   2598  1.1  christos       assert (dfa->nexts[node_idx] != REG_MISSING);
   2599  1.1  christos #endif
   2600  1.1  christos       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
   2601  1.1  christos 	{
   2602  1.1  christos 	  Idx subexp_len;
   2603  1.1  christos 	  re_dfastate_t *dest_state;
   2604  1.1  christos 	  struct re_backref_cache_entry *bkref_ent;
   2605  1.1  christos 	  bkref_ent = mctx->bkref_ents + bkc_idx;
   2606  1.1  christos 	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
   2607  1.1  christos 	    continue;
   2608  1.1  christos 	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
   2609  1.1  christos 	  new_dest_nodes = (subexp_len == 0
   2610  1.1  christos 			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
   2611  1.1  christos 			    : dfa->eclosures + dfa->nexts[node_idx]);
   2612  1.1  christos 	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
   2613  1.1  christos 			  - bkref_ent->subexp_from);
   2614  1.1  christos 	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
   2615  1.1  christos 					  mctx->eflags);
   2616  1.1  christos 	  dest_state = mctx->state_log[dest_str_idx];
   2617  1.1  christos 	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
   2618  1.1  christos 			: mctx->state_log[cur_str_idx]->nodes.nelem);
   2619  1.1  christos 	  /* Add `new_dest_node' to state_log.  */
   2620  1.1  christos 	  if (dest_state == NULL)
   2621  1.1  christos 	    {
   2622  1.1  christos 	      mctx->state_log[dest_str_idx]
   2623  1.1  christos 		= re_acquire_state_context (&err, dfa, new_dest_nodes,
   2624  1.1  christos 					    context);
   2625  1.1  christos 	      if (BE (mctx->state_log[dest_str_idx] == NULL
   2626  1.1  christos 		      && err != REG_NOERROR, 0))
   2627  1.1  christos 		goto free_return;
   2628  1.1  christos 	    }
   2629  1.1  christos 	  else
   2630  1.1  christos 	    {
   2631  1.1  christos 	      re_node_set dest_nodes;
   2632  1.1  christos 	      err = re_node_set_init_union (&dest_nodes,
   2633  1.1  christos 					    dest_state->entrance_nodes,
   2634  1.1  christos 					    new_dest_nodes);
   2635  1.1  christos 	      if (BE (err != REG_NOERROR, 0))
   2636  1.1  christos 		{
   2637  1.1  christos 		  re_node_set_free (&dest_nodes);
   2638  1.1  christos 		  goto free_return;
   2639  1.1  christos 		}
   2640  1.1  christos 	      mctx->state_log[dest_str_idx]
   2641  1.1  christos 		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
   2642  1.1  christos 	      re_node_set_free (&dest_nodes);
   2643  1.1  christos 	      if (BE (mctx->state_log[dest_str_idx] == NULL
   2644  1.1  christos 		      && err != REG_NOERROR, 0))
   2645  1.1  christos 		goto free_return;
   2646  1.1  christos 	    }
   2647  1.1  christos 	  /* We need to check recursively if the backreference can epsilon
   2648  1.1  christos 	     transit.  */
   2649  1.1  christos 	  if (subexp_len == 0
   2650  1.1  christos 	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
   2651  1.1  christos 	    {
   2652  1.1  christos 	      err = check_subexp_matching_top (mctx, new_dest_nodes,
   2653  1.1  christos 					       cur_str_idx);
   2654  1.1  christos 	      if (BE (err != REG_NOERROR, 0))
   2655  1.1  christos 		goto free_return;
   2656  1.1  christos 	      err = transit_state_bkref (mctx, new_dest_nodes);
   2657  1.1  christos 	      if (BE (err != REG_NOERROR, 0))
   2658  1.1  christos 		goto free_return;
   2659  1.1  christos 	    }
   2660  1.1  christos 	}
   2661  1.1  christos     }
   2662  1.1  christos   err = REG_NOERROR;
   2663  1.1  christos  free_return:
   2664  1.1  christos   return err;
   2665  1.1  christos }
   2666  1.1  christos 
   2667  1.1  christos /* Enumerate all the candidates which the backreference BKREF_NODE can match
   2668  1.1  christos    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
   2669  1.1  christos    Note that we might collect inappropriate candidates here.
   2670  1.1  christos    However, the cost of checking them strictly here is too high, then we
   2671  1.1  christos    delay these checking for prune_impossible_nodes().  */
   2672  1.1  christos 
   2673  1.1  christos static reg_errcode_t
   2674  1.1  christos internal_function
   2675  1.1  christos get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
   2676  1.1  christos {
   2677  1.1  christos   re_dfa_t *const dfa = mctx->dfa;
   2678  1.1  christos   Idx subexp_num, sub_top_idx;
   2679  1.1  christos   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
   2680  1.1  christos   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
   2681  1.1  christos   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
   2682  1.1  christos   if (cache_idx != REG_MISSING)
   2683  1.1  christos     {
   2684  1.1  christos       const struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx;
   2685  1.1  christos       do
   2686  1.1  christos         if (entry->node == bkref_node)
   2687  1.1  christos 	  return REG_NOERROR; /* We already checked it.  */
   2688  1.1  christos       while (entry++->more);
   2689  1.1  christos     }
   2690  1.1  christos 
   2691  1.1  christos   subexp_num = dfa->nodes[bkref_node].opr.idx;
   2692  1.1  christos 
   2693  1.1  christos   /* For each sub expression  */
   2694  1.1  christos   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
   2695  1.1  christos     {
   2696  1.1  christos       reg_errcode_t err;
   2697  1.1  christos       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
   2698  1.1  christos       re_sub_match_last_t *sub_last;
   2699  1.1  christos       Idx sub_last_idx, sl_str, bkref_str_off;
   2700  1.1  christos 
   2701  1.1  christos       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
   2702  1.1  christos 	continue; /* It isn't related.  */
   2703  1.1  christos 
   2704  1.1  christos       sl_str = sub_top->str_idx;
   2705  1.1  christos       bkref_str_off = bkref_str_idx;
   2706  1.1  christos       /* At first, check the last node of sub expressions we already
   2707  1.1  christos 	 evaluated.  */
   2708  1.1  christos       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
   2709  1.1  christos 	{
   2710  1.1  christos 	  regoff_t sl_str_diff;
   2711  1.1  christos 	  sub_last = sub_top->lasts[sub_last_idx];
   2712  1.1  christos 	  sl_str_diff = sub_last->str_idx - sl_str;
   2713  1.1  christos 	  /* The matched string by the sub expression match with the substring
   2714  1.1  christos 	     at the back reference?  */
   2715  1.1  christos 	  if (sl_str_diff > 0)
   2716  1.1  christos 	    {
   2717  1.1  christos 	      if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
   2718  1.1  christos 		{
   2719  1.1  christos 		  /* Not enough chars for a successful match.  */
   2720  1.1  christos 		  if (bkref_str_off + sl_str_diff > mctx->input.len)
   2721  1.1  christos 		    break;
   2722  1.1  christos 
   2723  1.1  christos 		  err = clean_state_log_if_needed (mctx,
   2724  1.1  christos 						   bkref_str_off
   2725  1.1  christos 						   + sl_str_diff);
   2726  1.1  christos 		  if (BE (err != REG_NOERROR, 0))
   2727  1.1  christos 		    return err;
   2728  1.1  christos 		  buf = (const char *) re_string_get_buffer (&mctx->input);
   2729  1.1  christos 		}
   2730  1.1  christos 	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
   2731  1.1  christos 		break; /* We don't need to search this sub expression any more.  */
   2732  1.1  christos 	    }
   2733  1.1  christos 	  bkref_str_off += sl_str_diff;
   2734  1.1  christos 	  sl_str += sl_str_diff;
   2735  1.1  christos 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
   2736  1.1  christos 				bkref_str_idx);
   2737  1.1  christos 
   2738  1.1  christos 	  /* Reload buf, since the preceding call might have reallocated
   2739  1.1  christos 	     the buffer.  */
   2740  1.1  christos 	  buf = (const char *) re_string_get_buffer (&mctx->input);
   2741  1.1  christos 
   2742  1.1  christos 	  if (err == REG_NOMATCH)
   2743  1.1  christos 	    continue;
   2744  1.1  christos 	  if (BE (err != REG_NOERROR, 0))
   2745  1.1  christos 	    return err;
   2746  1.1  christos 	}
   2747  1.1  christos 
   2748  1.1  christos       if (sub_last_idx < sub_top->nlasts)
   2749  1.1  christos 	continue;
   2750  1.1  christos       if (sub_last_idx > 0)
   2751  1.1  christos 	++sl_str;
   2752  1.1  christos       /* Then, search for the other last nodes of the sub expression.  */
   2753  1.1  christos       for (; sl_str <= bkref_str_idx; ++sl_str)
   2754  1.1  christos 	{
   2755  1.1  christos 	  Idx cls_node;
   2756  1.1  christos 	  regoff_t sl_str_off;
   2757  1.1  christos 	  const re_node_set *nodes;
   2758  1.1  christos 	  sl_str_off = sl_str - sub_top->str_idx;
   2759  1.1  christos 	  /* The matched string by the sub expression match with the substring
   2760  1.1  christos 	     at the back reference?  */
   2761  1.1  christos 	  if (sl_str_off > 0)
   2762  1.1  christos 	    {
   2763  1.1  christos 	      if (BE (bkref_str_off >= mctx->input.valid_len, 0))
   2764  1.1  christos 		{
   2765  1.1  christos 		  /* If we are at the end of the input, we cannot match.  */
   2766  1.1  christos 		  if (bkref_str_off >= mctx->input.len)
   2767  1.1  christos 		    break;
   2768  1.1  christos 
   2769  1.1  christos 		  err = extend_buffers (mctx);
   2770  1.1  christos 		  if (BE (err != REG_NOERROR, 0))
   2771  1.1  christos 		    return err;
   2772  1.1  christos 
   2773  1.1  christos 		  buf = (const char *) re_string_get_buffer (&mctx->input);
   2774  1.1  christos 		}
   2775  1.1  christos 	      if (buf [bkref_str_off++] != buf[sl_str - 1])
   2776  1.1  christos 		break; /* We don't need to search this sub expression
   2777  1.1  christos 			  any more.  */
   2778  1.1  christos 	    }
   2779  1.1  christos 	  if (mctx->state_log[sl_str] == NULL)
   2780  1.1  christos 	    continue;
   2781  1.1  christos 	  /* Does this state have a ')' of the sub expression?  */
   2782  1.1  christos 	  nodes = &mctx->state_log[sl_str]->nodes;
   2783  1.1  christos 	  cls_node = find_subexp_node (dfa, nodes, subexp_num, OP_CLOSE_SUBEXP);
   2784  1.1  christos 	  if (cls_node == REG_MISSING)
   2785  1.1  christos 	    continue; /* No.  */
   2786  1.1  christos 	  if (sub_top->path == NULL)
   2787  1.1  christos 	    {
   2788  1.1  christos 	      sub_top->path = re_calloc (state_array_t,
   2789  1.1  christos 					 sl_str - sub_top->str_idx + 1);
   2790  1.1  christos 	      if (sub_top->path == NULL)
   2791  1.1  christos 		return REG_ESPACE;
   2792  1.1  christos 	    }
   2793  1.1  christos 	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
   2794  1.1  christos 	     in the current context?  */
   2795  1.1  christos 	  err = check_arrival (mctx, sub_top->path, sub_top->node,
   2796  1.1  christos 			       sub_top->str_idx, cls_node, sl_str, OP_CLOSE_SUBEXP);
   2797  1.1  christos 	  if (err == REG_NOMATCH)
   2798  1.1  christos 	      continue;
   2799  1.1  christos 	  if (BE (err != REG_NOERROR, 0))
   2800  1.1  christos 	      return err;
   2801  1.1  christos 	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
   2802  1.1  christos 	  if (BE (sub_last == NULL, 0))
   2803  1.1  christos 	    return REG_ESPACE;
   2804  1.1  christos 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
   2805  1.1  christos 				bkref_str_idx);
   2806  1.1  christos 	  if (err == REG_NOMATCH)
   2807  1.1  christos 	    continue;
   2808  1.1  christos 	}
   2809  1.1  christos     }
   2810  1.1  christos   return REG_NOERROR;
   2811  1.1  christos }
   2812  1.1  christos 
   2813  1.1  christos /* Helper functions for get_subexp().  */
   2814  1.1  christos 
   2815  1.1  christos /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
   2816  1.1  christos    If it can arrive, register the sub expression expressed with SUB_TOP
   2817  1.1  christos    and SUB_LAST.  */
   2818  1.1  christos 
   2819  1.1  christos static reg_errcode_t
   2820  1.1  christos internal_function
   2821  1.1  christos get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
   2822  1.1  christos 		re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
   2823  1.1  christos {
   2824  1.1  christos   reg_errcode_t err;
   2825  1.1  christos   Idx to_idx;
   2826  1.1  christos   /* Can the subexpression arrive the back reference?  */
   2827  1.1  christos   err = check_arrival (mctx, &sub_last->path, sub_last->node,
   2828  1.1  christos 		       sub_last->str_idx, bkref_node, bkref_str, OP_OPEN_SUBEXP);
   2829  1.1  christos   if (err != REG_NOERROR)
   2830  1.1  christos     return err;
   2831  1.1  christos   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
   2832  1.1  christos 			     sub_last->str_idx);
   2833  1.1  christos   if (BE (err != REG_NOERROR, 0))
   2834  1.1  christos     return err;
   2835  1.1  christos   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
   2836  1.1  christos   return clean_state_log_if_needed (mctx, to_idx);
   2837  1.1  christos }
   2838  1.1  christos 
   2839  1.1  christos /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
   2840  1.1  christos    Search '(' if FL_OPEN, or search ')' otherwise.
   2841  1.1  christos    TODO: This function isn't efficient...
   2842  1.1  christos 	 Because there might be more than one nodes whose types are
   2843  1.1  christos 	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
   2844  1.1  christos 	 nodes.
   2845  1.1  christos 	 E.g. RE: (a){2}  */
   2846  1.1  christos 
   2847  1.1  christos static Idx
   2848  1.1  christos internal_function
   2849  1.1  christos find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
   2850  1.1  christos 		  Idx subexp_idx, int type)
   2851  1.1  christos {
   2852  1.1  christos   Idx cls_idx;
   2853  1.1  christos   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
   2854  1.1  christos     {
   2855  1.1  christos       Idx cls_node = nodes->elems[cls_idx];
   2856  1.1  christos       const re_token_t *node = dfa->nodes + cls_node;
   2857  1.1  christos       if (node->type == type
   2858  1.1  christos 	  && node->opr.idx == subexp_idx)
   2859  1.1  christos 	return cls_node;
   2860  1.1  christos     }
   2861  1.1  christos   return REG_MISSING;
   2862  1.1  christos }
   2863  1.1  christos 
   2864  1.1  christos /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
   2865  1.1  christos    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
   2866  1.1  christos    heavily reused.
   2867  1.1  christos    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
   2868  1.1  christos 
   2869  1.1  christos static reg_errcode_t
   2870  1.1  christos internal_function
   2871  1.1  christos check_arrival (re_match_context_t *mctx, state_array_t *path,
   2872  1.1  christos 	       Idx top_node, Idx top_str, Idx last_node, Idx last_str,
   2873  1.1  christos 	       int type)
   2874  1.1  christos {
   2875  1.1  christos   re_dfa_t *const dfa = mctx->dfa;
   2876  1.1  christos   reg_errcode_t err;
   2877  1.1  christos   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
   2878  1.1  christos   re_dfastate_t *cur_state = NULL;
   2879  1.1  christos   re_node_set *cur_nodes, next_nodes;
   2880  1.1  christos   re_dfastate_t **backup_state_log;
   2881  1.1  christos   unsigned int context;
   2882  1.1  christos 
   2883  1.1  christos   subexp_num = dfa->nodes[top_node].opr.idx;
   2884  1.1  christos   /* Extend the buffer if we need.  */
   2885  1.1  christos   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
   2886  1.1  christos     {
   2887  1.1  christos       re_dfastate_t **new_array;
   2888  1.1  christos       Idx old_alloc = path->alloc;
   2889  1.1  christos       Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1;
   2890  1.1  christos       if (BE (new_alloc < old_alloc, 0))
   2891  1.1  christos 	return REG_ESPACE;
   2892  1.1  christos       new_array = re_xrealloc (path->array, re_dfastate_t *, new_alloc);
   2893  1.1  christos       if (BE (new_array == NULL, 0))
   2894  1.1  christos 	return REG_ESPACE;
   2895  1.1  christos       path->array = new_array;
   2896  1.1  christos       path->alloc = new_alloc;
   2897  1.1  christos       memset (new_array + old_alloc, '\0',
   2898  1.1  christos 	      sizeof (re_dfastate_t *) * (new_alloc - old_alloc));
   2899  1.1  christos     }
   2900  1.1  christos 
   2901  1.1  christos   str_idx = path->next_idx == 0 ? top_str : path->next_idx;
   2902  1.1  christos 
   2903  1.1  christos   /* Temporary modify MCTX.  */
   2904  1.1  christos   backup_state_log = mctx->state_log;
   2905  1.1  christos   backup_cur_idx = mctx->input.cur_idx;
   2906  1.1  christos   mctx->state_log = path->array;
   2907  1.1  christos   mctx->input.cur_idx = str_idx;
   2908  1.1  christos 
   2909  1.1  christos   /* Setup initial node set.  */
   2910  1.1  christos   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
   2911  1.1  christos   if (str_idx == top_str)
   2912  1.1  christos     {
   2913  1.1  christos       err = re_node_set_init_1 (&next_nodes, top_node);
   2914  1.1  christos       if (BE (err != REG_NOERROR, 0))
   2915  1.1  christos 	return err;
   2916  1.1  christos       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
   2917  1.1  christos       if (BE (err != REG_NOERROR, 0))
   2918  1.1  christos 	{
   2919  1.1  christos 	  re_node_set_free (&next_nodes);
   2920  1.1  christos 	  return err;
   2921  1.1  christos 	}
   2922  1.1  christos     }
   2923  1.1  christos   else
   2924  1.1  christos     {
   2925  1.1  christos       cur_state = mctx->state_log[str_idx];
   2926  1.1  christos       if (cur_state && cur_state->has_backref)
   2927  1.1  christos 	{
   2928  1.1  christos 	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
   2929  1.1  christos 	  if (BE ( err != REG_NOERROR, 0))
   2930  1.1  christos 	    return err;
   2931  1.1  christos 	}
   2932  1.1  christos       else
   2933  1.1  christos 	re_node_set_init_empty (&next_nodes);
   2934  1.1  christos     }
   2935  1.1  christos   if (str_idx == top_str || (cur_state && cur_state->has_backref))
   2936  1.1  christos     {
   2937  1.1  christos       if (next_nodes.nelem)
   2938  1.1  christos 	{
   2939  1.1  christos 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
   2940  1.1  christos 				    subexp_num, type);
   2941  1.1  christos 	  if (BE ( err != REG_NOERROR, 0))
   2942  1.1  christos 	    {
   2943  1.1  christos 	      re_node_set_free (&next_nodes);
   2944  1.1  christos 	      return err;
   2945  1.1  christos 	    }
   2946  1.1  christos 	}
   2947  1.1  christos       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
   2948  1.1  christos       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
   2949  1.1  christos 	{
   2950  1.1  christos 	  re_node_set_free (&next_nodes);
   2951  1.1  christos 	  return err;
   2952  1.1  christos 	}
   2953  1.1  christos       mctx->state_log[str_idx] = cur_state;
   2954  1.1  christos     }
   2955  1.1  christos 
   2956  1.1  christos   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
   2957  1.1  christos     {
   2958  1.1  christos       re_node_set_empty (&next_nodes);
   2959  1.1  christos       if (mctx->state_log[str_idx + 1])
   2960  1.1  christos 	{
   2961  1.1  christos 	  err = re_node_set_merge (&next_nodes,
   2962  1.1  christos 				   &mctx->state_log[str_idx + 1]->nodes);
   2963  1.1  christos 	  if (BE (err != REG_NOERROR, 0))
   2964  1.1  christos 	    {
   2965  1.1  christos 	      re_node_set_free (&next_nodes);
   2966  1.1  christos 	      return err;
   2967  1.1  christos 	    }
   2968  1.1  christos 	}
   2969  1.1  christos       if (cur_state)
   2970  1.1  christos 	{
   2971  1.1  christos 	  err = check_arrival_add_next_nodes (mctx, str_idx,
   2972  1.1  christos 					      &cur_state->non_eps_nodes, &next_nodes);
   2973  1.1  christos 	  if (BE (err != REG_NOERROR, 0))
   2974  1.1  christos 	    {
   2975  1.1  christos 	      re_node_set_free (&next_nodes);
   2976  1.1  christos 	      return err;
   2977  1.1  christos 	    }
   2978  1.1  christos 	}
   2979  1.1  christos       ++str_idx;
   2980  1.1  christos       if (next_nodes.nelem)
   2981  1.1  christos 	{
   2982  1.1  christos 	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
   2983  1.1  christos 	  if (BE (err != REG_NOERROR, 0))
   2984  1.1  christos 	    {
   2985  1.1  christos 	      re_node_set_free (&next_nodes);
   2986  1.1  christos 	      return err;
   2987  1.1  christos 	    }
   2988  1.1  christos 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
   2989  1.1  christos 				    subexp_num, type);
   2990  1.1  christos 	  if (BE ( err != REG_NOERROR, 0))
   2991  1.1  christos 	    {
   2992  1.1  christos 	      re_node_set_free (&next_nodes);
   2993  1.1  christos 	      return err;
   2994  1.1  christos 	    }
   2995  1.1  christos 	}
   2996  1.1  christos       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
   2997  1.1  christos       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
   2998  1.1  christos       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
   2999  1.1  christos 	{
   3000  1.1  christos 	  re_node_set_free (&next_nodes);
   3001  1.1  christos 	  return err;
   3002  1.1  christos 	}
   3003  1.1  christos       mctx->state_log[str_idx] = cur_state;
   3004  1.1  christos       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
   3005  1.1  christos     }
   3006  1.1  christos   re_node_set_free (&next_nodes);
   3007  1.1  christos   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
   3008  1.1  christos 	       : &mctx->state_log[last_str]->nodes);
   3009  1.1  christos   path->next_idx = str_idx;
   3010  1.1  christos 
   3011  1.1  christos   /* Fix MCTX.  */
   3012  1.1  christos   mctx->state_log = backup_state_log;
   3013  1.1  christos   mctx->input.cur_idx = backup_cur_idx;
   3014  1.1  christos 
   3015  1.1  christos   /* Then check the current node set has the node LAST_NODE.  */
   3016  1.1  christos   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
   3017  1.1  christos     return REG_NOERROR;
   3018  1.1  christos 
   3019  1.1  christos   return REG_NOMATCH;
   3020  1.1  christos }
   3021  1.1  christos 
   3022  1.1  christos /* Helper functions for check_arrival.  */
   3023  1.1  christos 
   3024  1.1  christos /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
   3025  1.1  christos    to NEXT_NODES.
   3026  1.1  christos    TODO: This function is similar to the functions transit_state*(),
   3027  1.1  christos 	 however this function has many additional works.
   3028  1.1  christos 	 Can't we unify them?  */
   3029  1.1  christos 
   3030  1.1  christos static reg_errcode_t
   3031  1.1  christos internal_function
   3032  1.1  christos check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
   3033  1.1  christos 			      re_node_set *cur_nodes,
   3034  1.1  christos 			      re_node_set *next_nodes)
   3035  1.1  christos {
   3036  1.1  christos   re_dfa_t *const dfa = mctx->dfa;
   3037  1.1  christos   bool ok;
   3038  1.1  christos   Idx cur_idx;
   3039  1.1  christos   reg_errcode_t err;
   3040  1.1  christos   re_node_set union_set;
   3041  1.1  christos   re_node_set_init_empty (&union_set);
   3042  1.1  christos   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
   3043  1.1  christos     {
   3044  1.1  christos       int naccepted = 0;
   3045  1.1  christos       Idx cur_node = cur_nodes->elems[cur_idx];
   3046  1.1  christos #ifdef DEBUG
   3047  1.1  christos       re_token_type_t type = dfa->nodes[cur_node].type;
   3048  1.1  christos       assert (!IS_EPSILON_NODE (type));
   3049  1.1  christos #endif
   3050  1.1  christos #ifdef RE_ENABLE_I18N
   3051  1.1  christos       /* If the node may accept `multi byte'.  */
   3052  1.1  christos       if (dfa->nodes[cur_node].accept_mb)
   3053  1.1  christos 	{
   3054  1.1  christos 	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
   3055  1.1  christos 					       str_idx);
   3056  1.1  christos 	  if (naccepted > 1)
   3057  1.1  christos 	    {
   3058  1.1  christos 	      re_dfastate_t *dest_state;
   3059  1.1  christos 	      Idx next_node = dfa->nexts[cur_node];
   3060  1.1  christos 	      Idx next_idx = str_idx + naccepted;
   3061  1.1  christos 	      dest_state = mctx->state_log[next_idx];
   3062  1.1  christos 	      re_node_set_empty (&union_set);
   3063  1.1  christos 	      if (dest_state)
   3064  1.1  christos 		{
   3065  1.1  christos 		  err = re_node_set_merge (&union_set, &dest_state->nodes);
   3066  1.1  christos 		  if (BE (err != REG_NOERROR, 0))
   3067  1.1  christos 		    {
   3068  1.1  christos 		      re_node_set_free (&union_set);
   3069  1.1  christos 		      return err;
   3070  1.1  christos 		    }
   3071  1.1  christos 		}
   3072  1.1  christos 	      ok = re_node_set_insert (&union_set, next_node);
   3073  1.1  christos 	      if (BE (! ok, 0))
   3074  1.1  christos 		{
   3075  1.1  christos 		  re_node_set_free (&union_set);
   3076  1.1  christos 		  return REG_ESPACE;
   3077  1.1  christos 		}
   3078  1.1  christos 	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
   3079  1.1  christos 							    &union_set);
   3080  1.1  christos 	      if (BE (mctx->state_log[next_idx] == NULL
   3081  1.1  christos 		      && err != REG_NOERROR, 0))
   3082  1.1  christos 		{
   3083  1.1  christos 		  re_node_set_free (&union_set);
   3084  1.1  christos 		  return err;
   3085  1.1  christos 		}
   3086  1.1  christos 	    }
   3087  1.1  christos 	}
   3088  1.1  christos #endif /* RE_ENABLE_I18N */
   3089  1.1  christos       if (naccepted
   3090  1.1  christos 	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
   3091  1.1  christos 	{
   3092  1.1  christos 	  ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
   3093  1.1  christos 	  if (BE (! ok, 0))
   3094  1.1  christos 	    {
   3095  1.1  christos 	      re_node_set_free (&union_set);
   3096  1.1  christos 	      return REG_ESPACE;
   3097  1.1  christos 	    }
   3098  1.1  christos 	}
   3099  1.1  christos     }
   3100  1.1  christos   re_node_set_free (&union_set);
   3101  1.1  christos   return REG_NOERROR;
   3102  1.1  christos }
   3103  1.1  christos 
   3104  1.1  christos /* For all the nodes in CUR_NODES, add the epsilon closures of them to
   3105  1.1  christos    CUR_NODES, however exclude the nodes which are:
   3106  1.1  christos     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
   3107  1.1  christos     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
   3108  1.1  christos */
   3109  1.1  christos 
   3110  1.1  christos static reg_errcode_t
   3111  1.1  christos internal_function
   3112  1.1  christos check_arrival_expand_ecl (re_dfa_t *dfa, re_node_set *cur_nodes,
   3113  1.1  christos 			  Idx ex_subexp, int type)
   3114  1.1  christos {
   3115  1.1  christos   reg_errcode_t err;
   3116  1.1  christos   Idx idx, outside_node;
   3117  1.1  christos   re_node_set new_nodes;
   3118  1.1  christos #ifdef DEBUG
   3119  1.1  christos   assert (cur_nodes->nelem);
   3120  1.1  christos #endif
   3121  1.1  christos   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
   3122  1.1  christos   if (BE (err != REG_NOERROR, 0))
   3123  1.1  christos     return err;
   3124  1.1  christos   /* Create a new node set NEW_NODES with the nodes which are epsilon
   3125  1.1  christos      closures of the node in CUR_NODES.  */
   3126  1.1  christos 
   3127  1.1  christos   for (idx = 0; idx < cur_nodes->nelem; ++idx)
   3128  1.1  christos     {
   3129  1.1  christos       Idx cur_node = cur_nodes->elems[idx];
   3130  1.1  christos       re_node_set *eclosure = dfa->eclosures + cur_node;
   3131  1.1  christos       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
   3132  1.1  christos       if (outside_node == REG_MISSING)
   3133  1.1  christos 	{
   3134  1.1  christos 	  /* There are no problematic nodes, just merge them.  */
   3135  1.1  christos 	  err = re_node_set_merge (&new_nodes, eclosure);
   3136  1.1  christos 	  if (BE (err != REG_NOERROR, 0))
   3137  1.1  christos 	    {
   3138  1.1  christos 	      re_node_set_free (&new_nodes);
   3139  1.1  christos 	      return err;
   3140  1.1  christos 	    }
   3141  1.1  christos 	}
   3142  1.1  christos       else
   3143  1.1  christos 	{
   3144  1.1  christos 	  /* There are problematic nodes, re-calculate incrementally.  */
   3145  1.1  christos 	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
   3146  1.1  christos 					      ex_subexp, type);
   3147  1.1  christos 	  if (BE (err != REG_NOERROR, 0))
   3148  1.1  christos 	    {
   3149  1.1  christos 	      re_node_set_free (&new_nodes);
   3150  1.1  christos 	      return err;
   3151  1.1  christos 	    }
   3152  1.1  christos 	}
   3153  1.1  christos     }
   3154  1.1  christos   re_node_set_free (cur_nodes);
   3155  1.1  christos   *cur_nodes = new_nodes;
   3156  1.1  christos   return REG_NOERROR;
   3157  1.1  christos }
   3158  1.1  christos 
   3159  1.1  christos /* Helper function for check_arrival_expand_ecl.
   3160  1.1  christos    Check incrementally the epsilon closure of TARGET, and if it isn't
   3161  1.1  christos    problematic append it to DST_NODES.  */
   3162  1.1  christos 
   3163  1.1  christos static reg_errcode_t
   3164  1.1  christos internal_function
   3165  1.1  christos check_arrival_expand_ecl_sub (re_dfa_t *dfa, re_node_set *dst_nodes,
   3166  1.1  christos 			      Idx target, Idx ex_subexp, int type)
   3167  1.1  christos {
   3168  1.1  christos   Idx cur_node;
   3169  1.1  christos   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
   3170  1.1  christos     {
   3171  1.1  christos       bool ok;
   3172  1.1  christos 
   3173  1.1  christos       if (dfa->nodes[cur_node].type == type
   3174  1.1  christos 	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
   3175  1.1  christos 	{
   3176  1.1  christos 	  if (type == OP_CLOSE_SUBEXP)
   3177  1.1  christos 	    {
   3178  1.1  christos 	      ok = re_node_set_insert (dst_nodes, cur_node);
   3179  1.1  christos 	      if (BE (! ok, 0))
   3180  1.1  christos 		return REG_ESPACE;
   3181  1.1  christos 	    }
   3182  1.1  christos 	  break;
   3183  1.1  christos 	}
   3184  1.1  christos       ok = re_node_set_insert (dst_nodes, cur_node);
   3185  1.1  christos       if (BE (! ok, 0))
   3186  1.1  christos 	return REG_ESPACE;
   3187  1.1  christos       if (dfa->edests[cur_node].nelem == 0)
   3188  1.1  christos 	break;
   3189  1.1  christos       if (dfa->edests[cur_node].nelem == 2)
   3190  1.1  christos 	{
   3191  1.1  christos 	  reg_errcode_t ret =
   3192  1.1  christos 	    check_arrival_expand_ecl_sub (dfa, dst_nodes,
   3193  1.1  christos 					  dfa->edests[cur_node].elems[1],
   3194  1.1  christos 					  ex_subexp, type);
   3195  1.1  christos 	  if (BE (ret != REG_NOERROR, 0))
   3196  1.1  christos 	    return ret;
   3197  1.1  christos 	}
   3198  1.1  christos       cur_node = dfa->edests[cur_node].elems[0];
   3199  1.1  christos     }
   3200  1.1  christos   return REG_NOERROR;
   3201  1.1  christos }
   3202  1.1  christos 
   3203  1.1  christos 
   3204  1.1  christos /* For all the back references in the current state, calculate the
   3205  1.1  christos    destination of the back references by the appropriate entry
   3206  1.1  christos    in MCTX->BKREF_ENTS.  */
   3207  1.1  christos 
   3208  1.1  christos static reg_errcode_t
   3209  1.1  christos internal_function
   3210  1.1  christos expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
   3211  1.1  christos 		    Idx cur_str, Idx subexp_num, int type)
   3212  1.1  christos {
   3213  1.1  christos   re_dfa_t *const dfa = mctx->dfa;
   3214  1.1  christos   reg_errcode_t err;
   3215  1.1  christos   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
   3216  1.1  christos   struct re_backref_cache_entry *ent;
   3217  1.1  christos 
   3218  1.1  christos   if (cache_idx_start == REG_MISSING)
   3219  1.1  christos     return REG_NOERROR;
   3220  1.1  christos 
   3221  1.1  christos  restart:
   3222  1.1  christos   ent = mctx->bkref_ents + cache_idx_start;
   3223  1.1  christos   do
   3224  1.1  christos     {
   3225  1.1  christos       Idx to_idx, next_node;
   3226  1.1  christos 
   3227  1.1  christos       /* Is this entry ENT is appropriate?  */
   3228  1.1  christos       if (!re_node_set_contains (cur_nodes, ent->node))
   3229  1.1  christos 	continue; /* No.  */
   3230  1.1  christos 
   3231  1.1  christos       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
   3232  1.1  christos       /* Calculate the destination of the back reference, and append it
   3233  1.1  christos 	 to MCTX->STATE_LOG.  */
   3234  1.1  christos       if (to_idx == cur_str)
   3235  1.1  christos 	{
   3236  1.1  christos 	  /* The backreference did epsilon transit, we must re-check all the
   3237  1.1  christos 	     node in the current state.  */
   3238  1.1  christos 	  re_node_set new_dests;
   3239  1.1  christos 	  reg_errcode_t err2, err3;
   3240  1.1  christos 	  next_node = dfa->edests[ent->node].elems[0];
   3241  1.1  christos 	  if (re_node_set_contains (cur_nodes, next_node))
   3242  1.1  christos 	    continue;
   3243  1.1  christos 	  err = re_node_set_init_1 (&new_dests, next_node);
   3244  1.1  christos 	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
   3245  1.1  christos 	  err3 = re_node_set_merge (cur_nodes, &new_dests);
   3246  1.1  christos 	  re_node_set_free (&new_dests);
   3247  1.1  christos 	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
   3248  1.1  christos 		  || err3 != REG_NOERROR, 0))
   3249  1.1  christos 	    {
   3250  1.1  christos 	      err = (err != REG_NOERROR ? err
   3251  1.1  christos 		     : (err2 != REG_NOERROR ? err2 : err3));
   3252  1.1  christos 	      return err;
   3253  1.1  christos 	    }
   3254  1.1  christos 	  /* TODO: It is still inefficient...  */
   3255  1.1  christos 	  goto restart;
   3256  1.1  christos 	}
   3257  1.1  christos       else
   3258  1.1  christos 	{
   3259  1.1  christos 	  re_node_set union_set;
   3260  1.1  christos 	  next_node = dfa->nexts[ent->node];
   3261  1.1  christos 	  if (mctx->state_log[to_idx])
   3262  1.1  christos 	    {
   3263  1.1  christos 	      bool ok;
   3264  1.1  christos 	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
   3265  1.1  christos 					next_node))
   3266  1.1  christos 		continue;
   3267  1.1  christos 	      err = re_node_set_init_copy (&union_set,
   3268  1.1  christos 					   &mctx->state_log[to_idx]->nodes);
   3269  1.1  christos 	      ok = re_node_set_insert (&union_set, next_node);
   3270  1.1  christos 	      if (BE (err != REG_NOERROR || ! ok, 0))
   3271  1.1  christos 		{
   3272  1.1  christos 		  re_node_set_free (&union_set);
   3273  1.1  christos 		  err = err != REG_NOERROR ? err : REG_ESPACE;
   3274  1.1  christos 		  return err;
   3275  1.1  christos 		}
   3276  1.1  christos 	    }
   3277  1.1  christos 	  else
   3278  1.1  christos 	    {
   3279  1.1  christos 	      err = re_node_set_init_1 (&union_set, next_node);
   3280  1.1  christos 	      if (BE (err != REG_NOERROR, 0))
   3281  1.1  christos 		return err;
   3282  1.1  christos 	    }
   3283  1.1  christos 	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
   3284  1.1  christos 	  re_node_set_free (&union_set);
   3285  1.1  christos 	  if (BE (mctx->state_log[to_idx] == NULL
   3286  1.1  christos 		  && err != REG_NOERROR, 0))
   3287  1.1  christos 	    return err;
   3288  1.1  christos 	}
   3289  1.1  christos     }
   3290  1.1  christos   while (ent++->more);
   3291  1.1  christos   return REG_NOERROR;
   3292  1.1  christos }
   3293  1.1  christos 
   3294  1.1  christos /* Build transition table for the state.
   3295  1.1  christos    Return true if successful.  */
   3296  1.1  christos 
   3297  1.1  christos static bool
   3298  1.1  christos internal_function
   3299  1.1  christos build_trtable (re_dfa_t *dfa, re_dfastate_t *state)
   3300  1.1  christos {
   3301  1.1  christos   reg_errcode_t err;
   3302  1.1  christos   Idx i, j;
   3303  1.1  christos   int ch;
   3304  1.1  christos   bool need_word_trtable = false;
   3305  1.1  christos   bitset_word elem, mask;
   3306  1.1  christos   bool dests_node_malloced = false, dest_states_malloced = false;
   3307  1.1  christos   Idx ndests; /* Number of the destination states from `state'.  */
   3308  1.1  christos   re_dfastate_t **trtable;
   3309  1.1  christos   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
   3310  1.1  christos   re_node_set follows, *dests_node;
   3311  1.1  christos   bitset *dests_ch;
   3312  1.1  christos   bitset acceptable;
   3313  1.1  christos 
   3314  1.1  christos   struct dests_alloc
   3315  1.1  christos   {
   3316  1.1  christos     re_node_set dests_node[SBC_MAX];
   3317  1.1  christos     bitset dests_ch[SBC_MAX];
   3318  1.1  christos   } *dests_alloc;
   3319  1.1  christos 
   3320  1.2  christos   /* We build DFA states which corresponds to the destination nodes
   3321  1.1  christos      from `state'.  `dests_node[i]' represents the nodes which i-th
   3322  1.1  christos      destination state contains, and `dests_ch[i]' represents the
   3323  1.1  christos      characters which i-th destination state accepts.  */
   3324  1.2  christos #ifndef __SSP__
   3325  1.1  christos   if (__libc_use_alloca (sizeof (struct dests_alloc)))
   3326  1.1  christos     dests_alloc = (struct dests_alloc *) alloca (sizeof dests_alloc[0]);
   3327  1.1  christos   else
   3328  1.1  christos #endif
   3329  1.1  christos     {
   3330  1.1  christos       dests_alloc = re_malloc (struct dests_alloc, 1);
   3331  1.1  christos       if (BE (dests_alloc == NULL, 0))
   3332  1.1  christos 	return false;
   3333  1.1  christos       dests_node_malloced = true;
   3334  1.1  christos     }
   3335  1.1  christos   dests_node = dests_alloc->dests_node;
   3336  1.1  christos   dests_ch = dests_alloc->dests_ch;
   3337  1.1  christos 
   3338  1.1  christos   /* Initialize transiton table.  */
   3339  1.1  christos   state->word_trtable = state->trtable = NULL;
   3340  1.1  christos 
   3341  1.1  christos   /* At first, group all nodes belonging to `state' into several
   3342  1.1  christos      destinations.  */
   3343  1.1  christos   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
   3344  1.1  christos   if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
   3345  1.1  christos     {
   3346  1.1  christos       if (dests_node_malloced)
   3347  1.1  christos 	free (dests_alloc);
   3348  1.1  christos       if (ndests == 0)
   3349  1.1  christos 	{
   3350  1.1  christos 	  state->trtable = re_calloc (re_dfastate_t *, SBC_MAX);
   3351  1.1  christos 	  return true;
   3352  1.1  christos 	}
   3353  1.1  christos       return false;
   3354  1.1  christos     }
   3355  1.1  christos 
   3356  1.1  christos   err = re_node_set_alloc (&follows, ndests + 1);
   3357  1.1  christos   if (BE (err != REG_NOERROR, 0))
   3358  1.1  christos     goto out_free;
   3359  1.1  christos 
   3360  1.1  christos   /* Avoid arithmetic overflow in size calculation.  */
   3361  1.1  christos   if (BE (((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX)
   3362  1.2  christos 	   / (3 * sizeof (re_dfastate_t *)))
   3363  1.1  christos 	  < ndests, 0))
   3364  1.1  christos     goto out_free;
   3365  1.1  christos 
   3366  1.1  christos #ifndef __SSP__
   3367  1.1  christos   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
   3368  1.2  christos 			 + ndests * 3 * sizeof (re_dfastate_t *)))
   3369  1.1  christos     dest_states = (re_dfastate_t **)
   3370  1.1  christos       alloca (ndests * 3 * sizeof (re_dfastate_t *));
   3371  1.1  christos   else
   3372  1.1  christos #endif
   3373  1.1  christos     {
   3374  1.1  christos       dest_states = (re_dfastate_t **)
   3375  1.1  christos 	malloc (ndests * 3 * sizeof (re_dfastate_t *));
   3376  1.1  christos       if (BE (dest_states == NULL, 0))
   3377  1.1  christos 	{
   3378  1.1  christos out_free:
   3379  1.1  christos 	  if (dest_states_malloced)
   3380  1.1  christos 	    free (dest_states);
   3381  1.1  christos 	  re_node_set_free (&follows);
   3382  1.1  christos 	  for (i = 0; i < ndests; ++i)
   3383  1.1  christos 	    re_node_set_free (dests_node + i);
   3384  1.1  christos 	  if (dests_node_malloced)
   3385  1.1  christos 	    free (dests_alloc);
   3386  1.1  christos 	  return false;
   3387  1.1  christos 	}
   3388  1.1  christos       dest_states_malloced = true;
   3389  1.1  christos     }
   3390  1.1  christos   dest_states_word = dest_states + ndests;
   3391  1.1  christos   dest_states_nl = dest_states_word + ndests;
   3392  1.1  christos   bitset_empty (acceptable);
   3393  1.1  christos 
   3394  1.1  christos   /* Then build the states for all destinations.  */
   3395  1.1  christos   for (i = 0; i < ndests; ++i)
   3396  1.1  christos     {
   3397  1.1  christos       Idx next_node;
   3398  1.1  christos       re_node_set_empty (&follows);
   3399  1.1  christos       /* Merge the follows of this destination states.  */
   3400  1.1  christos       for (j = 0; j < dests_node[i].nelem; ++j)
   3401  1.1  christos 	{
   3402  1.1  christos 	  next_node = dfa->nexts[dests_node[i].elems[j]];
   3403  1.1  christos 	  if (next_node != REG_MISSING)
   3404  1.1  christos 	    {
   3405  1.1  christos 	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
   3406  1.1  christos 	      if (BE (err != REG_NOERROR, 0))
   3407  1.1  christos 		goto out_free;
   3408  1.1  christos 	    }
   3409  1.1  christos 	}
   3410  1.1  christos       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
   3411  1.1  christos       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
   3412  1.1  christos 	goto out_free;
   3413  1.1  christos       /* If the new state has context constraint,
   3414  1.1  christos 	 build appropriate states for these contexts.  */
   3415  1.1  christos       if (dest_states[i]->has_constraint)
   3416  1.1  christos 	{
   3417  1.1  christos 	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
   3418  1.1  christos 							  CONTEXT_WORD);
   3419  1.1  christos 	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
   3420  1.1  christos 	    goto out_free;
   3421  1.1  christos 
   3422  1.1  christos 	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
   3423  1.1  christos 	    need_word_trtable = true;
   3424  1.1  christos 
   3425  1.1  christos 	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
   3426  1.1  christos 							CONTEXT_NEWLINE);
   3427  1.1  christos 	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
   3428  1.1  christos 	    goto out_free;
   3429  1.1  christos  	}
   3430  1.1  christos       else
   3431  1.1  christos 	{
   3432  1.1  christos 	  dest_states_word[i] = dest_states[i];
   3433  1.1  christos 	  dest_states_nl[i] = dest_states[i];
   3434  1.1  christos 	}
   3435  1.1  christos       bitset_merge (acceptable, dests_ch[i]);
   3436  1.1  christos     }
   3437  1.1  christos 
   3438  1.1  christos   if (!BE (need_word_trtable, 0))
   3439  1.1  christos     {
   3440  1.1  christos       /* We don't care about whether the following character is a word
   3441  1.1  christos 	 character, or we are in a single-byte character set so we can
   3442  1.1  christos 	 discern by looking at the character code: allocate a
   3443  1.1  christos 	 256-entry transition table.  */
   3444  1.1  christos       trtable = state->trtable = re_calloc (re_dfastate_t *, SBC_MAX);
   3445  1.1  christos       if (BE (trtable == NULL, 0))
   3446  1.1  christos 	goto out_free;
   3447  1.1  christos 
   3448  1.1  christos       /* For all characters ch...:  */
   3449  1.1  christos       for (i = 0; i < BITSET_WORDS; ++i)
   3450  1.1  christos 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
   3451  1.1  christos 	     elem;
   3452  1.1  christos 	     mask <<= 1, elem >>= 1, ++ch)
   3453  1.1  christos 	  if (BE (elem & 1, 0))
   3454  1.1  christos 	    {
   3455  1.1  christos 	      /* There must be exactly one destination which accepts
   3456  1.1  christos 		 character ch.  See group_nodes_into_DFAstates.  */
   3457  1.1  christos 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
   3458  1.1  christos 		;
   3459  1.1  christos 
   3460  1.1  christos 	      /* j-th destination accepts the word character ch.  */
   3461  1.1  christos 	      if (dfa->word_char[i] & mask)
   3462  1.1  christos 		trtable[ch] = dest_states_word[j];
   3463  1.1  christos 	      else
   3464  1.1  christos 		trtable[ch] = dest_states[j];
   3465  1.1  christos 	    }
   3466  1.1  christos     }
   3467  1.1  christos   else
   3468  1.1  christos     {
   3469  1.1  christos       /* We care about whether the following character is a word
   3470  1.1  christos 	 character, and we are in a multi-byte character set: discern
   3471  1.1  christos 	 by looking at the character code: build two 256-entry
   3472  1.1  christos 	 transition tables, one starting at trtable[0] and one
   3473  1.1  christos 	 starting at trtable[SBC_MAX].  */
   3474  1.1  christos       trtable = state->word_trtable = re_calloc (re_dfastate_t *, 2 * SBC_MAX);
   3475  1.1  christos       if (BE (trtable == NULL, 0))
   3476  1.1  christos 	goto out_free;
   3477  1.1  christos 
   3478  1.1  christos       /* For all characters ch...:  */
   3479  1.1  christos       for (i = 0; i < BITSET_WORDS; ++i)
   3480  1.1  christos 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
   3481  1.1  christos 	     elem;
   3482  1.1  christos 	     mask <<= 1, elem >>= 1, ++ch)
   3483  1.1  christos 	  if (BE (elem & 1, 0))
   3484  1.1  christos 	    {
   3485  1.1  christos 	      /* There must be exactly one destination which accepts
   3486  1.1  christos 		 character ch.  See group_nodes_into_DFAstates.  */
   3487  1.1  christos 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
   3488  1.1  christos 		;
   3489  1.1  christos 
   3490  1.1  christos 	      /* j-th destination accepts the word character ch.  */
   3491  1.1  christos 	      trtable[ch] = dest_states[j];
   3492  1.1  christos 	      trtable[ch + SBC_MAX] = dest_states_word[j];
   3493  1.1  christos 	    }
   3494  1.1  christos     }
   3495  1.1  christos 
   3496  1.1  christos   /* new line */
   3497  1.1  christos   if (bitset_contain (acceptable, NEWLINE_CHAR))
   3498  1.1  christos     {
   3499  1.1  christos       /* The current state accepts newline character.  */
   3500  1.1  christos       for (j = 0; j < ndests; ++j)
   3501  1.1  christos 	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
   3502  1.1  christos 	  {
   3503  1.1  christos 	    /* k-th destination accepts newline character.  */
   3504  1.1  christos 	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
   3505  1.1  christos 	    if (need_word_trtable)
   3506  1.1  christos 	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
   3507  1.1  christos 	    /* There must be only one destination which accepts
   3508  1.1  christos 	       newline.  See group_nodes_into_DFAstates.  */
   3509  1.1  christos 	    break;
   3510  1.1  christos 	  }
   3511  1.1  christos     }
   3512  1.1  christos 
   3513  1.1  christos   if (dest_states_malloced)
   3514  1.1  christos     free (dest_states);
   3515  1.1  christos 
   3516  1.1  christos   re_node_set_free (&follows);
   3517  1.1  christos   for (i = 0; i < ndests; ++i)
   3518  1.1  christos     re_node_set_free (dests_node + i);
   3519  1.1  christos 
   3520  1.1  christos   if (dests_node_malloced)
   3521  1.1  christos     free (dests_alloc);
   3522  1.1  christos 
   3523  1.1  christos   return true;
   3524  1.1  christos }
   3525  1.1  christos 
   3526  1.1  christos /* Group all nodes belonging to STATE into several destinations.
   3527  1.1  christos    Then for all destinations, set the nodes belonging to the destination
   3528  1.1  christos    to DESTS_NODE[i] and set the characters accepted by the destination
   3529  1.1  christos    to DEST_CH[i].  This function return the number of destinations.  */
   3530  1.1  christos 
   3531  1.1  christos static Idx
   3532  1.1  christos internal_function
   3533  1.1  christos group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
   3534  1.1  christos 			    re_node_set *dests_node, bitset *dests_ch)
   3535  1.1  christos {
   3536  1.1  christos   reg_errcode_t err;
   3537  1.1  christos   bool ok;
   3538  1.1  christos   Idx i, j, k;
   3539  1.1  christos   Idx ndests; /* Number of the destinations from `state'.  */
   3540  1.1  christos   bitset accepts; /* Characters a node can accept.  */
   3541  1.1  christos   const re_node_set *cur_nodes = &state->nodes;
   3542  1.1  christos   bitset_empty (accepts);
   3543  1.1  christos   ndests = 0;
   3544  1.1  christos 
   3545  1.1  christos   /* For all the nodes belonging to `state',  */
   3546  1.1  christos   for (i = 0; i < cur_nodes->nelem; ++i)
   3547  1.1  christos     {
   3548  1.1  christos       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
   3549  1.1  christos       re_token_type_t type = node->type;
   3550  1.1  christos       unsigned int constraint = node->constraint;
   3551  1.1  christos 
   3552  1.1  christos       /* Enumerate all single byte character this node can accept.  */
   3553  1.1  christos       if (type == CHARACTER)
   3554  1.1  christos 	bitset_set (accepts, node->opr.c);
   3555  1.1  christos       else if (type == SIMPLE_BRACKET)
   3556  1.1  christos 	{
   3557  1.1  christos 	  bitset_merge (accepts, node->opr.sbcset);
   3558  1.1  christos 	}
   3559  1.1  christos       else if (type == OP_PERIOD)
   3560  1.1  christos 	{
   3561  1.1  christos #ifdef RE_ENABLE_I18N
   3562  1.1  christos 	  if (dfa->mb_cur_max > 1)
   3563  1.1  christos 	    bitset_merge (accepts, dfa->sb_char);
   3564  1.1  christos 	  else
   3565  1.1  christos #endif
   3566  1.1  christos 	    bitset_set_all (accepts);
   3567  1.1  christos 	  if (!(dfa->syntax & REG_DOT_NEWLINE))
   3568  1.1  christos 	    bitset_clear (accepts, '\n');
   3569  1.1  christos 	  if (dfa->syntax & REG_DOT_NOT_NULL)
   3570  1.1  christos 	    bitset_clear (accepts, '\0');
   3571  1.1  christos 	}
   3572  1.1  christos #ifdef RE_ENABLE_I18N
   3573  1.1  christos       else if (type == OP_UTF8_PERIOD)
   3574  1.1  christos         {
   3575  1.1  christos 	  if (SBC_MAX / 2 % BITSET_WORD_BITS == 0)
   3576  1.1  christos 	    memset (accepts, -1, sizeof accepts / 2);
   3577  1.1  christos 	  else
   3578  1.1  christos 	    bitset_merge (accepts, utf8_sb_map);
   3579  1.1  christos 	  if (!(dfa->syntax & REG_DOT_NEWLINE))
   3580  1.1  christos 	    bitset_clear (accepts, '\n');
   3581  1.1  christos 	  if (dfa->syntax & REG_DOT_NOT_NULL)
   3582  1.1  christos 	    bitset_clear (accepts, '\0');
   3583  1.1  christos         }
   3584  1.1  christos #endif
   3585  1.1  christos       else
   3586  1.1  christos 	continue;
   3587  1.1  christos 
   3588  1.1  christos       /* Check the `accepts' and sift the characters which are not
   3589  1.1  christos 	 match it the context.  */
   3590  1.1  christos       if (constraint)
   3591  1.1  christos 	{
   3592  1.1  christos 	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
   3593  1.1  christos 	    {
   3594  1.1  christos 	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
   3595  1.1  christos 	      bitset_empty (accepts);
   3596  1.1  christos 	      if (accepts_newline)
   3597  1.1  christos 		bitset_set (accepts, NEWLINE_CHAR);
   3598  1.1  christos 	      else
   3599  1.1  christos 		continue;
   3600  1.1  christos 	    }
   3601  1.1  christos 	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
   3602  1.1  christos 	    {
   3603  1.1  christos 	      bitset_empty (accepts);
   3604  1.1  christos 	      continue;
   3605  1.1  christos 	    }
   3606  1.1  christos 
   3607  1.1  christos 	  if (constraint & NEXT_WORD_CONSTRAINT)
   3608  1.1  christos 	    {
   3609  1.1  christos 	      bitset_word any_set = 0;
   3610  1.1  christos 	      if (type == CHARACTER && !node->word_char)
   3611  1.1  christos 		{
   3612  1.1  christos 		  bitset_empty (accepts);
   3613  1.1  christos 		  continue;
   3614  1.1  christos 		}
   3615  1.1  christos #ifdef RE_ENABLE_I18N
   3616  1.1  christos 	      if (dfa->mb_cur_max > 1)
   3617  1.1  christos 		for (j = 0; j < BITSET_WORDS; ++j)
   3618  1.1  christos 		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
   3619  1.1  christos 	      else
   3620  1.1  christos #endif
   3621  1.1  christos 		for (j = 0; j < BITSET_WORDS; ++j)
   3622  1.1  christos 		  any_set |= (accepts[j] &= dfa->word_char[j]);
   3623  1.1  christos 	      if (!any_set)
   3624  1.1  christos 		continue;
   3625  1.1  christos 	    }
   3626  1.1  christos 	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
   3627  1.1  christos 	    {
   3628  1.1  christos 	      bitset_word any_set = 0;
   3629  1.1  christos 	      if (type == CHARACTER && node->word_char)
   3630  1.1  christos 		{
   3631  1.1  christos 		  bitset_empty (accepts);
   3632  1.1  christos 		  continue;
   3633  1.1  christos 		}
   3634  1.1  christos #ifdef RE_ENABLE_I18N
   3635  1.1  christos 	      if (dfa->mb_cur_max > 1)
   3636  1.1  christos 		for (j = 0; j < BITSET_WORDS; ++j)
   3637  1.1  christos 		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
   3638  1.1  christos 	      else
   3639  1.1  christos #endif
   3640  1.1  christos 		for (j = 0; j < BITSET_WORDS; ++j)
   3641  1.1  christos 		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
   3642  1.1  christos 	      if (!any_set)
   3643  1.1  christos 		continue;
   3644  1.1  christos 	    }
   3645  1.1  christos 	}
   3646  1.1  christos 
   3647  1.1  christos       /* Then divide `accepts' into DFA states, or create a new
   3648  1.1  christos 	 state.  Above, we make sure that accepts is not empty.  */
   3649  1.1  christos       for (j = 0; j < ndests; ++j)
   3650  1.1  christos 	{
   3651  1.1  christos 	  bitset intersec; /* Intersection sets, see below.  */
   3652  1.1  christos 	  bitset remains;
   3653  1.1  christos 	  /* Flags, see below.  */
   3654  1.1  christos 	  bitset_word has_intersec, not_subset, not_consumed;
   3655  1.1  christos 
   3656  1.1  christos 	  /* Optimization, skip if this state doesn't accept the character.  */
   3657  1.1  christos 	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
   3658  1.1  christos 	    continue;
   3659  1.1  christos 
   3660  1.1  christos 	  /* Enumerate the intersection set of this state and `accepts'.  */
   3661  1.1  christos 	  has_intersec = 0;
   3662  1.1  christos 	  for (k = 0; k < BITSET_WORDS; ++k)
   3663  1.1  christos 	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
   3664  1.1  christos 	  /* And skip if the intersection set is empty.  */
   3665  1.1  christos 	  if (!has_intersec)
   3666  1.1  christos 	    continue;
   3667  1.1  christos 
   3668  1.1  christos 	  /* Then check if this state is a subset of `accepts'.  */
   3669  1.1  christos 	  not_subset = not_consumed = 0;
   3670  1.1  christos 	  for (k = 0; k < BITSET_WORDS; ++k)
   3671  1.1  christos 	    {
   3672  1.1  christos 	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
   3673  1.1  christos 	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
   3674  1.1  christos 	    }
   3675  1.1  christos 
   3676  1.1  christos 	  /* If this state isn't a subset of `accepts', create a
   3677  1.1  christos 	     new group state, which has the `remains'. */
   3678  1.1  christos 	  if (not_subset)
   3679  1.1  christos 	    {
   3680  1.1  christos 	      bitset_copy (dests_ch[ndests], remains);
   3681  1.1  christos 	      bitset_copy (dests_ch[j], intersec);
   3682  1.1  christos 	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
   3683  1.1  christos 	      if (BE (err != REG_NOERROR, 0))
   3684  1.1  christos 		goto error_return;
   3685  1.1  christos 	      ++ndests;
   3686  1.1  christos 	    }
   3687  1.1  christos 
   3688  1.1  christos 	  /* Put the position in the current group. */
   3689  1.1  christos 	  ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
   3690  1.1  christos 	  if (BE (! ok, 0))
   3691  1.1  christos 	    goto error_return;
   3692  1.1  christos 
   3693  1.1  christos 	  /* If all characters are consumed, go to next node. */
   3694  1.1  christos 	  if (!not_consumed)
   3695  1.1  christos 	    break;
   3696  1.1  christos 	}
   3697  1.1  christos       /* Some characters remain, create a new group. */
   3698  1.1  christos       if (j == ndests)
   3699  1.1  christos 	{
   3700  1.1  christos 	  bitset_copy (dests_ch[ndests], accepts);
   3701  1.1  christos 	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
   3702  1.1  christos 	  if (BE (err != REG_NOERROR, 0))
   3703  1.1  christos 	    goto error_return;
   3704  1.1  christos 	  ++ndests;
   3705  1.1  christos 	  bitset_empty (accepts);
   3706  1.1  christos 	}
   3707  1.1  christos     }
   3708  1.1  christos   return ndests;
   3709  1.1  christos  error_return:
   3710  1.1  christos   for (j = 0; j < ndests; ++j)
   3711  1.1  christos     re_node_set_free (dests_node + j);
   3712  1.1  christos   return REG_MISSING;
   3713  1.1  christos }
   3714  1.1  christos 
   3715  1.1  christos #ifdef RE_ENABLE_I18N
   3716  1.1  christos /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
   3717  1.1  christos    Return the number of the bytes the node accepts.
   3718  1.1  christos    STR_IDX is the current index of the input string.
   3719  1.1  christos 
   3720  1.1  christos    This function handles the nodes which can accept one character, or
   3721  1.1  christos    one collating element like '.', '[a-z]', opposite to the other nodes
   3722  1.1  christos    can only accept one byte.  */
   3723  1.1  christos 
   3724  1.1  christos static int
   3725  1.1  christos internal_function
   3726  1.1  christos check_node_accept_bytes (re_dfa_t *dfa, Idx node_idx,
   3727  1.1  christos 			 const re_string_t *input, Idx str_idx)
   3728  1.1  christos {
   3729  1.1  christos   const re_token_t *node = dfa->nodes + node_idx;
   3730  1.1  christos   int char_len, elem_len;
   3731  1.1  christos   Idx i;
   3732  1.1  christos 
   3733  1.1  christos   if (BE (node->type == OP_UTF8_PERIOD, 0))
   3734  1.1  christos     {
   3735  1.1  christos       unsigned char c = re_string_byte_at (input, str_idx), d;
   3736  1.1  christos       if (BE (c < 0xc2, 1))
   3737  1.1  christos 	return 0;
   3738  1.1  christos 
   3739  1.1  christos       if (str_idx + 2 > input->len)
   3740  1.1  christos 	return 0;
   3741  1.1  christos 
   3742  1.1  christos       d = re_string_byte_at (input, str_idx + 1);
   3743  1.1  christos       if (c < 0xe0)
   3744  1.1  christos 	return (d < 0x80 || d > 0xbf) ? 0 : 2;
   3745  1.1  christos       else if (c < 0xf0)
   3746  1.1  christos 	{
   3747  1.1  christos 	  char_len = 3;
   3748  1.1  christos 	  if (c == 0xe0 && d < 0xa0)
   3749  1.1  christos 	    return 0;
   3750  1.1  christos 	}
   3751  1.1  christos       else if (c < 0xf8)
   3752  1.1  christos 	{
   3753  1.1  christos 	  char_len = 4;
   3754  1.1  christos 	  if (c == 0xf0 && d < 0x90)
   3755  1.1  christos 	    return 0;
   3756  1.1  christos 	}
   3757  1.1  christos       else if (c < 0xfc)
   3758  1.1  christos 	{
   3759  1.1  christos 	  char_len = 5;
   3760  1.1  christos 	  if (c == 0xf8 && d < 0x88)
   3761  1.1  christos 	    return 0;
   3762  1.1  christos 	}
   3763  1.1  christos       else if (c < 0xfe)
   3764  1.1  christos 	{
   3765  1.1  christos 	  char_len = 6;
   3766  1.1  christos 	  if (c == 0xfc && d < 0x84)
   3767  1.1  christos 	    return 0;
   3768  1.1  christos 	}
   3769  1.1  christos       else
   3770  1.1  christos 	return 0;
   3771  1.1  christos 
   3772  1.1  christos       if (str_idx + char_len > input->len)
   3773  1.1  christos 	return 0;
   3774  1.1  christos 
   3775  1.1  christos       for (i = 1; i < char_len; ++i)
   3776  1.1  christos 	{
   3777  1.1  christos 	  d = re_string_byte_at (input, str_idx + i);
   3778  1.1  christos 	  if (d < 0x80 || d > 0xbf)
   3779  1.1  christos 	    return 0;
   3780  1.1  christos 	}
   3781  1.1  christos       return char_len;
   3782  1.1  christos     }
   3783  1.1  christos 
   3784  1.1  christos   char_len = re_string_char_size_at (input, str_idx);
   3785  1.1  christos   if (node->type == OP_PERIOD)
   3786  1.1  christos     {
   3787  1.1  christos       if (char_len <= 1)
   3788  1.1  christos         return 0;
   3789  1.1  christos       /* FIXME: I don't think this if is needed, as both '\n'
   3790  1.1  christos 	 and '\0' are char_len == 1.  */
   3791  1.1  christos       /* '.' accepts any one character except the following two cases.  */
   3792  1.1  christos       if ((!(dfa->syntax & REG_DOT_NEWLINE) &&
   3793  1.1  christos 	   re_string_byte_at (input, str_idx) == '\n') ||
   3794  1.1  christos 	  ((dfa->syntax & REG_DOT_NOT_NULL) &&
   3795  1.1  christos 	   re_string_byte_at (input, str_idx) == '\0'))
   3796  1.1  christos 	return 0;
   3797  1.1  christos       return char_len;
   3798  1.1  christos     }
   3799  1.1  christos 
   3800  1.1  christos   elem_len = re_string_elem_size_at (input, str_idx);
   3801  1.1  christos   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
   3802  1.1  christos     return 0;
   3803  1.1  christos 
   3804  1.1  christos   if (node->type == COMPLEX_BRACKET)
   3805  1.1  christos     {
   3806  1.1  christos       const re_charset_t *cset = node->opr.mbcset;
   3807  1.1  christos # ifdef _LIBC
   3808  1.1  christos       const unsigned char *pin
   3809  1.1  christos 	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
   3810  1.1  christos       Idx j;
   3811  1.1  christos       uint32_t nrules;
   3812  1.1  christos # endif /* _LIBC */
   3813  1.1  christos       int match_len = 0;
   3814  1.1  christos       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
   3815  1.1  christos 		    ? re_string_wchar_at (input, str_idx) : 0);
   3816  1.1  christos 
   3817  1.1  christos       /* match with multibyte character?  */
   3818  1.1  christos       for (i = 0; i < cset->nmbchars; ++i)
   3819  1.1  christos 	if (wc == cset->mbchars[i])
   3820  1.1  christos 	  {
   3821  1.1  christos 	    match_len = char_len;
   3822  1.1  christos 	    goto check_node_accept_bytes_match;
   3823  1.1  christos 	  }
   3824  1.1  christos       /* match with character_class?  */
   3825  1.1  christos       for (i = 0; i < cset->nchar_classes; ++i)
   3826  1.1  christos 	{
   3827  1.1  christos 	  wctype_t wt = cset->char_classes[i];
   3828  1.1  christos 	  if (__iswctype (wc, wt))
   3829  1.1  christos 	    {
   3830  1.1  christos 	      match_len = char_len;
   3831  1.1  christos 	      goto check_node_accept_bytes_match;
   3832  1.1  christos 	    }
   3833  1.1  christos 	}
   3834  1.1  christos 
   3835  1.1  christos # ifdef _LIBC
   3836  1.1  christos       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   3837  1.1  christos       if (nrules != 0)
   3838  1.1  christos 	{
   3839  1.1  christos 	  unsigned int in_collseq = 0;
   3840  1.1  christos 	  const int32_t *table, *indirect;
   3841  1.1  christos 	  const unsigned char *weights, *extra;
   3842  1.1  christos 	  const char *collseqwc;
   3843  1.1  christos 	  int32_t idx;
   3844  1.1  christos 	  /* This #include defines a local function!  */
   3845  1.1  christos #  include <locale/weight.h>
   3846  1.1  christos 
   3847  1.1  christos 	  /* match with collating_symbol?  */
   3848  1.1  christos 	  if (cset->ncoll_syms)
   3849  1.1  christos 	    extra = (const unsigned char *)
   3850  1.1  christos 	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
   3851  1.1  christos 	  for (i = 0; i < cset->ncoll_syms; ++i)
   3852  1.1  christos 	    {
   3853  1.1  christos 	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
   3854  1.1  christos 	      /* Compare the length of input collating element and
   3855  1.1  christos 		 the length of current collating element.  */
   3856  1.1  christos 	      if (*coll_sym != elem_len)
   3857  1.1  christos 		continue;
   3858  1.1  christos 	      /* Compare each bytes.  */
   3859  1.1  christos 	      for (j = 0; j < *coll_sym; j++)
   3860  1.1  christos 		if (pin[j] != coll_sym[1 + j])
   3861  1.1  christos 		  break;
   3862  1.1  christos 	      if (j == *coll_sym)
   3863  1.1  christos 		{
   3864  1.1  christos 		  /* Match if every bytes is equal.  */
   3865  1.1  christos 		  match_len = j;
   3866  1.1  christos 		  goto check_node_accept_bytes_match;
   3867  1.1  christos 		}
   3868  1.1  christos 	    }
   3869  1.1  christos 
   3870  1.1  christos 	  if (cset->nranges)
   3871  1.1  christos 	    {
   3872  1.1  christos 	      if (elem_len <= char_len)
   3873  1.1  christos 		{
   3874  1.1  christos 		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
   3875  1.1  christos 		  in_collseq = __collseq_table_lookup (collseqwc, wc);
   3876  1.1  christos 		}
   3877  1.1  christos 	      else
   3878  1.1  christos 		in_collseq = find_collation_sequence_value (pin, elem_len);
   3879  1.1  christos 	    }
   3880  1.1  christos 	  /* match with range expression?  */
   3881  1.1  christos 	  for (i = 0; i < cset->nranges; ++i)
   3882  1.1  christos 	    if (cset->range_starts[i] <= in_collseq
   3883  1.1  christos 		&& in_collseq <= cset->range_ends[i])
   3884  1.1  christos 	      {
   3885  1.1  christos 		match_len = elem_len;
   3886  1.1  christos 		goto check_node_accept_bytes_match;
   3887  1.1  christos 	      }
   3888  1.1  christos 
   3889  1.1  christos 	  /* match with equivalence_class?  */
   3890  1.1  christos 	  if (cset->nequiv_classes)
   3891  1.1  christos 	    {
   3892  1.1  christos 	      const unsigned char *cp = pin;
   3893  1.1  christos 	      table = (const int32_t *)
   3894  1.1  christos 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
   3895  1.1  christos 	      weights = (const unsigned char *)
   3896  1.1  christos 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
   3897  1.1  christos 	      extra = (const unsigned char *)
   3898  1.1  christos 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
   3899  1.1  christos 	      indirect = (const int32_t *)
   3900  1.1  christos 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
   3901  1.1  christos 	      idx = findidx (&cp);
   3902  1.1  christos 	      if (idx > 0)
   3903  1.1  christos 		for (i = 0; i < cset->nequiv_classes; ++i)
   3904  1.1  christos 		  {
   3905  1.1  christos 		    int32_t equiv_class_idx = cset->equiv_classes[i];
   3906  1.1  christos 		    size_t weight_len = weights[idx];
   3907  1.1  christos 		    if (weight_len == weights[equiv_class_idx])
   3908  1.1  christos 		      {
   3909  1.1  christos 			Idx cnt = 0;
   3910  1.1  christos 			while (cnt <= weight_len
   3911  1.1  christos 			       && (weights[equiv_class_idx + 1 + cnt]
   3912  1.1  christos 				   == weights[idx + 1 + cnt]))
   3913  1.1  christos 			  ++cnt;
   3914  1.1  christos 			if (cnt > weight_len)
   3915  1.1  christos 			  {
   3916  1.1  christos 			    match_len = elem_len;
   3917  1.1  christos 			    goto check_node_accept_bytes_match;
   3918  1.1  christos 			  }
   3919  1.1  christos 		      }
   3920  1.1  christos 		  }
   3921  1.1  christos 	    }
   3922  1.1  christos 	}
   3923  1.1  christos       else
   3924  1.1  christos # endif /* _LIBC */
   3925  1.1  christos 	{
   3926  1.1  christos 	  /* match with range expression?  */
   3927  1.1  christos #if __GNUC__ >= 2
   3928  1.1  christos 	  wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
   3929  1.1  christos #else
   3930  1.1  christos 	  wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
   3931  1.1  christos 	  cmp_buf[2] = wc;
   3932  1.1  christos #endif
   3933  1.1  christos 	  for (i = 0; i < cset->nranges; ++i)
   3934  1.1  christos 	    {
   3935  1.1  christos 	      cmp_buf[0] = cset->range_starts[i];
   3936  1.1  christos 	      cmp_buf[4] = cset->range_ends[i];
   3937  1.1  christos 	      if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
   3938  1.1  christos 		  && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
   3939  1.1  christos 		{
   3940  1.1  christos 		  match_len = char_len;
   3941  1.1  christos 		  goto check_node_accept_bytes_match;
   3942  1.1  christos 		}
   3943  1.1  christos 	    }
   3944  1.1  christos 	}
   3945  1.1  christos     check_node_accept_bytes_match:
   3946  1.1  christos       if (!cset->non_match)
   3947  1.1  christos 	return match_len;
   3948  1.1  christos       else
   3949  1.1  christos 	{
   3950  1.1  christos 	  if (match_len > 0)
   3951  1.1  christos 	    return 0;
   3952  1.1  christos 	  else
   3953  1.1  christos 	    return (elem_len > char_len) ? elem_len : char_len;
   3954  1.1  christos 	}
   3955  1.1  christos     }
   3956  1.1  christos   return 0;
   3957  1.1  christos }
   3958  1.1  christos 
   3959  1.1  christos # ifdef _LIBC
   3960  1.1  christos static unsigned int
   3961  1.1  christos find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
   3962  1.1  christos {
   3963  1.1  christos   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   3964  1.1  christos   if (nrules == 0)
   3965  1.1  christos     {
   3966  1.1  christos       if (mbs_len == 1)
   3967  1.1  christos 	{
   3968  1.1  christos 	  /* No valid character.  Match it as a single byte character.  */
   3969  1.1  christos 	  const unsigned char *collseq = (const unsigned char *)
   3970  1.1  christos 	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
   3971  1.1  christos 	  return collseq[mbs[0]];
   3972  1.1  christos 	}
   3973  1.1  christos       return UINT_MAX;
   3974  1.1  christos     }
   3975  1.1  christos   else
   3976  1.1  christos     {
   3977  1.1  christos       int32_t idx;
   3978  1.1  christos       const unsigned char *extra = (const unsigned char *)
   3979  1.1  christos 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
   3980  1.1  christos       int32_t extrasize = (const unsigned char *)
   3981  1.1  christos 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
   3982  1.1  christos 
   3983  1.1  christos       for (idx = 0; idx < extrasize;)
   3984  1.1  christos 	{
   3985  1.1  christos 	  int mbs_cnt;
   3986  1.1  christos 	  bool found = false;
   3987  1.1  christos 	  int32_t elem_mbs_len;
   3988  1.1  christos 	  /* Skip the name of collating element name.  */
   3989  1.1  christos 	  idx = idx + extra[idx] + 1;
   3990  1.1  christos 	  elem_mbs_len = extra[idx++];
   3991  1.1  christos 	  if (mbs_len == elem_mbs_len)
   3992  1.1  christos 	    {
   3993  1.1  christos 	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
   3994  1.1  christos 		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
   3995  1.1  christos 		  break;
   3996  1.1  christos 	      if (mbs_cnt == elem_mbs_len)
   3997  1.1  christos 		/* Found the entry.  */
   3998  1.1  christos 		found = true;
   3999  1.1  christos 	    }
   4000  1.1  christos 	  /* Skip the byte sequence of the collating element.  */
   4001  1.1  christos 	  idx += elem_mbs_len;
   4002  1.1  christos 	  /* Adjust for the alignment.  */
   4003  1.1  christos 	  idx = (idx + 3) & ~3;
   4004  1.1  christos 	  /* Skip the collation sequence value.  */
   4005  1.1  christos 	  idx += sizeof (uint32_t);
   4006  1.1  christos 	  /* Skip the wide char sequence of the collating element.  */
   4007  1.1  christos 	  idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
   4008  1.1  christos 	  /* If we found the entry, return the sequence value.  */
   4009  1.1  christos 	  if (found)
   4010  1.1  christos 	    return *(uint32_t *) (extra + idx);
   4011  1.1  christos 	  /* Skip the collation sequence value.  */
   4012  1.1  christos 	  idx += sizeof (uint32_t);
   4013  1.1  christos 	}
   4014  1.1  christos       return UINT_MAX;
   4015  1.1  christos     }
   4016  1.1  christos }
   4017  1.1  christos # endif /* _LIBC */
   4018  1.1  christos #endif /* RE_ENABLE_I18N */
   4019  1.1  christos 
   4020  1.1  christos /* Check whether the node accepts the byte which is IDX-th
   4021  1.1  christos    byte of the INPUT.  */
   4022  1.1  christos 
   4023  1.1  christos static bool
   4024  1.1  christos internal_function
   4025  1.1  christos check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
   4026  1.1  christos 		   Idx idx)
   4027  1.1  christos {
   4028  1.1  christos   unsigned char ch;
   4029  1.1  christos   ch = re_string_byte_at (&mctx->input, idx);
   4030  1.1  christos   switch (node->type)
   4031  1.1  christos     {
   4032  1.1  christos     case CHARACTER:
   4033  1.1  christos       if (node->opr.c != ch)
   4034  1.1  christos         return false;
   4035  1.1  christos       break;
   4036  1.1  christos 
   4037  1.1  christos     case SIMPLE_BRACKET:
   4038  1.1  christos       if (!bitset_contain (node->opr.sbcset, ch))
   4039  1.1  christos         return false;
   4040  1.1  christos       break;
   4041  1.1  christos 
   4042  1.1  christos #ifdef RE_ENABLE_I18N
   4043  1.1  christos     case OP_UTF8_PERIOD:
   4044  1.1  christos       if (ch >= 0x80)
   4045  1.1  christos         return false;
   4046  1.1  christos       /* FALLTHROUGH */
   4047  1.1  christos #endif
   4048  1.1  christos     case OP_PERIOD:
   4049  1.1  christos       if ((ch == '\n' && !(mctx->dfa->syntax & REG_DOT_NEWLINE))
   4050  1.1  christos 	  || (ch == '\0' && (mctx->dfa->syntax & REG_DOT_NOT_NULL)))
   4051  1.1  christos 	return false;
   4052  1.1  christos       break;
   4053  1.1  christos 
   4054  1.1  christos     default:
   4055  1.1  christos       return false;
   4056  1.1  christos     }
   4057  1.1  christos 
   4058  1.1  christos   if (node->constraint)
   4059  1.1  christos     {
   4060  1.1  christos       /* The node has constraints.  Check whether the current context
   4061  1.1  christos 	 satisfies the constraints.  */
   4062  1.1  christos       unsigned int context = re_string_context_at (&mctx->input, idx,
   4063  1.1  christos 						   mctx->eflags);
   4064  1.1  christos       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
   4065  1.1  christos 	return false;
   4066  1.1  christos     }
   4067  1.1  christos 
   4068  1.1  christos   return true;
   4069  1.1  christos }
   4070  1.1  christos 
   4071  1.1  christos /* Extend the buffers, if the buffers have run out.  */
   4072  1.1  christos 
   4073  1.1  christos static reg_errcode_t
   4074  1.1  christos internal_function
   4075  1.1  christos extend_buffers (re_match_context_t *mctx)
   4076  1.1  christos {
   4077  1.1  christos   reg_errcode_t ret;
   4078  1.1  christos   re_string_t *pstr = &mctx->input;
   4079  1.1  christos 
   4080  1.1  christos   /* Double the lengthes of the buffers.  */
   4081  1.1  christos   ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
   4082  1.1  christos   if (BE (ret != REG_NOERROR, 0))
   4083  1.1  christos     return ret;
   4084  1.1  christos 
   4085  1.1  christos   if (mctx->state_log != NULL)
   4086  1.1  christos     {
   4087  1.1  christos       /* And double the length of state_log.  */
   4088  1.1  christos       /* XXX We have no indication of the size of this buffer.  If this
   4089  1.1  christos 	 allocation fail we have no indication that the state_log array
   4090  1.1  christos 	 does not have the right size.  */
   4091  1.1  christos       re_dfastate_t **new_array = re_xrealloc (mctx->state_log, re_dfastate_t *,
   4092  1.1  christos 					       pstr->bufs_len + 1);
   4093  1.1  christos       if (BE (new_array == NULL, 0))
   4094  1.1  christos 	return REG_ESPACE;
   4095  1.1  christos       mctx->state_log = new_array;
   4096  1.1  christos     }
   4097  1.1  christos 
   4098  1.1  christos   /* Then reconstruct the buffers.  */
   4099  1.1  christos   if (pstr->icase)
   4100  1.1  christos     {
   4101  1.1  christos #ifdef RE_ENABLE_I18N
   4102  1.1  christos       if (pstr->mb_cur_max > 1)
   4103  1.1  christos 	{
   4104  1.1  christos 	  ret = build_wcs_upper_buffer (pstr);
   4105  1.1  christos 	  if (BE (ret != REG_NOERROR, 0))
   4106  1.1  christos 	    return ret;
   4107  1.1  christos 	}
   4108  1.1  christos       else
   4109  1.1  christos #endif /* RE_ENABLE_I18N  */
   4110  1.1  christos 	build_upper_buffer (pstr);
   4111  1.1  christos     }
   4112  1.1  christos   else
   4113  1.1  christos     {
   4114  1.1  christos #ifdef RE_ENABLE_I18N
   4115  1.1  christos       if (pstr->mb_cur_max > 1)
   4116  1.1  christos 	build_wcs_buffer (pstr);
   4117  1.1  christos       else
   4118  1.1  christos #endif /* RE_ENABLE_I18N  */
   4119  1.1  christos 	{
   4120  1.1  christos 	  if (pstr->trans != NULL)
   4121  1.1  christos 	    re_string_translate_buffer (pstr);
   4122  1.1  christos 	}
   4123  1.1  christos     }
   4124  1.1  christos   return REG_NOERROR;
   4125  1.1  christos }
   4126  1.1  christos 
   4127  1.1  christos 
   4128  1.1  christos /* Functions for matching context.  */
   4130  1.1  christos 
   4131  1.1  christos /* Initialize MCTX.  */
   4132  1.1  christos 
   4133  1.1  christos static reg_errcode_t
   4134  1.1  christos internal_function
   4135  1.1  christos match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
   4136  1.1  christos {
   4137  1.1  christos   mctx->eflags = eflags;
   4138  1.1  christos   mctx->match_last = REG_MISSING;
   4139  1.1  christos   if (n > 0)
   4140  1.1  christos     {
   4141  1.1  christos       mctx->bkref_ents = re_xmalloc (struct re_backref_cache_entry, n);
   4142  1.1  christos       mctx->sub_tops = re_xmalloc (re_sub_match_top_t *, n);
   4143  1.1  christos       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
   4144  1.1  christos 	return REG_ESPACE;
   4145  1.1  christos     }
   4146  1.1  christos   /* Already zero-ed by the caller.
   4147  1.1  christos      else
   4148  1.1  christos        mctx->bkref_ents = NULL;
   4149  1.1  christos      mctx->nbkref_ents = 0;
   4150  1.1  christos      mctx->nsub_tops = 0;  */
   4151  1.1  christos   mctx->abkref_ents = n;
   4152  1.1  christos   mctx->max_mb_elem_len = 1;
   4153  1.1  christos   mctx->asub_tops = n;
   4154  1.1  christos   return REG_NOERROR;
   4155  1.1  christos }
   4156  1.1  christos 
   4157  1.1  christos /* Clean the entries which depend on the current input in MCTX.
   4158  1.1  christos    This function must be invoked when the matcher changes the start index
   4159  1.1  christos    of the input, or changes the input string.  */
   4160  1.1  christos 
   4161  1.1  christos static void
   4162  1.1  christos internal_function
   4163  1.1  christos match_ctx_clean (re_match_context_t *mctx)
   4164  1.1  christos {
   4165  1.1  christos   Idx st_idx;
   4166  1.1  christos   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
   4167  1.1  christos     {
   4168  1.1  christos       Idx sl_idx;
   4169  1.1  christos       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
   4170  1.1  christos       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
   4171  1.1  christos 	{
   4172  1.1  christos 	  re_sub_match_last_t *last = top->lasts[sl_idx];
   4173  1.1  christos 	  re_free (last->path.array);
   4174  1.1  christos 	  re_free (last);
   4175  1.1  christos 	}
   4176  1.1  christos       re_free (top->lasts);
   4177  1.1  christos       if (top->path)
   4178  1.1  christos 	{
   4179  1.1  christos 	  re_free (top->path->array);
   4180  1.1  christos 	  re_free (top->path);
   4181  1.1  christos 	}
   4182  1.1  christos       free (top);
   4183  1.1  christos     }
   4184  1.1  christos 
   4185  1.1  christos   mctx->nsub_tops = 0;
   4186  1.1  christos   mctx->nbkref_ents = 0;
   4187  1.1  christos }
   4188  1.1  christos 
   4189  1.1  christos /* Free all the memory associated with MCTX.  */
   4190  1.1  christos 
   4191  1.1  christos static void
   4192  1.1  christos internal_function
   4193  1.1  christos match_ctx_free (re_match_context_t *mctx)
   4194  1.1  christos {
   4195  1.1  christos   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
   4196  1.1  christos   match_ctx_clean (mctx);
   4197  1.1  christos   re_free (mctx->sub_tops);
   4198  1.1  christos   re_free (mctx->bkref_ents);
   4199  1.1  christos }
   4200  1.1  christos 
   4201  1.1  christos /* Add a new backreference entry to MCTX.
   4202  1.1  christos    Note that we assume that caller never call this function with duplicate
   4203  1.1  christos    entry, and call with STR_IDX which isn't smaller than any existing entry.
   4204  1.1  christos */
   4205  1.1  christos 
   4206  1.1  christos static reg_errcode_t
   4207  1.1  christos internal_function
   4208  1.1  christos match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx,
   4209  1.1  christos 		     Idx from, Idx to)
   4210  1.1  christos {
   4211  1.1  christos   if (mctx->nbkref_ents >= mctx->abkref_ents)
   4212  1.1  christos     {
   4213  1.1  christos       struct re_backref_cache_entry* new_entry;
   4214  1.1  christos       new_entry = re_x2realloc (mctx->bkref_ents, struct re_backref_cache_entry,
   4215  1.1  christos 				&mctx->abkref_ents);
   4216  1.1  christos       if (BE (new_entry == NULL, 0))
   4217  1.1  christos 	{
   4218  1.1  christos 	  re_free (mctx->bkref_ents);
   4219  1.1  christos 	  return REG_ESPACE;
   4220  1.1  christos 	}
   4221  1.1  christos       mctx->bkref_ents = new_entry;
   4222  1.1  christos       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
   4223  1.1  christos 	      (sizeof (struct re_backref_cache_entry)
   4224  1.1  christos 	       * (mctx->abkref_ents - mctx->nbkref_ents)));
   4225  1.1  christos     }
   4226  1.1  christos   if (mctx->nbkref_ents > 0
   4227  1.1  christos       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
   4228  1.1  christos     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
   4229  1.1  christos 
   4230  1.1  christos   mctx->bkref_ents[mctx->nbkref_ents].node = node;
   4231  1.1  christos   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
   4232  1.1  christos   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
   4233  1.1  christos   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
   4234  1.1  christos 
   4235  1.1  christos   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
   4236  1.1  christos      If bit N is clear, means that this entry won't epsilon-transition to
   4237  1.1  christos      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
   4238  1.1  christos      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
   4239  1.1  christos      such node.
   4240  1.1  christos 
   4241  1.1  christos      A backreference does not epsilon-transition unless it is empty, so set
   4242  1.1  christos      to all zeros if FROM != TO.  */
   4243  1.1  christos   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
   4244  1.1  christos     = (from == to ? -1 : 0);
   4245  1.1  christos 
   4246  1.1  christos   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
   4247  1.1  christos   if (mctx->max_mb_elem_len < to - from)
   4248  1.1  christos     mctx->max_mb_elem_len = to - from;
   4249  1.1  christos   return REG_NOERROR;
   4250  1.1  christos }
   4251  1.1  christos 
   4252  1.1  christos /* Return the first entry with the same str_idx, or REG_MISSING if none is
   4253  1.1  christos    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
   4254  1.1  christos 
   4255  1.1  christos static Idx
   4256  1.1  christos internal_function
   4257  1.1  christos search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
   4258  1.1  christos {
   4259  1.1  christos   Idx left, right, mid, last;
   4260  1.1  christos   last = right = mctx->nbkref_ents;
   4261  1.1  christos   for (left = 0; left < right;)
   4262  1.1  christos     {
   4263  1.1  christos       mid = (left + right) / 2;
   4264  1.1  christos       if (mctx->bkref_ents[mid].str_idx < str_idx)
   4265  1.1  christos 	left = mid + 1;
   4266  1.1  christos       else
   4267  1.1  christos 	right = mid;
   4268  1.1  christos     }
   4269  1.1  christos   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
   4270  1.1  christos     return left;
   4271  1.1  christos   else
   4272  1.1  christos     return REG_MISSING;
   4273  1.1  christos }
   4274  1.1  christos 
   4275  1.1  christos /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
   4276  1.1  christos    at STR_IDX.  */
   4277  1.1  christos 
   4278  1.1  christos static reg_errcode_t
   4279  1.1  christos internal_function
   4280  1.1  christos match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
   4281  1.1  christos {
   4282  1.1  christos #ifdef DEBUG
   4283  1.1  christos   assert (mctx->sub_tops != NULL);
   4284  1.1  christos   assert (mctx->asub_tops > 0);
   4285  1.1  christos #endif
   4286  1.1  christos   if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
   4287  1.1  christos     {
   4288  1.1  christos       Idx new_asub_tops = mctx->asub_tops;
   4289  1.1  christos       re_sub_match_top_t **new_array = re_x2realloc (mctx->sub_tops,
   4290  1.1  christos 						     re_sub_match_top_t *,
   4291  1.1  christos 						     &new_asub_tops);
   4292  1.1  christos       if (BE (new_array == NULL, 0))
   4293  1.1  christos 	return REG_ESPACE;
   4294  1.1  christos       mctx->sub_tops = new_array;
   4295  1.1  christos       mctx->asub_tops = new_asub_tops;
   4296  1.1  christos     }
   4297  1.1  christos   mctx->sub_tops[mctx->nsub_tops] = re_calloc (re_sub_match_top_t, 1);
   4298  1.1  christos   if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
   4299  1.1  christos     return REG_ESPACE;
   4300  1.1  christos   mctx->sub_tops[mctx->nsub_tops]->node = node;
   4301  1.1  christos   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
   4302  1.1  christos   return REG_NOERROR;
   4303  1.1  christos }
   4304  1.1  christos 
   4305  1.1  christos /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
   4306  1.1  christos    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
   4307  1.1  christos 
   4308  1.1  christos static re_sub_match_last_t *
   4309  1.1  christos internal_function
   4310  1.1  christos match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
   4311  1.1  christos {
   4312  1.1  christos   re_sub_match_last_t *new_entry;
   4313  1.1  christos   if (BE (subtop->nlasts == subtop->alasts, 0))
   4314  1.1  christos     {
   4315  1.1  christos       Idx new_alasts = subtop->alasts;
   4316  1.1  christos       re_sub_match_last_t **new_array = re_x2realloc (subtop->lasts,
   4317  1.1  christos 						      re_sub_match_last_t *,
   4318  1.1  christos 						      &new_alasts);
   4319  1.1  christos       if (BE (new_array == NULL, 0))
   4320  1.1  christos 	return NULL;
   4321  1.1  christos       subtop->lasts = new_array;
   4322  1.1  christos       subtop->alasts = new_alasts;
   4323  1.1  christos     }
   4324  1.1  christos   new_entry = re_calloc (re_sub_match_last_t, 1);
   4325  1.1  christos   if (BE (new_entry != NULL, 1))
   4326  1.1  christos     {
   4327  1.1  christos       subtop->lasts[subtop->nlasts] = new_entry;
   4328  1.1  christos       new_entry->node = node;
   4329  1.1  christos       new_entry->str_idx = str_idx;
   4330  1.1  christos       ++subtop->nlasts;
   4331  1.1  christos     }
   4332  1.1  christos   return new_entry;
   4333  1.1  christos }
   4334  1.1  christos 
   4335  1.1  christos static void
   4336  1.1  christos internal_function
   4337  1.1  christos sift_ctx_init (re_sift_context_t *sctx,
   4338  1.1  christos 	       re_dfastate_t **sifted_sts,
   4339  1.1  christos 	       re_dfastate_t **limited_sts,
   4340  1.1  christos 	       Idx last_node, Idx last_str_idx)
   4341  1.1  christos {
   4342  1.1  christos   sctx->sifted_states = sifted_sts;
   4343                  sctx->limited_states = limited_sts;
   4344                  sctx->last_node = last_node;
   4345                  sctx->last_str_idx = last_str_idx;
   4346                  re_node_set_init_empty (&sctx->limits);
   4347                }
   4348