Home | History | Annotate | Line # | Download | only in lib
      1  1.1  christos /* Extended regular expression matching and search library.
      2  1.1  christos    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
      3  1.1  christos    This file is part of the GNU C Library.
      4  1.1  christos    Contributed by Isamu Hasegawa <isamu (at) yamato.ibm.com>.
      5  1.1  christos 
      6  1.1  christos    This program is free software; you can redistribute it and/or modify
      7  1.1  christos    it under the terms of the GNU General Public License as published by
      8  1.1  christos    the Free Software Foundation; either version 2, or (at your option)
      9  1.1  christos    any later version.
     10  1.1  christos 
     11  1.1  christos    This program is distributed in the hope that it will be useful,
     12  1.1  christos    but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  1.1  christos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14  1.1  christos    GNU General Public License for more details.
     15  1.1  christos 
     16  1.1  christos    You should have received a copy of the GNU General Public License along
     17  1.1  christos    with this program; if not, write to the Free Software Foundation,
     18  1.1  christos    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
     19  1.2  christos #include <sys/cdefs.h>
     20  1.2  christos __RCSID("$NetBSD: regex_internal.c,v 1.2 2016/05/17 14:00:09 christos Exp $");
     21  1.2  christos 
     22  1.1  christos 
     23  1.1  christos static void re_string_construct_common (const char *str, Idx len,
     24  1.1  christos 					re_string_t *pstr,
     25  1.1  christos 					REG_TRANSLATE_TYPE trans, bool icase,
     26  1.1  christos 					const re_dfa_t *dfa) internal_function;
     27  1.1  christos static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
     28  1.1  christos 					  const re_node_set *nodes,
     29  1.1  christos 					  re_hashval_t hash) internal_function;
     30  1.1  christos static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
     31  1.1  christos 					  const re_node_set *nodes,
     32  1.1  christos 					  unsigned int context,
     33  1.1  christos 					  re_hashval_t hash) internal_function;
     34  1.1  christos 
     35  1.1  christos /* Functions for string operation.  */
     37  1.1  christos 
     38  1.1  christos /* This function allocate the buffers.  It is necessary to call
     39  1.1  christos    re_string_reconstruct before using the object.  */
     40  1.1  christos 
     41  1.1  christos static reg_errcode_t
     42  1.1  christos internal_function
     43  1.1  christos re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
     44  1.1  christos 		    REG_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
     45  1.1  christos {
     46  1.1  christos   reg_errcode_t ret;
     47  1.1  christos   Idx init_buf_len;
     48  1.1  christos 
     49  1.1  christos   /* Ensure at least one character fits into the buffers.  */
     50  1.1  christos   if (init_len < dfa->mb_cur_max)
     51  1.1  christos     init_len = dfa->mb_cur_max;
     52  1.1  christos   init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
     53  1.1  christos   re_string_construct_common (str, len, pstr, trans, icase, dfa);
     54  1.1  christos 
     55  1.1  christos   ret = re_string_realloc_buffers (pstr, init_buf_len);
     56  1.1  christos   if (BE (ret != REG_NOERROR, 0))
     57  1.1  christos     return ret;
     58  1.1  christos 
     59  1.1  christos   pstr->word_char = dfa->word_char;
     60  1.1  christos   pstr->word_ops_used = dfa->word_ops_used;
     61  1.1  christos   pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
     62  1.1  christos   pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
     63  1.1  christos   pstr->valid_raw_len = pstr->valid_len;
     64  1.1  christos   return REG_NOERROR;
     65  1.1  christos }
     66  1.1  christos 
     67  1.1  christos /* This function allocate the buffers, and initialize them.  */
     68  1.1  christos 
     69  1.1  christos static reg_errcode_t
     70  1.1  christos internal_function
     71  1.1  christos re_string_construct (re_string_t *pstr, const char *str, Idx len,
     72  1.1  christos 		     REG_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
     73  1.1  christos {
     74  1.1  christos   reg_errcode_t ret;
     75  1.1  christos   memset (pstr, '\0', sizeof (re_string_t));
     76  1.1  christos   re_string_construct_common (str, len, pstr, trans, icase, dfa);
     77  1.1  christos 
     78  1.1  christos   if (len > 0)
     79  1.1  christos     {
     80  1.1  christos       ret = re_string_realloc_buffers (pstr, len + 1);
     81  1.1  christos       if (BE (ret != REG_NOERROR, 0))
     82  1.1  christos 	return ret;
     83  1.1  christos     }
     84  1.1  christos   pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
     85  1.1  christos 
     86  1.1  christos   if (icase)
     87  1.1  christos     {
     88  1.1  christos #ifdef RE_ENABLE_I18N
     89  1.1  christos       if (dfa->mb_cur_max > 1)
     90  1.1  christos 	{
     91  1.1  christos 	  while (1)
     92  1.1  christos 	    {
     93  1.1  christos 	      ret = build_wcs_upper_buffer (pstr);
     94  1.1  christos 	      if (BE (ret != REG_NOERROR, 0))
     95  1.1  christos 		return ret;
     96  1.1  christos 	      if (pstr->valid_raw_len >= len)
     97  1.1  christos 		break;
     98  1.1  christos 	      if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
     99  1.1  christos 		break;
    100  1.1  christos 	      ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
    101  1.1  christos 	      if (BE (ret != REG_NOERROR, 0))
    102  1.1  christos 		return ret;
    103  1.1  christos 	    }
    104  1.1  christos 	}
    105  1.1  christos       else
    106  1.1  christos #endif /* RE_ENABLE_I18N  */
    107  1.1  christos 	build_upper_buffer (pstr);
    108  1.1  christos     }
    109  1.1  christos   else
    110  1.1  christos     {
    111  1.1  christos #ifdef RE_ENABLE_I18N
    112  1.1  christos       if (dfa->mb_cur_max > 1)
    113  1.1  christos 	build_wcs_buffer (pstr);
    114  1.1  christos       else
    115  1.1  christos #endif /* RE_ENABLE_I18N  */
    116  1.1  christos 	{
    117  1.1  christos 	  if (trans != NULL)
    118  1.1  christos 	    re_string_translate_buffer (pstr);
    119  1.1  christos 	  else
    120  1.1  christos 	    {
    121  1.1  christos 	      pstr->valid_len = pstr->bufs_len;
    122  1.1  christos 	      pstr->valid_raw_len = pstr->bufs_len;
    123  1.1  christos 	    }
    124  1.1  christos 	}
    125  1.1  christos     }
    126  1.1  christos 
    127  1.1  christos   return REG_NOERROR;
    128  1.1  christos }
    129  1.1  christos 
    130  1.1  christos /* Helper functions for re_string_allocate, and re_string_construct.  */
    131  1.1  christos 
    132  1.1  christos static reg_errcode_t
    133  1.1  christos internal_function
    134  1.1  christos re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
    135  1.1  christos {
    136  1.1  christos #ifdef RE_ENABLE_I18N
    137  1.1  christos   if (pstr->mb_cur_max > 1)
    138  1.1  christos     {
    139  1.1  christos       wint_t *new_wcs = re_xrealloc (pstr->wcs, wint_t, new_buf_len);
    140  1.1  christos       if (BE (new_wcs == NULL, 0))
    141  1.1  christos 	return REG_ESPACE;
    142  1.1  christos       pstr->wcs = new_wcs;
    143  1.1  christos       if (pstr->offsets != NULL)
    144  1.1  christos 	{
    145  1.1  christos 	  Idx *new_offsets = re_xrealloc (pstr->offsets, Idx, new_buf_len);
    146  1.1  christos 	  if (BE (new_offsets == NULL, 0))
    147  1.1  christos 	    return REG_ESPACE;
    148  1.1  christos 	  pstr->offsets = new_offsets;
    149  1.1  christos 	}
    150  1.1  christos     }
    151  1.1  christos #endif /* RE_ENABLE_I18N  */
    152  1.1  christos   if (pstr->mbs_allocated)
    153  1.1  christos     {
    154  1.1  christos       unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
    155  1.1  christos 					   new_buf_len);
    156  1.1  christos       if (BE (new_mbs == NULL, 0))
    157  1.1  christos 	return REG_ESPACE;
    158  1.1  christos       pstr->mbs = new_mbs;
    159  1.1  christos     }
    160  1.1  christos   pstr->bufs_len = new_buf_len;
    161  1.1  christos   return REG_NOERROR;
    162  1.1  christos }
    163  1.1  christos 
    164  1.1  christos 
    165  1.1  christos static void
    166  1.1  christos internal_function
    167  1.1  christos re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
    168  1.1  christos 			    REG_TRANSLATE_TYPE trans, bool icase,
    169  1.1  christos 			    const re_dfa_t *dfa)
    170  1.1  christos {
    171  1.1  christos   pstr->raw_mbs = (const unsigned char *) str;
    172  1.1  christos   pstr->len = len;
    173  1.1  christos   pstr->raw_len = len;
    174  1.1  christos   pstr->trans = (unsigned REG_TRANSLATE_TYPE) trans;
    175  1.1  christos   pstr->icase = icase;
    176  1.1  christos   pstr->mbs_allocated = (trans != NULL || icase);
    177  1.1  christos   pstr->mb_cur_max = dfa->mb_cur_max;
    178  1.1  christos   pstr->is_utf8 = dfa->is_utf8;
    179  1.1  christos   pstr->map_notascii = dfa->map_notascii;
    180  1.1  christos   pstr->stop = pstr->len;
    181  1.1  christos   pstr->raw_stop = pstr->stop;
    182  1.1  christos }
    183  1.1  christos 
    184  1.1  christos #ifdef RE_ENABLE_I18N
    185  1.1  christos 
    186  1.1  christos /* Build wide character buffer PSTR->WCS.
    187  1.1  christos    If the byte sequence of the string are:
    188  1.1  christos      <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
    189  1.1  christos    Then wide character buffer will be:
    190  1.1  christos      <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
    191  1.1  christos    We use WEOF for padding, they indicate that the position isn't
    192  1.1  christos    a first byte of a multibyte character.
    193  1.1  christos 
    194  1.1  christos    Note that this function assumes PSTR->VALID_LEN elements are already
    195  1.1  christos    built and starts from PSTR->VALID_LEN.  */
    196  1.1  christos 
    197  1.1  christos static void
    198  1.1  christos internal_function
    199  1.1  christos build_wcs_buffer (re_string_t *pstr)
    200  1.1  christos {
    201  1.1  christos #ifdef _LIBC
    202  1.1  christos   unsigned char buf[MB_LEN_MAX];
    203  1.1  christos   assert (MB_LEN_MAX >= pstr->mb_cur_max);
    204  1.1  christos #else
    205  1.1  christos   unsigned char buf[64];
    206  1.1  christos #endif
    207  1.1  christos   mbstate_t prev_st;
    208  1.1  christos   Idx byte_idx, end_idx, remain_len;
    209  1.1  christos   size_t mbclen;
    210  1.1  christos 
    211  1.1  christos   /* Build the buffers from pstr->valid_len to either pstr->len or
    212  1.1  christos      pstr->bufs_len.  */
    213  1.1  christos   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
    214  1.1  christos   for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
    215  1.1  christos     {
    216  1.1  christos       wchar_t wc;
    217  1.1  christos       const char *p;
    218  1.1  christos 
    219  1.1  christos       remain_len = end_idx - byte_idx;
    220  1.1  christos       prev_st = pstr->cur_state;
    221  1.1  christos       /* Apply the translation if we need.  */
    222  1.1  christos       if (BE (pstr->trans != NULL, 0))
    223  1.1  christos 	{
    224  1.1  christos 	  int i, ch;
    225  1.1  christos 
    226  1.1  christos 	  for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
    227  1.1  christos 	    {
    228  1.1  christos 	      ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
    229  1.1  christos 	      buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
    230  1.1  christos 	    }
    231  1.1  christos 	  p = (const char *) buf;
    232  1.1  christos 	}
    233  1.1  christos       else
    234  1.1  christos 	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
    235  1.1  christos       mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
    236  1.1  christos       if (BE (mbclen == (size_t) -2, 0))
    237  1.1  christos 	{
    238  1.1  christos 	  /* The buffer doesn't have enough space, finish to build.  */
    239  1.1  christos 	  pstr->cur_state = prev_st;
    240  1.1  christos 	  break;
    241  1.1  christos 	}
    242  1.1  christos       else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
    243  1.1  christos 	{
    244  1.1  christos 	  /* We treat these cases as a singlebyte character.  */
    245  1.1  christos 	  mbclen = 1;
    246  1.1  christos 	  wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
    247  1.1  christos 	  if (BE (pstr->trans != NULL, 0))
    248  1.1  christos 	    wc = pstr->trans[wc];
    249  1.1  christos 	  pstr->cur_state = prev_st;
    250  1.1  christos 	}
    251  1.1  christos 
    252  1.1  christos       /* Write wide character and padding.  */
    253  1.1  christos       pstr->wcs[byte_idx++] = wc;
    254  1.1  christos       /* Write paddings.  */
    255  1.1  christos       for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
    256  1.1  christos 	pstr->wcs[byte_idx++] = WEOF;
    257  1.1  christos     }
    258  1.1  christos   pstr->valid_len = byte_idx;
    259  1.1  christos   pstr->valid_raw_len = byte_idx;
    260  1.1  christos }
    261  1.1  christos 
    262  1.1  christos /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
    263  1.1  christos    but for REG_ICASE.  */
    264  1.1  christos 
    265  1.1  christos static reg_errcode_t
    266  1.1  christos internal_function
    267  1.1  christos build_wcs_upper_buffer (re_string_t *pstr)
    268  1.1  christos {
    269  1.1  christos   mbstate_t prev_st;
    270  1.1  christos   Idx src_idx, byte_idx, end_idx, remain_len;
    271  1.1  christos   size_t mbclen;
    272  1.1  christos #ifdef _LIBC
    273  1.1  christos   char buf[MB_LEN_MAX];
    274  1.1  christos   assert (MB_LEN_MAX >= pstr->mb_cur_max);
    275  1.1  christos #else
    276  1.1  christos   char buf[64];
    277  1.1  christos #endif
    278  1.1  christos 
    279  1.1  christos   byte_idx = pstr->valid_len;
    280  1.1  christos   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
    281  1.1  christos 
    282  1.1  christos   /* The following optimization assumes that ASCII characters can be
    283  1.1  christos      mapped to wide characters with a simple cast.  */
    284  1.1  christos   if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
    285  1.1  christos     {
    286  1.1  christos       while (byte_idx < end_idx)
    287  1.1  christos 	{
    288  1.1  christos 	  wchar_t wc;
    289  1.1  christos 
    290  1.1  christos 	  if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
    291  1.1  christos 	      && mbsinit (&pstr->cur_state))
    292  1.1  christos 	    {
    293  1.1  christos 	      /* In case of a singlebyte character.  */
    294  1.1  christos 	      pstr->mbs[byte_idx]
    295  1.1  christos 		= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
    296  1.1  christos 	      /* The next step uses the assumption that wchar_t is encoded
    297  1.1  christos 		 ASCII-safe: all ASCII values can be converted like this.  */
    298  1.1  christos 	      pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
    299  1.1  christos 	      ++byte_idx;
    300  1.1  christos 	      continue;
    301  1.1  christos 	    }
    302  1.1  christos 
    303  1.1  christos 	  remain_len = end_idx - byte_idx;
    304  1.1  christos 	  prev_st = pstr->cur_state;
    305  1.1  christos 	  mbclen = mbrtowc (&wc,
    306  1.1  christos 			    ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
    307  1.1  christos 			     + byte_idx), remain_len, &pstr->cur_state);
    308  1.1  christos 	  if (BE ((size_t) (mbclen + 2) > 2, 1))
    309  1.1  christos 	    {
    310  1.1  christos 	      wchar_t wcu = wc;
    311  1.1  christos 	      if (iswlower (wc))
    312  1.1  christos 		{
    313  1.1  christos 		  size_t mbcdlen;
    314  1.1  christos 
    315  1.1  christos 		  wcu = towupper (wc);
    316  1.1  christos 		  mbcdlen = wcrtomb (buf, wcu, &prev_st);
    317  1.1  christos 		  if (BE (mbclen == mbcdlen, 1))
    318  1.1  christos 		    memcpy (pstr->mbs + byte_idx, buf, mbclen);
    319  1.1  christos 		  else
    320  1.1  christos 		    {
    321  1.1  christos 		      src_idx = byte_idx;
    322  1.1  christos 		      goto offsets_needed;
    323  1.1  christos 		    }
    324  1.1  christos 		}
    325  1.1  christos 	      else
    326  1.1  christos 		memcpy (pstr->mbs + byte_idx,
    327  1.1  christos 			pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
    328  1.1  christos 	      pstr->wcs[byte_idx++] = wcu;
    329  1.1  christos 	      /* Write paddings.  */
    330  1.1  christos 	      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
    331  1.1  christos 		pstr->wcs[byte_idx++] = WEOF;
    332  1.1  christos 	    }
    333  1.1  christos 	  else if (mbclen == (size_t) -1 || mbclen == 0)
    334  1.1  christos 	    {
    335  1.1  christos 	      /* It is an invalid character or '\0'.  Just use the byte.  */
    336  1.1  christos 	      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
    337  1.1  christos 	      pstr->mbs[byte_idx] = ch;
    338  1.1  christos 	      /* And also cast it to wide char.  */
    339  1.1  christos 	      pstr->wcs[byte_idx++] = (wchar_t) ch;
    340  1.1  christos 	      if (BE (mbclen == (size_t) -1, 0))
    341  1.1  christos 		pstr->cur_state = prev_st;
    342  1.1  christos 	    }
    343  1.1  christos 	  else
    344  1.1  christos 	    {
    345  1.1  christos 	      /* The buffer doesn't have enough space, finish to build.  */
    346  1.1  christos 	      pstr->cur_state = prev_st;
    347  1.1  christos 	      break;
    348  1.1  christos 	    }
    349  1.1  christos 	}
    350  1.1  christos       pstr->valid_len = byte_idx;
    351  1.1  christos       pstr->valid_raw_len = byte_idx;
    352  1.1  christos       return REG_NOERROR;
    353  1.1  christos     }
    354  1.1  christos   else
    355  1.1  christos     for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
    356  1.1  christos       {
    357  1.1  christos 	wchar_t wc;
    358  1.1  christos 	const char *p;
    359  1.1  christos       offsets_needed:
    360  1.1  christos 	remain_len = end_idx - byte_idx;
    361  1.1  christos 	prev_st = pstr->cur_state;
    362  1.1  christos 	if (BE (pstr->trans != NULL, 0))
    363  1.1  christos 	  {
    364  1.1  christos 	    int i, ch;
    365  1.1  christos 
    366  1.1  christos 	    for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
    367  1.1  christos 	      {
    368  1.1  christos 		ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
    369  1.1  christos 		buf[i] = pstr->trans[ch];
    370  1.1  christos 	      }
    371  1.1  christos 	    p = (const char *) buf;
    372  1.1  christos 	  }
    373  1.1  christos 	else
    374  1.1  christos 	  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
    375  1.1  christos 	mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
    376  1.1  christos 	if (BE ((size_t) (mbclen + 2) > 2, 1))
    377  1.1  christos 	  {
    378  1.1  christos 	    wchar_t wcu = wc;
    379  1.1  christos 	    if (iswlower (wc))
    380  1.1  christos 	      {
    381  1.1  christos 		size_t mbcdlen;
    382  1.1  christos 
    383  1.1  christos 		wcu = towupper (wc);
    384  1.1  christos 		mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
    385  1.1  christos 		if (BE (mbclen == mbcdlen, 1))
    386  1.1  christos 		  memcpy (pstr->mbs + byte_idx, buf, mbclen);
    387  1.1  christos 		else if (mbcdlen != (size_t) -1)
    388  1.1  christos 		  {
    389  1.1  christos 		    size_t i;
    390  1.1  christos 
    391  1.1  christos 		    if (byte_idx + mbcdlen > pstr->bufs_len)
    392  1.1  christos 		      {
    393  1.1  christos 			pstr->cur_state = prev_st;
    394  1.1  christos 			break;
    395  1.1  christos 		      }
    396  1.1  christos 
    397  1.1  christos 		    if (pstr->offsets == NULL)
    398  1.1  christos 		      {
    399  1.1  christos 			pstr->offsets = re_xmalloc (Idx, pstr->bufs_len);
    400  1.1  christos 
    401  1.1  christos 			if (pstr->offsets == NULL)
    402  1.1  christos 			  return REG_ESPACE;
    403  1.1  christos 		      }
    404  1.1  christos 		    if (!pstr->offsets_needed)
    405  1.1  christos 		      {
    406  1.1  christos 			for (i = 0; i < (size_t) byte_idx; ++i)
    407  1.1  christos 			  pstr->offsets[i] = i;
    408  1.1  christos 			pstr->offsets_needed = 1;
    409  1.1  christos 		      }
    410  1.1  christos 
    411  1.1  christos 		    memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
    412  1.1  christos 		    pstr->wcs[byte_idx] = wcu;
    413  1.1  christos 		    pstr->offsets[byte_idx] = src_idx;
    414  1.1  christos 		    for (i = 1; i < mbcdlen; ++i)
    415  1.1  christos 		      {
    416  1.1  christos 			pstr->offsets[byte_idx + i]
    417  1.1  christos 			  = src_idx + (i < mbclen ? i : mbclen - 1);
    418  1.1  christos 			pstr->wcs[byte_idx + i] = WEOF;
    419  1.1  christos 		      }
    420  1.1  christos 		    pstr->len += mbcdlen - mbclen;
    421  1.1  christos 		    if (pstr->raw_stop > src_idx)
    422  1.1  christos 		      pstr->stop += mbcdlen - mbclen;
    423  1.1  christos 		    end_idx = (pstr->bufs_len > pstr->len)
    424  1.1  christos 			      ? pstr->len : pstr->bufs_len;
    425  1.1  christos 		    byte_idx += mbcdlen;
    426  1.1  christos 		    src_idx += mbclen;
    427  1.1  christos 		    continue;
    428  1.1  christos 		  }
    429  1.1  christos                 else
    430  1.1  christos                   memcpy (pstr->mbs + byte_idx, p, mbclen);
    431  1.1  christos 	      }
    432  1.1  christos 	    else
    433  1.1  christos 	      memcpy (pstr->mbs + byte_idx, p, mbclen);
    434  1.1  christos 
    435  1.1  christos 	    if (BE (pstr->offsets_needed != 0, 0))
    436  1.1  christos 	      {
    437  1.1  christos 		size_t i;
    438  1.1  christos 		for (i = 0; i < mbclen; ++i)
    439  1.1  christos 		  pstr->offsets[byte_idx + i] = src_idx + i;
    440  1.1  christos 	      }
    441  1.1  christos 	    src_idx += mbclen;
    442  1.1  christos 
    443  1.1  christos 	    pstr->wcs[byte_idx++] = wcu;
    444  1.1  christos 	    /* Write paddings.  */
    445  1.1  christos 	    for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
    446  1.1  christos 	      pstr->wcs[byte_idx++] = WEOF;
    447  1.1  christos 	  }
    448  1.1  christos 	else if (mbclen == (size_t) -1 || mbclen == 0)
    449  1.1  christos 	  {
    450  1.1  christos 	    /* It is an invalid character or '\0'.  Just use the byte.  */
    451  1.1  christos 	    int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
    452  1.1  christos 
    453  1.1  christos 	    if (BE (pstr->trans != NULL, 0))
    454  1.1  christos 	      ch = pstr->trans [ch];
    455  1.1  christos 	    pstr->mbs[byte_idx] = ch;
    456  1.1  christos 
    457  1.1  christos 	    if (BE (pstr->offsets_needed != 0, 0))
    458  1.1  christos 	      pstr->offsets[byte_idx] = src_idx;
    459  1.1  christos 	    ++src_idx;
    460  1.1  christos 
    461  1.1  christos 	    /* And also cast it to wide char.  */
    462  1.1  christos 	    pstr->wcs[byte_idx++] = (wchar_t) ch;
    463  1.1  christos 	    if (BE (mbclen == (size_t) -1, 0))
    464  1.1  christos 	      pstr->cur_state = prev_st;
    465  1.1  christos 	  }
    466  1.1  christos 	else
    467  1.1  christos 	  {
    468  1.1  christos 	    /* The buffer doesn't have enough space, finish to build.  */
    469  1.1  christos 	    pstr->cur_state = prev_st;
    470  1.1  christos 	    break;
    471  1.1  christos 	  }
    472  1.1  christos       }
    473  1.1  christos   pstr->valid_len = byte_idx;
    474  1.1  christos   pstr->valid_raw_len = src_idx;
    475  1.1  christos   return REG_NOERROR;
    476  1.1  christos }
    477  1.1  christos 
    478  1.1  christos /* Skip characters until the index becomes greater than NEW_RAW_IDX.
    479  1.1  christos    Return the index.  */
    480  1.1  christos 
    481  1.1  christos static Idx
    482  1.1  christos internal_function
    483  1.1  christos re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
    484  1.1  christos {
    485  1.1  christos   mbstate_t prev_st;
    486  1.1  christos   Idx rawbuf_idx;
    487  1.1  christos   size_t mbclen;
    488  1.1  christos   wchar_t wc = 0;
    489  1.1  christos 
    490  1.1  christos   /* Skip the characters which are not necessary to check.  */
    491  1.1  christos   for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
    492  1.1  christos        rawbuf_idx < new_raw_idx;)
    493  1.1  christos     {
    494  1.1  christos       Idx remain_len;
    495  1.1  christos       remain_len = pstr->len - rawbuf_idx;
    496  1.1  christos       prev_st = pstr->cur_state;
    497  1.1  christos       mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
    498  1.1  christos 			remain_len, &pstr->cur_state);
    499  1.1  christos       if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
    500  1.1  christos 	{
    501  1.1  christos 	  /* We treat these cases as a singlebyte character.  */
    502  1.1  christos 	  mbclen = 1;
    503  1.1  christos 	  pstr->cur_state = prev_st;
    504  1.1  christos 	}
    505  1.1  christos       /* Then proceed the next character.  */
    506  1.1  christos       rawbuf_idx += mbclen;
    507  1.1  christos     }
    508  1.1  christos   *last_wc = (wint_t) wc;
    509  1.1  christos   return rawbuf_idx;
    510  1.1  christos }
    511  1.1  christos #endif /* RE_ENABLE_I18N  */
    512  1.1  christos 
    513  1.1  christos /* Build the buffer PSTR->MBS, and apply the translation if we need.
    514  1.1  christos    This function is used in case of REG_ICASE.  */
    515  1.1  christos 
    516  1.1  christos static void
    517  1.1  christos internal_function
    518  1.1  christos build_upper_buffer (re_string_t *pstr)
    519  1.1  christos {
    520  1.1  christos   Idx char_idx, end_idx;
    521  1.1  christos   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
    522  1.1  christos 
    523  1.1  christos   for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
    524  1.1  christos     {
    525  1.1  christos       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
    526  1.1  christos       if (BE (pstr->trans != NULL, 0))
    527  1.1  christos 	ch = pstr->trans[ch];
    528  1.1  christos       if (islower (ch))
    529  1.1  christos 	pstr->mbs[char_idx] = toupper (ch);
    530  1.1  christos       else
    531  1.1  christos 	pstr->mbs[char_idx] = ch;
    532  1.1  christos     }
    533  1.1  christos   pstr->valid_len = char_idx;
    534  1.1  christos   pstr->valid_raw_len = char_idx;
    535  1.1  christos }
    536  1.1  christos 
    537  1.1  christos /* Apply TRANS to the buffer in PSTR.  */
    538  1.1  christos 
    539  1.1  christos static void
    540  1.1  christos internal_function
    541  1.1  christos re_string_translate_buffer (re_string_t *pstr)
    542  1.1  christos {
    543  1.1  christos   Idx buf_idx, end_idx;
    544  1.1  christos   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
    545  1.1  christos 
    546  1.1  christos   for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
    547  1.1  christos     {
    548  1.1  christos       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
    549  1.1  christos       pstr->mbs[buf_idx] = pstr->trans[ch];
    550  1.1  christos     }
    551  1.1  christos 
    552  1.1  christos   pstr->valid_len = buf_idx;
    553  1.1  christos   pstr->valid_raw_len = buf_idx;
    554  1.1  christos }
    555  1.1  christos 
    556  1.1  christos /* This function re-construct the buffers.
    557  1.1  christos    Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
    558  1.1  christos    convert to upper case in case of REG_ICASE, apply translation.  */
    559  1.1  christos 
    560  1.1  christos static reg_errcode_t
    561  1.1  christos internal_function
    562  1.1  christos re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
    563  1.1  christos {
    564  1.1  christos   Idx offset;
    565  1.1  christos 
    566  1.1  christos   if (BE (pstr->raw_mbs_idx <= idx, 0))
    567  1.1  christos     offset = idx - pstr->raw_mbs_idx;
    568  1.1  christos   else
    569  1.1  christos     {
    570  1.1  christos       /* Reset buffer.  */
    571  1.1  christos #ifdef RE_ENABLE_I18N
    572  1.1  christos       if (pstr->mb_cur_max > 1)
    573  1.1  christos 	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
    574  1.1  christos #endif /* RE_ENABLE_I18N */
    575  1.1  christos       pstr->len = pstr->raw_len;
    576  1.1  christos       pstr->stop = pstr->raw_stop;
    577  1.1  christos       pstr->valid_len = 0;
    578  1.1  christos       pstr->raw_mbs_idx = 0;
    579  1.1  christos       pstr->valid_raw_len = 0;
    580  1.1  christos       pstr->offsets_needed = 0;
    581  1.1  christos       pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
    582  1.1  christos 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
    583  1.1  christos       if (!pstr->mbs_allocated)
    584  1.1  christos 	pstr->mbs = (unsigned char *) pstr->raw_mbs;
    585  1.1  christos       offset = idx;
    586  1.1  christos     }
    587  1.1  christos 
    588  1.1  christos   if (BE (offset != 0, 1))
    589  1.1  christos     {
    590  1.1  christos       /* Are the characters which are already checked remain?  */
    591  1.1  christos       if (BE (offset < pstr->valid_raw_len, 1)
    592  1.1  christos #ifdef RE_ENABLE_I18N
    593  1.1  christos 	  /* Handling this would enlarge the code too much.
    594  1.1  christos 	     Accept a slowdown in that case.  */
    595  1.1  christos 	  && pstr->offsets_needed == 0
    596  1.1  christos #endif
    597  1.1  christos 	 )
    598  1.1  christos 	{
    599  1.1  christos 	  /* Yes, move them to the front of the buffer.  */
    600  1.1  christos 	  pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags);
    601  1.1  christos #ifdef RE_ENABLE_I18N
    602  1.1  christos 	  if (pstr->mb_cur_max > 1)
    603  1.1  christos 	    memmove (pstr->wcs, pstr->wcs + offset,
    604  1.1  christos 		     (pstr->valid_len - offset) * sizeof (wint_t));
    605  1.1  christos #endif /* RE_ENABLE_I18N */
    606  1.1  christos 	  if (BE (pstr->mbs_allocated, 0))
    607  1.1  christos 	    memmove (pstr->mbs, pstr->mbs + offset,
    608  1.1  christos 		     pstr->valid_len - offset);
    609  1.1  christos 	  pstr->valid_len -= offset;
    610  1.1  christos 	  pstr->valid_raw_len -= offset;
    611  1.1  christos #if DEBUG
    612  1.1  christos 	  assert (pstr->valid_len > 0);
    613  1.1  christos #endif
    614  1.1  christos 	}
    615  1.1  christos       else
    616  1.1  christos 	{
    617  1.1  christos 	  /* No, skip all characters until IDX.  */
    618  1.1  christos #ifdef RE_ENABLE_I18N
    619  1.1  christos 	  if (BE (pstr->offsets_needed, 0))
    620  1.1  christos 	    {
    621  1.1  christos 	      pstr->len = pstr->raw_len - idx + offset;
    622  1.1  christos 	      pstr->stop = pstr->raw_stop - idx + offset;
    623  1.1  christos 	      pstr->offsets_needed = 0;
    624  1.1  christos 	    }
    625  1.1  christos #endif
    626  1.1  christos 	  pstr->valid_len = 0;
    627  1.1  christos 	  pstr->valid_raw_len = 0;
    628  1.1  christos #ifdef RE_ENABLE_I18N
    629  1.1  christos 	  if (pstr->mb_cur_max > 1)
    630  1.1  christos 	    {
    631  1.1  christos 	      Idx wcs_idx;
    632  1.1  christos 	      wint_t wc = WEOF;
    633  1.1  christos 
    634  1.1  christos 	      if (pstr->is_utf8)
    635  1.1  christos 		{
    636  1.1  christos 		  const unsigned char *raw, *p, *q, *end;
    637  1.1  christos 
    638  1.1  christos 		  /* Special case UTF-8.  Multi-byte chars start with any
    639  1.1  christos 		     byte other than 0x80 - 0xbf.  */
    640  1.1  christos 		  raw = pstr->raw_mbs + pstr->raw_mbs_idx;
    641  1.1  christos 		  end = raw + (offset - pstr->mb_cur_max);
    642  1.1  christos 		  for (p = raw + offset - 1; p >= end; --p)
    643  1.1  christos 		    if ((*p & 0xc0) != 0x80)
    644  1.1  christos 		      {
    645  1.1  christos 			mbstate_t cur_state;
    646  1.1  christos 			wchar_t wc2;
    647  1.1  christos 			Idx mlen = raw + pstr->len - p;
    648  1.1  christos 			unsigned char buf[6];
    649  1.1  christos 			size_t mbclen;
    650  1.1  christos 
    651  1.1  christos 			q = p;
    652  1.1  christos 			if (BE (pstr->trans != NULL, 0))
    653  1.1  christos 			  {
    654  1.1  christos 			    int i = mlen < 6 ? mlen : 6;
    655  1.1  christos 			    while (--i >= 0)
    656  1.1  christos 			      buf[i] = pstr->trans[p[i]];
    657  1.1  christos 			    q = buf;
    658  1.1  christos 			  }
    659  1.1  christos 			/* XXX Don't use mbrtowc, we know which conversion
    660  1.1  christos 			   to use (UTF-8 -> UCS4).  */
    661  1.1  christos 			memset (&cur_state, 0, sizeof (cur_state));
    662  1.1  christos 			mbclen = mbrtowc (&wc2, (const char *) p, mlen,
    663  1.1  christos 					  &cur_state);
    664  1.1  christos 			if (raw + offset - p <= mbclen && mbclen < (size_t) -2)
    665  1.1  christos 			  {
    666  1.1  christos 			    memset (&pstr->cur_state, '\0',
    667  1.1  christos 				    sizeof (mbstate_t));
    668  1.1  christos 			    pstr->valid_len = mbclen - (raw + offset - p);
    669  1.1  christos 			    wc = wc2;
    670  1.1  christos 			  }
    671  1.1  christos 			break;
    672  1.1  christos 		      }
    673  1.1  christos 		}
    674  1.1  christos 
    675  1.1  christos 	      if (wc == WEOF)
    676  1.1  christos 		pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
    677  1.1  christos 	      if (BE (pstr->valid_len, 0))
    678  1.1  christos 		{
    679  1.1  christos 		  for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
    680  1.1  christos 		    pstr->wcs[wcs_idx] = WEOF;
    681  1.1  christos 		  if (pstr->mbs_allocated)
    682  1.1  christos 		    memset (pstr->mbs, -1, pstr->valid_len);
    683  1.1  christos 		}
    684  1.1  christos 	      pstr->valid_raw_len = pstr->valid_len;
    685  1.1  christos 	      pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
    686  1.1  christos 				    && IS_WIDE_WORD_CHAR (wc))
    687  1.1  christos 				   ? CONTEXT_WORD
    688  1.1  christos 				   : ((IS_WIDE_NEWLINE (wc)
    689  1.1  christos 				       && pstr->newline_anchor)
    690  1.1  christos 				      ? CONTEXT_NEWLINE : 0));
    691  1.1  christos 	    }
    692  1.1  christos 	  else
    693  1.1  christos #endif /* RE_ENABLE_I18N */
    694  1.1  christos 	    {
    695  1.1  christos 	      int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
    696  1.1  christos 	      if (pstr->trans)
    697  1.1  christos 		c = pstr->trans[c];
    698  1.1  christos 	      pstr->tip_context = (bitset_contain (pstr->word_char, c)
    699  1.1  christos 				   ? CONTEXT_WORD
    700  1.1  christos 				   : ((IS_NEWLINE (c) && pstr->newline_anchor)
    701  1.1  christos 				      ? CONTEXT_NEWLINE : 0));
    702  1.1  christos 	    }
    703  1.1  christos 	}
    704  1.1  christos       if (!BE (pstr->mbs_allocated, 0))
    705  1.1  christos 	pstr->mbs += offset;
    706  1.1  christos     }
    707  1.1  christos   pstr->raw_mbs_idx = idx;
    708  1.1  christos   pstr->len -= offset;
    709  1.1  christos   pstr->stop -= offset;
    710  1.1  christos 
    711  1.1  christos   /* Then build the buffers.  */
    712  1.1  christos #ifdef RE_ENABLE_I18N
    713  1.1  christos   if (pstr->mb_cur_max > 1)
    714  1.1  christos     {
    715  1.1  christos       if (pstr->icase)
    716  1.1  christos 	{
    717  1.1  christos 	  reg_errcode_t ret = build_wcs_upper_buffer (pstr);
    718  1.1  christos 	  if (BE (ret != REG_NOERROR, 0))
    719  1.1  christos 	    return ret;
    720  1.1  christos 	}
    721  1.1  christos       else
    722  1.1  christos 	build_wcs_buffer (pstr);
    723  1.1  christos     }
    724  1.1  christos   else
    725  1.1  christos #endif /* RE_ENABLE_I18N */
    726  1.1  christos   if (BE (pstr->mbs_allocated, 0))
    727  1.1  christos     {
    728  1.1  christos       if (pstr->icase)
    729  1.1  christos 	build_upper_buffer (pstr);
    730  1.1  christos       else if (pstr->trans != NULL)
    731  1.1  christos 	re_string_translate_buffer (pstr);
    732  1.1  christos     }
    733  1.1  christos   else
    734  1.1  christos     pstr->valid_len = pstr->len;
    735  1.1  christos 
    736  1.1  christos   pstr->cur_idx = 0;
    737  1.1  christos   return REG_NOERROR;
    738  1.1  christos }
    739  1.1  christos 
    740  1.1  christos static unsigned char
    741  1.1  christos internal_function __attribute ((pure))
    742  1.1  christos re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
    743  1.1  christos {
    744  1.1  christos   int ch;
    745  1.1  christos   Idx off;
    746  1.1  christos 
    747  1.1  christos   /* Handle the common (easiest) cases first.  */
    748  1.1  christos   if (BE (!pstr->mbs_allocated, 1))
    749  1.1  christos     return re_string_peek_byte (pstr, idx);
    750  1.1  christos 
    751  1.1  christos #ifdef RE_ENABLE_I18N
    752  1.1  christos   if (pstr->mb_cur_max > 1
    753  1.1  christos       && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
    754  1.1  christos     return re_string_peek_byte (pstr, idx);
    755  1.1  christos #endif
    756  1.1  christos 
    757  1.1  christos   off = pstr->cur_idx + idx;
    758  1.1  christos #ifdef RE_ENABLE_I18N
    759  1.1  christos   if (pstr->offsets_needed)
    760  1.1  christos     off = pstr->offsets[off];
    761  1.1  christos #endif
    762  1.1  christos 
    763  1.1  christos   ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
    764  1.1  christos 
    765  1.1  christos #ifdef RE_ENABLE_I18N
    766  1.1  christos   /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
    767  1.1  christos      this function returns CAPITAL LETTER I instead of first byte of
    768  1.1  christos      DOTLESS SMALL LETTER I.  The latter would confuse the parser,
    769  1.1  christos      since peek_byte_case doesn't advance cur_idx in any way.  */
    770  1.1  christos   if (pstr->offsets_needed && !isascii (ch))
    771  1.1  christos     return re_string_peek_byte (pstr, idx);
    772  1.1  christos #endif
    773  1.1  christos 
    774  1.1  christos   return ch;
    775  1.1  christos }
    776  1.1  christos 
    777  1.1  christos static unsigned char
    778  1.1  christos internal_function __attribute ((pure))
    779  1.1  christos re_string_fetch_byte_case (re_string_t *pstr)
    780  1.1  christos {
    781  1.1  christos   if (BE (!pstr->mbs_allocated, 1))
    782  1.1  christos     return re_string_fetch_byte (pstr);
    783  1.1  christos 
    784  1.1  christos #ifdef RE_ENABLE_I18N
    785  1.1  christos   if (pstr->offsets_needed)
    786  1.1  christos     {
    787  1.1  christos       Idx off;
    788  1.1  christos       int ch;
    789  1.1  christos 
    790  1.1  christos       /* For tr_TR.UTF-8 [[:islower:]] there is
    791  1.1  christos 	 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
    792  1.1  christos 	 in that case the whole multi-byte character and return
    793  1.1  christos 	 the original letter.  On the other side, with
    794  1.1  christos 	 [[: DOTLESS SMALL LETTER I return [[:I, as doing
    795  1.1  christos 	 anything else would complicate things too much.  */
    796  1.1  christos 
    797  1.1  christos       if (!re_string_first_byte (pstr, pstr->cur_idx))
    798  1.1  christos 	return re_string_fetch_byte (pstr);
    799  1.1  christos 
    800  1.1  christos       off = pstr->offsets[pstr->cur_idx];
    801  1.1  christos       ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
    802  1.1  christos 
    803  1.1  christos       if (! isascii (ch))
    804  1.1  christos 	return re_string_fetch_byte (pstr);
    805  1.1  christos 
    806  1.1  christos       re_string_skip_bytes (pstr,
    807  1.1  christos 			    re_string_char_size_at (pstr, pstr->cur_idx));
    808  1.1  christos       return ch;
    809  1.1  christos     }
    810  1.1  christos #endif
    811  1.1  christos 
    812  1.1  christos   return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
    813  1.1  christos }
    814  1.1  christos 
    815  1.1  christos static void
    816  1.1  christos internal_function
    817  1.1  christos re_string_destruct (re_string_t *pstr)
    818  1.1  christos {
    819  1.1  christos #ifdef RE_ENABLE_I18N
    820  1.1  christos   re_free (pstr->wcs);
    821  1.1  christos   re_free (pstr->offsets);
    822  1.1  christos #endif /* RE_ENABLE_I18N  */
    823  1.1  christos   if (pstr->mbs_allocated)
    824  1.1  christos     re_free (pstr->mbs);
    825  1.1  christos }
    826  1.1  christos 
    827  1.1  christos /* Return the context at IDX in INPUT.  */
    828  1.1  christos 
    829  1.1  christos static unsigned int
    830  1.1  christos internal_function
    831  1.1  christos re_string_context_at (const re_string_t *input, Idx idx, int eflags)
    832  1.1  christos {
    833  1.1  christos   int c;
    834  1.1  christos   if (BE (! REG_VALID_INDEX (idx), 0))
    835  1.1  christos     /* In this case, we use the value stored in input->tip_context,
    836  1.1  christos        since we can't know the character in input->mbs[-1] here.  */
    837  1.1  christos     return input->tip_context;
    838  1.1  christos   if (BE (idx == input->len, 0))
    839  1.1  christos     return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
    840  1.1  christos 	    : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
    841  1.1  christos #ifdef RE_ENABLE_I18N
    842  1.1  christos   if (input->mb_cur_max > 1)
    843  1.1  christos     {
    844  1.1  christos       wint_t wc;
    845  1.1  christos       Idx wc_idx = idx;
    846  1.1  christos       while(input->wcs[wc_idx] == WEOF)
    847  1.1  christos 	{
    848  1.1  christos #ifdef DEBUG
    849  1.1  christos 	  /* It must not happen.  */
    850  1.1  christos 	  assert (REG_VALID_INDEX (wc_idx));
    851  1.1  christos #endif
    852  1.1  christos 	  --wc_idx;
    853  1.1  christos 	  if (! REG_VALID_INDEX (wc_idx))
    854  1.1  christos 	    return input->tip_context;
    855  1.1  christos 	}
    856  1.1  christos       wc = input->wcs[wc_idx];
    857  1.1  christos       if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
    858  1.1  christos 	return CONTEXT_WORD;
    859  1.1  christos       return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
    860  1.1  christos 	      ? CONTEXT_NEWLINE : 0);
    861  1.1  christos     }
    862  1.1  christos   else
    863  1.1  christos #endif
    864  1.1  christos     {
    865  1.1  christos       c = re_string_byte_at (input, idx);
    866  1.1  christos       if (bitset_contain (input->word_char, c))
    867  1.1  christos 	return CONTEXT_WORD;
    868  1.1  christos       return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
    869  1.1  christos     }
    870  1.1  christos }
    871  1.1  christos 
    872  1.1  christos /* Functions for set operation.  */
    874  1.1  christos 
    875  1.1  christos static reg_errcode_t
    876  1.1  christos internal_function
    877  1.1  christos re_node_set_alloc (re_node_set *set, Idx size)
    878  1.1  christos {
    879  1.1  christos   set->alloc = size;
    880  1.1  christos   set->nelem = 0;
    881  1.1  christos   set->elems = re_xmalloc (Idx, size);
    882  1.1  christos   if (BE (set->elems == NULL, 0))
    883  1.1  christos     return REG_ESPACE;
    884  1.1  christos   return REG_NOERROR;
    885  1.1  christos }
    886  1.1  christos 
    887  1.1  christos static reg_errcode_t
    888  1.1  christos internal_function
    889  1.1  christos re_node_set_init_1 (re_node_set *set, Idx elem)
    890  1.1  christos {
    891  1.1  christos   set->alloc = 1;
    892  1.1  christos   set->nelem = 1;
    893  1.1  christos   set->elems = re_malloc (Idx, 1);
    894  1.1  christos   if (BE (set->elems == NULL, 0))
    895  1.1  christos     {
    896  1.1  christos       set->alloc = set->nelem = 0;
    897  1.1  christos       return REG_ESPACE;
    898  1.1  christos     }
    899  1.1  christos   set->elems[0] = elem;
    900  1.1  christos   return REG_NOERROR;
    901  1.1  christos }
    902  1.1  christos 
    903  1.1  christos static reg_errcode_t
    904  1.1  christos internal_function
    905  1.1  christos re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
    906  1.1  christos {
    907  1.1  christos   set->alloc = 2;
    908  1.1  christos   set->elems = re_malloc (Idx, 2);
    909  1.1  christos   if (BE (set->elems == NULL, 0))
    910  1.1  christos     return REG_ESPACE;
    911  1.1  christos   if (elem1 == elem2)
    912  1.1  christos     {
    913  1.1  christos       set->nelem = 1;
    914  1.1  christos       set->elems[0] = elem1;
    915  1.1  christos     }
    916  1.1  christos   else
    917  1.1  christos     {
    918  1.1  christos       set->nelem = 2;
    919  1.1  christos       if (elem1 < elem2)
    920  1.1  christos 	{
    921  1.1  christos 	  set->elems[0] = elem1;
    922  1.1  christos 	  set->elems[1] = elem2;
    923  1.1  christos 	}
    924  1.1  christos       else
    925  1.1  christos 	{
    926  1.1  christos 	  set->elems[0] = elem2;
    927  1.1  christos 	  set->elems[1] = elem1;
    928  1.1  christos 	}
    929  1.1  christos     }
    930  1.1  christos   return REG_NOERROR;
    931  1.1  christos }
    932  1.1  christos 
    933  1.1  christos static reg_errcode_t
    934  1.1  christos internal_function
    935  1.1  christos re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
    936  1.1  christos {
    937  1.1  christos   dest->nelem = src->nelem;
    938  1.1  christos   if (src->nelem > 0)
    939  1.1  christos     {
    940  1.1  christos       dest->alloc = dest->nelem;
    941  1.1  christos       dest->elems = re_malloc (Idx, dest->alloc);
    942  1.1  christos       if (BE (dest->elems == NULL, 0))
    943  1.1  christos 	{
    944  1.1  christos 	  dest->alloc = dest->nelem = 0;
    945  1.1  christos 	  return REG_ESPACE;
    946  1.1  christos 	}
    947  1.1  christos       memcpy (dest->elems, src->elems, src->nelem * sizeof dest->elems[0]);
    948  1.1  christos     }
    949  1.1  christos   else
    950  1.1  christos     re_node_set_init_empty (dest);
    951  1.1  christos   return REG_NOERROR;
    952  1.1  christos }
    953  1.1  christos 
    954  1.1  christos /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
    955  1.1  christos    DEST. Return value indicate the error code or REG_NOERROR if succeeded.
    956  1.1  christos    Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
    957  1.1  christos 
    958  1.1  christos static reg_errcode_t
    959  1.1  christos internal_function
    960  1.1  christos re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
    961  1.1  christos 			   const re_node_set *src2)
    962  1.1  christos {
    963  1.1  christos   Idx i1, i2, is, id, delta, sbase;
    964  1.1  christos   if (src1->nelem == 0 || src2->nelem == 0)
    965  1.1  christos     return REG_NOERROR;
    966  1.1  christos 
    967  1.1  christos   /* We need dest->nelem + 2 * elems_in_intersection; this is a
    968  1.1  christos      conservative estimate.  */
    969  1.1  christos   if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
    970  1.1  christos     {
    971  1.1  christos       Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
    972  1.1  christos       Idx *new_elems;
    973  1.1  christos       if (sizeof (Idx) < 3
    974  1.1  christos 	  && (new_alloc < dest->alloc
    975  1.1  christos 	      || ((Idx) (src1->nelem + src2->nelem) < src1->nelem)))
    976  1.1  christos 	return REG_ESPACE;
    977  1.1  christos       new_elems = re_xrealloc (dest->elems, Idx, new_alloc);
    978  1.1  christos       if (BE (new_elems == NULL, 0))
    979  1.1  christos         return REG_ESPACE;
    980  1.1  christos       dest->elems = new_elems;
    981  1.1  christos       dest->alloc = new_alloc;
    982  1.1  christos     }
    983  1.1  christos 
    984  1.1  christos   /* Find the items in the intersection of SRC1 and SRC2, and copy
    985  1.1  christos      into the top of DEST those that are not already in DEST itself.  */
    986  1.1  christos   sbase = dest->nelem + src1->nelem + src2->nelem;
    987  1.1  christos   i1 = src1->nelem - 1;
    988  1.1  christos   i2 = src2->nelem - 1;
    989  1.1  christos   id = dest->nelem - 1;
    990  1.1  christos   for (;;)
    991  1.1  christos     {
    992  1.1  christos       if (src1->elems[i1] == src2->elems[i2])
    993  1.1  christos 	{
    994  1.1  christos 	  /* Try to find the item in DEST.  Maybe we could binary search?  */
    995  1.1  christos 	  while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
    996  1.1  christos 	    --id;
    997  1.1  christos 
    998  1.1  christos           if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1])
    999  1.1  christos             dest->elems[--sbase] = src1->elems[i1];
   1000  1.1  christos 
   1001  1.1  christos 	  if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2))
   1002  1.1  christos 	    break;
   1003  1.1  christos 	}
   1004  1.1  christos 
   1005  1.1  christos       /* Lower the highest of the two items.  */
   1006  1.1  christos       else if (src1->elems[i1] < src2->elems[i2])
   1007  1.1  christos 	{
   1008  1.1  christos 	  if (! REG_VALID_INDEX (--i2))
   1009  1.1  christos 	    break;
   1010  1.1  christos 	}
   1011  1.1  christos       else
   1012  1.1  christos 	{
   1013  1.1  christos 	  if (! REG_VALID_INDEX (--i1))
   1014  1.1  christos 	    break;
   1015  1.1  christos 	}
   1016  1.1  christos     }
   1017  1.1  christos 
   1018  1.1  christos   id = dest->nelem - 1;
   1019  1.1  christos   is = dest->nelem + src1->nelem + src2->nelem - 1;
   1020  1.1  christos   delta = is - sbase + 1;
   1021  1.1  christos 
   1022  1.1  christos   /* Now copy.  When DELTA becomes zero, the remaining
   1023  1.1  christos      DEST elements are already in place; this is more or
   1024  1.1  christos      less the same loop that is in re_node_set_merge.  */
   1025  1.1  christos   dest->nelem += delta;
   1026  1.1  christos   if (delta > 0 && REG_VALID_INDEX (id))
   1027  1.1  christos     for (;;)
   1028  1.1  christos       {
   1029  1.1  christos         if (dest->elems[is] > dest->elems[id])
   1030  1.1  christos           {
   1031  1.1  christos             /* Copy from the top.  */
   1032  1.1  christos             dest->elems[id + delta--] = dest->elems[is--];
   1033  1.1  christos             if (delta == 0)
   1034  1.1  christos               break;
   1035  1.1  christos           }
   1036  1.1  christos         else
   1037  1.1  christos           {
   1038  1.1  christos             /* Slide from the bottom.  */
   1039  1.1  christos             dest->elems[id + delta] = dest->elems[id];
   1040  1.1  christos             if (! REG_VALID_INDEX (--id))
   1041  1.1  christos               break;
   1042  1.1  christos           }
   1043  1.1  christos       }
   1044  1.1  christos 
   1045  1.1  christos   /* Copy remaining SRC elements.  */
   1046  1.1  christos   memcpy (dest->elems, dest->elems + sbase, delta * sizeof dest->elems[0]);
   1047  1.1  christos 
   1048  1.1  christos   return REG_NOERROR;
   1049  1.1  christos }
   1050  1.1  christos 
   1051  1.1  christos /* Calculate the union set of the sets SRC1 and SRC2. And store it to
   1052  1.1  christos    DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
   1053  1.1  christos 
   1054  1.1  christos static reg_errcode_t
   1055  1.1  christos internal_function
   1056  1.1  christos re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
   1057  1.1  christos 			const re_node_set *src2)
   1058  1.1  christos {
   1059  1.1  christos   Idx i1, i2, id;
   1060  1.1  christos   if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
   1061  1.1  christos     {
   1062  1.1  christos       dest->alloc = src1->nelem + src2->nelem;
   1063  1.1  christos       if (sizeof (Idx) < 2 && dest->alloc < src1->nelem)
   1064  1.1  christos 	return REG_ESPACE;
   1065  1.1  christos       dest->elems = re_xmalloc (Idx, dest->alloc);
   1066  1.1  christos       if (BE (dest->elems == NULL, 0))
   1067  1.1  christos 	return REG_ESPACE;
   1068  1.1  christos     }
   1069  1.1  christos   else
   1070  1.1  christos     {
   1071  1.1  christos       if (src1 != NULL && src1->nelem > 0)
   1072  1.1  christos 	return re_node_set_init_copy (dest, src1);
   1073  1.1  christos       else if (src2 != NULL && src2->nelem > 0)
   1074  1.1  christos 	return re_node_set_init_copy (dest, src2);
   1075  1.1  christos       else
   1076  1.1  christos 	re_node_set_init_empty (dest);
   1077  1.1  christos       return REG_NOERROR;
   1078  1.1  christos     }
   1079  1.1  christos   for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
   1080  1.1  christos     {
   1081  1.1  christos       if (src1->elems[i1] > src2->elems[i2])
   1082  1.1  christos 	{
   1083  1.1  christos 	  dest->elems[id++] = src2->elems[i2++];
   1084  1.1  christos 	  continue;
   1085  1.1  christos 	}
   1086  1.1  christos       if (src1->elems[i1] == src2->elems[i2])
   1087  1.1  christos 	++i2;
   1088  1.1  christos       dest->elems[id++] = src1->elems[i1++];
   1089  1.1  christos     }
   1090  1.1  christos   if (i1 < src1->nelem)
   1091  1.1  christos     {
   1092  1.1  christos       memcpy (dest->elems + id, src1->elems + i1,
   1093  1.1  christos 	     (src1->nelem - i1) * sizeof dest->elems[0]);
   1094  1.1  christos       id += src1->nelem - i1;
   1095  1.1  christos     }
   1096  1.1  christos   else if (i2 < src2->nelem)
   1097  1.1  christos     {
   1098  1.1  christos       memcpy (dest->elems + id, src2->elems + i2,
   1099  1.1  christos 	     (src2->nelem - i2) * sizeof dest->elems[0]);
   1100  1.1  christos       id += src2->nelem - i2;
   1101  1.1  christos     }
   1102  1.1  christos   dest->nelem = id;
   1103  1.1  christos   return REG_NOERROR;
   1104  1.1  christos }
   1105  1.1  christos 
   1106  1.1  christos /* Calculate the union set of the sets DEST and SRC. And store it to
   1107  1.1  christos    DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
   1108  1.1  christos 
   1109  1.1  christos static reg_errcode_t
   1110  1.1  christos internal_function
   1111  1.1  christos re_node_set_merge (re_node_set *dest, const re_node_set *src)
   1112  1.1  christos {
   1113  1.1  christos   Idx is, id, sbase, delta;
   1114  1.1  christos   if (src == NULL || src->nelem == 0)
   1115  1.1  christos     return REG_NOERROR;
   1116  1.1  christos   if (sizeof (Idx) < 3
   1117  1.1  christos       && ((Idx) (2 * src->nelem) < src->nelem
   1118  1.1  christos 	  || (Idx) (2 * src->nelem + dest->nelem) < dest->nelem))
   1119  1.1  christos     return REG_ESPACE;
   1120  1.1  christos   if (dest->alloc < 2 * src->nelem + dest->nelem)
   1121  1.1  christos     {
   1122  1.1  christos       Idx new_alloc = src->nelem + dest->alloc;
   1123  1.1  christos       Idx *new_buffer;
   1124  1.1  christos       if (sizeof (Idx) < 4 && new_alloc < dest->alloc)
   1125  1.1  christos 	return REG_ESPACE;
   1126  1.1  christos       new_buffer = re_x2realloc (dest->elems, Idx, &new_alloc);
   1127  1.1  christos       if (BE (new_buffer == NULL, 0))
   1128  1.1  christos 	return REG_ESPACE;
   1129  1.1  christos       dest->elems = new_buffer;
   1130  1.1  christos       dest->alloc = new_alloc;
   1131  1.1  christos     }
   1132  1.1  christos 
   1133  1.1  christos   if (BE (dest->nelem == 0, 0))
   1134  1.1  christos     {
   1135  1.1  christos       dest->nelem = src->nelem;
   1136  1.1  christos       memcpy (dest->elems, src->elems, src->nelem * sizeof dest->elems[0]);
   1137  1.1  christos       return REG_NOERROR;
   1138  1.1  christos     }
   1139  1.1  christos 
   1140  1.1  christos   /* Copy into the top of DEST the items of SRC that are not
   1141  1.1  christos      found in DEST.  Maybe we could binary search in DEST?  */
   1142  1.1  christos   for (sbase = dest->nelem + 2 * src->nelem,
   1143  1.1  christos        is = src->nelem - 1, id = dest->nelem - 1;
   1144  1.1  christos        REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
   1145  1.1  christos     {
   1146  1.1  christos       if (dest->elems[id] == src->elems[is])
   1147  1.1  christos         is--, id--;
   1148  1.1  christos       else if (dest->elems[id] < src->elems[is])
   1149  1.1  christos         dest->elems[--sbase] = src->elems[is--];
   1150  1.1  christos       else /* if (dest->elems[id] > src->elems[is]) */
   1151  1.1  christos         --id;
   1152  1.1  christos     }
   1153  1.1  christos 
   1154  1.1  christos   if (REG_VALID_INDEX (is))
   1155  1.1  christos     {
   1156  1.1  christos       /* If DEST is exhausted, the remaining items of SRC must be unique.  */
   1157  1.1  christos       sbase -= is + 1;
   1158  1.1  christos       memcpy (dest->elems + sbase, src->elems,
   1159  1.1  christos 	      (is + 1) * sizeof dest->elems[0]);
   1160  1.1  christos     }
   1161  1.1  christos 
   1162  1.1  christos   id = dest->nelem - 1;
   1163  1.1  christos   is = dest->nelem + 2 * src->nelem - 1;
   1164  1.1  christos   delta = is - sbase + 1;
   1165  1.1  christos   if (delta == 0)
   1166  1.1  christos     return REG_NOERROR;
   1167  1.1  christos 
   1168  1.1  christos   /* Now copy.  When DELTA becomes zero, the remaining
   1169  1.1  christos      DEST elements are already in place.  */
   1170  1.1  christos   dest->nelem += delta;
   1171  1.1  christos   for (;;)
   1172  1.1  christos     {
   1173  1.1  christos       if (dest->elems[is] > dest->elems[id])
   1174  1.1  christos         {
   1175  1.1  christos 	  /* Copy from the top.  */
   1176  1.1  christos           dest->elems[id + delta--] = dest->elems[is--];
   1177  1.1  christos 	  if (delta == 0)
   1178  1.1  christos 	    break;
   1179  1.1  christos 	}
   1180  1.1  christos       else
   1181  1.1  christos         {
   1182  1.1  christos           /* Slide from the bottom.  */
   1183  1.1  christos           dest->elems[id + delta] = dest->elems[id];
   1184  1.1  christos 	  if (! REG_VALID_INDEX (--id))
   1185  1.1  christos 	    {
   1186  1.1  christos 	      /* Copy remaining SRC elements.  */
   1187  1.1  christos 	      memcpy (dest->elems, dest->elems + sbase,
   1188  1.1  christos 	              delta * sizeof dest->elems[0]);
   1189  1.1  christos 	      break;
   1190  1.1  christos 	    }
   1191  1.1  christos 	}
   1192  1.1  christos     }
   1193  1.1  christos 
   1194  1.1  christos   return REG_NOERROR;
   1195  1.1  christos }
   1196  1.1  christos 
   1197  1.1  christos /* Insert the new element ELEM to the re_node_set* SET.
   1198  1.1  christos    SET should not already have ELEM.
   1199  1.1  christos    Return true if successful.  */
   1200  1.1  christos 
   1201  1.1  christos static bool
   1202  1.1  christos internal_function
   1203  1.1  christos re_node_set_insert (re_node_set *set, Idx elem)
   1204  1.1  christos {
   1205  1.1  christos   Idx idx;
   1206  1.1  christos   /* In case the set is empty.  */
   1207  1.1  christos   if (set->alloc == 0)
   1208  1.1  christos     return re_node_set_init_1 (set, elem) == REG_NOERROR;
   1209  1.1  christos 
   1210  1.1  christos   if (BE (set->nelem, 0) == 0)
   1211  1.1  christos     {
   1212  1.1  christos       /* We already guaranteed above that set->alloc != 0.  */
   1213  1.1  christos       set->elems[0] = elem;
   1214  1.1  christos       ++set->nelem;
   1215  1.1  christos       return true;
   1216  1.1  christos     }
   1217  1.1  christos 
   1218  1.1  christos   /* Realloc if we need.  */
   1219  1.1  christos   if (set->alloc == set->nelem)
   1220  1.1  christos     {
   1221  1.1  christos       Idx *new_elems = re_x2realloc (set->elems, Idx, &set->alloc);
   1222  1.1  christos       if (BE (new_elems == NULL, 0))
   1223  1.1  christos 	return false;
   1224  1.1  christos       set->elems = new_elems;
   1225  1.1  christos     }
   1226  1.1  christos 
   1227  1.1  christos   /* Move the elements which follows the new element.  Test the
   1228  1.1  christos      first element separately to skip a check in the inner loop.  */
   1229  1.1  christos   if (elem < set->elems[0])
   1230  1.1  christos     {
   1231  1.1  christos       idx = 0;
   1232  1.1  christos       for (idx = set->nelem; idx > 0; idx--)
   1233  1.1  christos         set->elems[idx] = set->elems[idx - 1];
   1234  1.1  christos     }
   1235  1.1  christos   else
   1236  1.1  christos     {
   1237  1.1  christos       for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
   1238  1.1  christos         set->elems[idx] = set->elems[idx - 1];
   1239  1.1  christos     }
   1240  1.1  christos 
   1241  1.1  christos   /* Insert the new element.  */
   1242  1.1  christos   set->elems[idx] = elem;
   1243  1.1  christos   ++set->nelem;
   1244  1.1  christos   return true;
   1245  1.1  christos }
   1246  1.1  christos 
   1247  1.1  christos /* Insert the new element ELEM to the re_node_set* SET.
   1248  1.1  christos    SET should not already have any element greater than or equal to ELEM.
   1249  1.1  christos    Return true if successful.  */
   1250  1.1  christos 
   1251  1.1  christos static bool
   1252  1.1  christos internal_function
   1253  1.1  christos re_node_set_insert_last (re_node_set *set, Idx elem)
   1254  1.1  christos {
   1255  1.1  christos   /* Realloc if we need.  */
   1256  1.1  christos   if (set->alloc == set->nelem)
   1257  1.1  christos     {
   1258  1.1  christos       Idx *new_elems;
   1259  1.1  christos       new_elems = re_x2realloc (set->elems, Idx, &set->alloc);
   1260  1.1  christos       if (BE (new_elems == NULL, 0))
   1261  1.1  christos 	return false;
   1262  1.1  christos       set->elems = new_elems;
   1263  1.1  christos     }
   1264  1.1  christos 
   1265  1.1  christos   /* Insert the new element.  */
   1266  1.1  christos   set->elems[set->nelem++] = elem;
   1267  1.1  christos   return true;
   1268  1.1  christos }
   1269  1.1  christos 
   1270  1.1  christos /* Compare two node sets SET1 and SET2.
   1271  1.1  christos    Return true if SET1 and SET2 are equivalent.  */
   1272  1.1  christos 
   1273  1.1  christos static bool
   1274  1.1  christos internal_function __attribute ((pure))
   1275  1.1  christos re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
   1276  1.1  christos {
   1277  1.1  christos   Idx i;
   1278  1.1  christos   if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
   1279  1.1  christos     return false;
   1280  1.1  christos   for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
   1281  1.1  christos     if (set1->elems[i] != set2->elems[i])
   1282  1.1  christos       return false;
   1283  1.1  christos   return true;
   1284  1.1  christos }
   1285  1.1  christos 
   1286  1.1  christos /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise.  */
   1287  1.1  christos 
   1288  1.1  christos static Idx
   1289  1.1  christos internal_function __attribute ((pure))
   1290  1.1  christos re_node_set_contains (const re_node_set *set, Idx elem)
   1291  1.1  christos {
   1292  1.1  christos   __re_size_t idx, right, mid;
   1293  1.1  christos   if (! REG_VALID_NONZERO_INDEX (set->nelem))
   1294  1.1  christos     return 0;
   1295  1.1  christos 
   1296  1.1  christos   /* Binary search the element.  */
   1297  1.1  christos   idx = 0;
   1298  1.1  christos   right = set->nelem - 1;
   1299  1.1  christos   while (idx < right)
   1300  1.1  christos     {
   1301  1.1  christos       mid = (idx + right) / 2;
   1302  1.1  christos       if (set->elems[mid] < elem)
   1303  1.1  christos 	idx = mid + 1;
   1304  1.1  christos       else
   1305  1.1  christos 	right = mid;
   1306  1.1  christos     }
   1307  1.1  christos   return set->elems[idx] == elem ? idx + 1 : 0;
   1308  1.1  christos }
   1309  1.1  christos 
   1310  1.1  christos static void
   1311  1.1  christos internal_function
   1312  1.1  christos re_node_set_remove_at (re_node_set *set, Idx idx)
   1313  1.1  christos {
   1314  1.1  christos   if (idx < 0 || idx >= set->nelem)
   1315  1.1  christos     return;
   1316  1.1  christos   --set->nelem;
   1317  1.1  christos   for (; idx < set->nelem; idx++)
   1318  1.1  christos     set->elems[idx] = set->elems[idx + 1];
   1319  1.1  christos }
   1320  1.1  christos 
   1321  1.1  christos 
   1323  1.1  christos /* Add the token TOKEN to dfa->nodes, and return the index of the token.
   1324  1.1  christos    Or return REG_MISSING if an error occurred.  */
   1325  1.1  christos 
   1326  1.1  christos static Idx
   1327  1.1  christos internal_function
   1328  1.1  christos re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
   1329  1.1  christos {
   1330  1.1  christos   int type = token.type;
   1331  1.1  christos   if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
   1332  1.1  christos     {
   1333  1.1  christos       Idx new_nodes_alloc = dfa->nodes_alloc;
   1334  1.1  christos       Idx *new_nexts, *new_indices;
   1335  1.1  christos       re_node_set *new_edests, *new_eclosures;
   1336  1.1  christos 
   1337  1.1  christos       re_token_t *new_nodes = re_x2realloc (dfa->nodes, re_token_t,
   1338  1.1  christos 					    &new_nodes_alloc);
   1339  1.1  christos       if (BE (new_nodes == NULL, 0))
   1340  1.1  christos 	return REG_MISSING;
   1341  1.1  christos       dfa->nodes = new_nodes;
   1342  1.1  christos       new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
   1343  1.1  christos       new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
   1344  1.1  christos       new_edests = re_xrealloc (dfa->edests, re_node_set, new_nodes_alloc);
   1345  1.1  christos       new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
   1346  1.1  christos       if (BE (new_nexts == NULL || new_indices == NULL
   1347  1.1  christos 	      || new_edests == NULL || new_eclosures == NULL, 0))
   1348  1.1  christos 	return REG_MISSING;
   1349  1.1  christos       dfa->nexts = new_nexts;
   1350  1.1  christos       dfa->org_indices = new_indices;
   1351  1.1  christos       dfa->edests = new_edests;
   1352  1.1  christos       dfa->eclosures = new_eclosures;
   1353  1.1  christos       dfa->nodes_alloc = new_nodes_alloc;
   1354  1.1  christos     }
   1355  1.1  christos   dfa->nodes[dfa->nodes_len] = token;
   1356  1.1  christos   dfa->nodes[dfa->nodes_len].constraint = 0;
   1357  1.1  christos #ifdef RE_ENABLE_I18N
   1358  1.1  christos   dfa->nodes[dfa->nodes_len].accept_mb =
   1359  1.1  christos     (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
   1360  1.1  christos #endif
   1361  1.1  christos   dfa->nexts[dfa->nodes_len] = REG_MISSING;
   1362  1.1  christos   re_node_set_init_empty (dfa->edests + dfa->nodes_len);
   1363  1.1  christos   re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
   1364  1.1  christos   return dfa->nodes_len++;
   1365  1.1  christos }
   1366  1.1  christos 
   1367  1.1  christos static inline re_hashval_t
   1368  1.1  christos internal_function
   1369  1.1  christos calc_state_hash (const re_node_set *nodes, unsigned int context)
   1370  1.1  christos {
   1371  1.1  christos   re_hashval_t hash = nodes->nelem + context;
   1372  1.1  christos   Idx i;
   1373  1.1  christos   for (i = 0 ; i < nodes->nelem ; i++)
   1374  1.1  christos     hash += nodes->elems[i];
   1375  1.1  christos   return hash;
   1376  1.1  christos }
   1377  1.1  christos 
   1378  1.1  christos /* Search for the state whose node_set is equivalent to NODES.
   1379  1.1  christos    Return the pointer to the state, if we found it in the DFA.
   1380  1.1  christos    Otherwise create the new one and return it.  In case of an error
   1381  1.1  christos    return NULL and set the error code in ERR.
   1382  1.1  christos    Note: - We assume NULL as the invalid state, then it is possible that
   1383  1.1  christos 	   return value is NULL and ERR is REG_NOERROR.
   1384  1.1  christos 	 - We never return non-NULL value in case of any errors, it is for
   1385  1.1  christos 	   optimization.  */
   1386  1.1  christos 
   1387  1.1  christos static re_dfastate_t*
   1388  1.1  christos internal_function
   1389  1.1  christos re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa, const re_node_set *nodes)
   1390  1.1  christos {
   1391  1.1  christos   re_hashval_t hash;
   1392  1.1  christos   re_dfastate_t *new_state;
   1393  1.1  christos   struct re_state_table_entry *spot;
   1394  1.1  christos   Idx i;
   1395  1.1  christos #ifdef lint
   1396  1.1  christos   /* Suppress bogus uninitialized-variable warnings.  */
   1397  1.1  christos   *err = REG_NOERROR;
   1398  1.1  christos #endif
   1399  1.1  christos   if (BE (nodes->nelem == 0, 0))
   1400  1.1  christos     {
   1401  1.1  christos       *err = REG_NOERROR;
   1402  1.1  christos       return NULL;
   1403  1.1  christos     }
   1404  1.1  christos   hash = calc_state_hash (nodes, 0);
   1405  1.1  christos   spot = dfa->state_table + (hash & dfa->state_hash_mask);
   1406  1.1  christos 
   1407  1.1  christos   for (i = 0 ; i < spot->num ; i++)
   1408  1.1  christos     {
   1409  1.1  christos       re_dfastate_t *state = spot->array[i];
   1410  1.1  christos       if (hash != state->hash)
   1411  1.1  christos 	continue;
   1412  1.1  christos       if (re_node_set_compare (&state->nodes, nodes))
   1413  1.1  christos 	return state;
   1414  1.1  christos     }
   1415  1.1  christos 
   1416  1.1  christos   /* There are no appropriate state in the dfa, create the new one.  */
   1417  1.1  christos   new_state = create_ci_newstate (dfa, nodes, hash);
   1418  1.1  christos   if (BE (new_state != NULL, 1))
   1419  1.1  christos     return new_state;
   1420  1.1  christos   else
   1421  1.1  christos     {
   1422  1.1  christos       *err = REG_ESPACE;
   1423  1.1  christos       return NULL;
   1424  1.1  christos     }
   1425  1.1  christos }
   1426  1.1  christos 
   1427  1.1  christos /* Search for the state whose node_set is equivalent to NODES and
   1428  1.1  christos    whose context is equivalent to CONTEXT.
   1429  1.1  christos    Return the pointer to the state, if we found it in the DFA.
   1430  1.1  christos    Otherwise create the new one and return it.  In case of an error
   1431  1.1  christos    return NULL and set the error code in ERR.
   1432  1.1  christos    Note: - We assume NULL as the invalid state, then it is possible that
   1433  1.1  christos 	   return value is NULL and ERR is REG_NOERROR.
   1434  1.1  christos 	 - We never return non-NULL value in case of any errors, it is for
   1435  1.1  christos 	   optimization.  */
   1436  1.1  christos 
   1437  1.1  christos static re_dfastate_t*
   1438  1.1  christos internal_function
   1439  1.1  christos re_acquire_state_context (reg_errcode_t *err, re_dfa_t *dfa,
   1440  1.1  christos 			  const re_node_set *nodes, unsigned int context)
   1441  1.1  christos {
   1442  1.1  christos   re_hashval_t hash;
   1443  1.1  christos   re_dfastate_t *new_state;
   1444  1.1  christos   struct re_state_table_entry *spot;
   1445  1.1  christos   Idx i;
   1446  1.1  christos #ifdef lint
   1447  1.1  christos   /* Suppress bogus uninitialized-variable warnings.  */
   1448  1.1  christos   *err = REG_NOERROR;
   1449  1.1  christos #endif
   1450  1.1  christos   if (nodes->nelem == 0)
   1451  1.1  christos     {
   1452  1.1  christos       *err = REG_NOERROR;
   1453  1.1  christos       return NULL;
   1454  1.1  christos     }
   1455  1.1  christos   hash = calc_state_hash (nodes, context);
   1456  1.1  christos   spot = dfa->state_table + (hash & dfa->state_hash_mask);
   1457  1.1  christos 
   1458  1.1  christos   for (i = 0 ; i < spot->num ; i++)
   1459  1.1  christos     {
   1460  1.1  christos       re_dfastate_t *state = spot->array[i];
   1461  1.1  christos       if (state->hash == hash
   1462  1.1  christos 	  && state->context == context
   1463  1.1  christos 	  && re_node_set_compare (state->entrance_nodes, nodes))
   1464  1.1  christos 	return state;
   1465  1.1  christos     }
   1466  1.1  christos   /* There are no appropriate state in `dfa', create the new one.  */
   1467  1.1  christos   new_state = create_cd_newstate (dfa, nodes, context, hash);
   1468  1.1  christos   if (BE (new_state != NULL, 1))
   1469  1.1  christos     return new_state;
   1470  1.1  christos   else
   1471  1.1  christos     {
   1472  1.1  christos       *err = REG_ESPACE;
   1473  1.1  christos       return NULL;
   1474  1.1  christos     }
   1475  1.1  christos }
   1476  1.1  christos 
   1477  1.1  christos /* Finish initialization of the new state NEWSTATE, and using its hash value
   1478  1.1  christos    HASH put in the appropriate bucket of DFA's state table.  Return value
   1479  1.1  christos    indicates the error code if failed.  */
   1480  1.1  christos 
   1481  1.1  christos static reg_errcode_t
   1482  1.1  christos internal_function
   1483  1.1  christos register_state (const re_dfa_t *dfa, re_dfastate_t *newstate, re_hashval_t hash)
   1484  1.1  christos {
   1485  1.1  christos   struct re_state_table_entry *spot;
   1486  1.1  christos   reg_errcode_t err;
   1487  1.1  christos   Idx i;
   1488  1.1  christos 
   1489  1.1  christos   newstate->hash = hash;
   1490  1.1  christos   err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
   1491  1.1  christos   if (BE (err != REG_NOERROR, 0))
   1492  1.1  christos     return REG_ESPACE;
   1493  1.1  christos   for (i = 0; i < newstate->nodes.nelem; i++)
   1494  1.1  christos     {
   1495  1.1  christos       Idx elem = newstate->nodes.elems[i];
   1496  1.1  christos       if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
   1497  1.1  christos 	{
   1498  1.1  christos 	  bool ok = re_node_set_insert_last (&newstate->non_eps_nodes, elem);
   1499  1.1  christos 	  if (BE (! ok, 0))
   1500  1.1  christos 	    return REG_ESPACE;
   1501  1.1  christos 	}
   1502  1.1  christos     }
   1503  1.1  christos 
   1504  1.1  christos   spot = dfa->state_table + (hash & dfa->state_hash_mask);
   1505  1.1  christos   if (BE (spot->alloc <= spot->num, 0))
   1506  1.1  christos     {
   1507  1.1  christos       Idx new_alloc = spot->num;
   1508  1.1  christos       re_dfastate_t **new_array = re_x2realloc (spot->array, re_dfastate_t *,
   1509  1.1  christos 						&new_alloc);
   1510  1.1  christos       if (BE (new_array == NULL, 0))
   1511  1.1  christos 	return REG_ESPACE;
   1512  1.1  christos       spot->array = new_array;
   1513  1.1  christos       spot->alloc = new_alloc;
   1514  1.1  christos     }
   1515  1.1  christos   spot->array[spot->num++] = newstate;
   1516  1.1  christos   return REG_NOERROR;
   1517  1.1  christos }
   1518  1.1  christos 
   1519  1.1  christos /* Create the new state which is independ of contexts.
   1520  1.1  christos    Return the new state if succeeded, otherwise return NULL.  */
   1521  1.1  christos 
   1522  1.1  christos static re_dfastate_t *
   1523  1.1  christos internal_function
   1524  1.1  christos create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
   1525  1.1  christos 		    re_hashval_t hash)
   1526  1.1  christos {
   1527  1.1  christos   Idx i;
   1528  1.1  christos   reg_errcode_t err;
   1529  1.1  christos   re_dfastate_t *newstate;
   1530  1.1  christos 
   1531  1.1  christos   newstate = re_calloc (re_dfastate_t, 1);
   1532  1.1  christos   if (BE (newstate == NULL, 0))
   1533  1.1  christos     return NULL;
   1534  1.1  christos   err = re_node_set_init_copy (&newstate->nodes, nodes);
   1535  1.1  christos   if (BE (err != REG_NOERROR, 0))
   1536  1.1  christos     {
   1537  1.1  christos       re_free (newstate);
   1538  1.1  christos       return NULL;
   1539  1.1  christos     }
   1540  1.1  christos 
   1541  1.1  christos   newstate->entrance_nodes = &newstate->nodes;
   1542  1.1  christos   for (i = 0 ; i < nodes->nelem ; i++)
   1543  1.1  christos     {
   1544  1.1  christos       re_token_t *node = dfa->nodes + nodes->elems[i];
   1545  1.1  christos       re_token_type_t type = node->type;
   1546  1.1  christos       if (type == CHARACTER && !node->constraint)
   1547  1.1  christos 	continue;
   1548  1.1  christos #ifdef RE_ENABLE_I18N
   1549  1.1  christos       newstate->accept_mb |= node->accept_mb;
   1550  1.1  christos #endif /* RE_ENABLE_I18N */
   1551  1.1  christos 
   1552  1.1  christos       /* If the state has the halt node, the state is a halt state.  */
   1553  1.1  christos       if (type == END_OF_RE)
   1554  1.1  christos 	newstate->halt = 1;
   1555  1.1  christos       else if (type == OP_BACK_REF)
   1556  1.1  christos 	newstate->has_backref = 1;
   1557  1.1  christos       else if (type == ANCHOR || node->constraint)
   1558  1.1  christos 	newstate->has_constraint = 1;
   1559  1.1  christos     }
   1560  1.1  christos   err = register_state (dfa, newstate, hash);
   1561  1.1  christos   if (BE (err != REG_NOERROR, 0))
   1562  1.1  christos     {
   1563  1.1  christos       free_state (newstate);
   1564  1.1  christos       newstate = NULL;
   1565  1.1  christos     }
   1566  1.1  christos   return newstate;
   1567  1.1  christos }
   1568  1.1  christos 
   1569  1.1  christos /* Create the new state which is depend on the context CONTEXT.
   1570  1.1  christos    Return the new state if succeeded, otherwise return NULL.  */
   1571  1.1  christos 
   1572  1.1  christos static re_dfastate_t *
   1573  1.1  christos internal_function
   1574  1.1  christos create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
   1575  1.1  christos 		    unsigned int context, re_hashval_t hash)
   1576  1.1  christos {
   1577  1.1  christos   Idx i, nctx_nodes = 0;
   1578  1.1  christos   reg_errcode_t err;
   1579  1.1  christos   re_dfastate_t *newstate;
   1580  1.1  christos 
   1581  1.1  christos   newstate = re_calloc (re_dfastate_t, 1);
   1582  1.1  christos   if (BE (newstate == NULL, 0))
   1583  1.1  christos     return NULL;
   1584  1.1  christos   err = re_node_set_init_copy (&newstate->nodes, nodes);
   1585  1.1  christos   if (BE (err != REG_NOERROR, 0))
   1586  1.1  christos     {
   1587  1.1  christos       re_free (newstate);
   1588  1.1  christos       return NULL;
   1589  1.1  christos     }
   1590  1.1  christos 
   1591  1.1  christos   newstate->context = context;
   1592  1.1  christos   newstate->entrance_nodes = &newstate->nodes;
   1593  1.1  christos 
   1594  1.1  christos   for (i = 0 ; i < nodes->nelem ; i++)
   1595  1.1  christos     {
   1596  1.1  christos       unsigned int constraint = 0;
   1597  1.1  christos       re_token_t *node = dfa->nodes + nodes->elems[i];
   1598  1.1  christos       re_token_type_t type = node->type;
   1599  1.1  christos       if (node->constraint)
   1600  1.1  christos 	constraint = node->constraint;
   1601  1.1  christos 
   1602  1.1  christos       if (type == CHARACTER && !constraint)
   1603  1.1  christos 	continue;
   1604  1.1  christos #ifdef RE_ENABLE_I18N
   1605  1.1  christos       newstate->accept_mb |= node->accept_mb;
   1606  1.1  christos #endif /* RE_ENABLE_I18N */
   1607  1.1  christos 
   1608  1.1  christos       /* If the state has the halt node, the state is a halt state.  */
   1609  1.1  christos       if (type == END_OF_RE)
   1610  1.1  christos 	newstate->halt = 1;
   1611  1.1  christos       else if (type == OP_BACK_REF)
   1612  1.1  christos 	newstate->has_backref = 1;
   1613  1.1  christos       else if (type == ANCHOR)
   1614  1.1  christos 	constraint = node->opr.ctx_type;
   1615  1.1  christos 
   1616  1.1  christos       if (constraint)
   1617  1.1  christos 	{
   1618  1.1  christos 	  if (newstate->entrance_nodes == &newstate->nodes)
   1619  1.1  christos 	    {
   1620  1.1  christos 	      newstate->entrance_nodes = re_malloc (re_node_set, 1);
   1621  1.1  christos 	      if (BE (newstate->entrance_nodes == NULL, 0))
   1622  1.1  christos 		{
   1623  1.1  christos 		  free_state (newstate);
   1624  1.1  christos 		  return NULL;
   1625  1.1  christos 		}
   1626  1.1  christos 	      re_node_set_init_copy (newstate->entrance_nodes, nodes);
   1627  1.1  christos 	      nctx_nodes = 0;
   1628  1.1  christos 	      newstate->has_constraint = 1;
   1629  1.1  christos 	    }
   1630  1.1  christos 
   1631  1.1  christos 	  if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
   1632  1.1  christos 	    {
   1633  1.1  christos 	      re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
   1634  1.1  christos 	      ++nctx_nodes;
   1635  1.1  christos 	    }
   1636  1.1  christos 	}
   1637  1.1  christos     }
   1638  1.1  christos   err = register_state (dfa, newstate, hash);
   1639  1.1  christos   if (BE (err != REG_NOERROR, 0))
   1640  1.1  christos     {
   1641  1.1  christos       free_state (newstate);
   1642  1.1  christos       newstate = NULL;
   1643  1.1  christos     }
   1644  1.1  christos   return  newstate;
   1645  1.1  christos }
   1646  1.1  christos 
   1647  1.1  christos static void
   1648  1.1  christos internal_function
   1649  1.1  christos free_state (re_dfastate_t *state)
   1650  1.1  christos {
   1651  1.1  christos   re_node_set_free (&state->non_eps_nodes);
   1652  1.1  christos   re_node_set_free (&state->inveclosure);
   1653  1.1  christos   if (state->entrance_nodes != &state->nodes)
   1654  1.1  christos     {
   1655  1.1  christos       re_node_set_free (state->entrance_nodes);
   1656  1.1  christos       re_free (state->entrance_nodes);
   1657  1.1  christos     }
   1658  1.1  christos   re_node_set_free (&state->nodes);
   1659  1.1  christos   re_free (state->word_trtable);
   1660                  re_free (state->trtable);
   1661                  re_free (state);
   1662                }
   1663