Update.

2002-04-22 Isamu Hasegawa <isamu@yamato.ibm.com> * posix/regcomp.c (re_compile_internal): Adapt it to new interface of buffer building functions. * posix/regex_internal.c (re_string_allocate): New function. (re_string_realloc_buffers): New function. (re_string_skip_chars): New function. (re_string_reconstruct): New function. (re_string_construct): Adapt it to new interface of buffer building functions. (re_string_construct_common): Likewise. (build_wcs_buffer): Likewise. (build_wcs_upper_buffer): Likewise. (build_upper_buffer): Likewise. (re_string_translate_buffer): Likewise. (re_string_context_at): Adapt it to variable length buffers. * posix/regex_internal.h (re_string_t): Add new fields to handle variable length buffers. (re_match_context_t): Likewise. * posix/regexec.c (re_search_internal): Adapt it to new interface of re_string_t and re_match_context_t. (acquire_init_state_context): Likewise. (check_matching): Likewise. (check_halt_state_context): Likewise. (proceed_next_node): Likewise. (set_regs): Likewise. (sift_states_backward): Likewise. (clean_state_log_if_need): Likewise. (sift_states_iter_mb): Likewise. (sift_states_iter_bkref): Likewise. (add_epsilon_backreference): Likewise. (transit_state): Likewise. (transit_state_sb): Likewise. (transit_state_mb): Likewise. (transit_state_bkref): Likewise. (transit_state_bkref_loop): Likewise. (check_node_accept): Likewise. (match_ctx_init): Likewise. (extend_buffers): New function. 2002-04-21 Bruno Haible <bruno@clisp.org> * iconvdata/tst-table.sh: For the second check, use the truncated GB18030 charmap table, like for the first check.
2024-11-21 01:12:26 +08:00 · 2002-04-24 21:54:53 +00:00 · 2002-04-24 21:54:53 +00:00 · 612546c60d
commit 612546c60d
parent be479a6dfe
6 changed files with 20631 additions and 14603 deletions
--- a/45
+++ b/45
@ -1,3 +1,48 @@
+2002-04-22  Isamu Hasegawa  <isamu@yamato.ibm.com>
+
+	* posix/regcomp.c (re_compile_internal): Adapt it to new interface
+	of buffer building functions.
+	* posix/regex_internal.c (re_string_allocate): New function.
+	(re_string_realloc_buffers): New function.
+	(re_string_skip_chars): New function.
+	(re_string_reconstruct): New function.
+	(re_string_construct): Adapt it to new interface of buffer building
+	functions.
+	(re_string_construct_common): Likewise.
+	(build_wcs_buffer): Likewise.
+	(build_wcs_upper_buffer): Likewise.
+	(build_upper_buffer): Likewise.
+	(re_string_translate_buffer): Likewise.
+	(re_string_context_at): Adapt it to variable length buffers.
+	* posix/regex_internal.h (re_string_t): Add new fields to handle
+	variable length buffers.
+	(re_match_context_t): Likewise.
+	* posix/regexec.c (re_search_internal): Adapt it to new interface
+	of re_string_t and re_match_context_t.
+	(acquire_init_state_context): Likewise.
+	(check_matching): Likewise.
+	(check_halt_state_context): Likewise.
+	(proceed_next_node): Likewise.
+	(set_regs): Likewise.
+	(sift_states_backward): Likewise.
+	(clean_state_log_if_need): Likewise.
+	(sift_states_iter_mb): Likewise.
+	(sift_states_iter_bkref): Likewise.
+	(add_epsilon_backreference): Likewise.
+	(transit_state): Likewise.
+	(transit_state_sb): Likewise.
+	(transit_state_mb): Likewise.
+	(transit_state_bkref): Likewise.
+	(transit_state_bkref_loop): Likewise.
+	(check_node_accept): Likewise.
+	(match_ctx_init): Likewise.
+	(extend_buffers): New function.
+
+2002-04-21  Bruno Haible  <bruno@clisp.org>
+
+	* iconvdata/tst-table.sh: For the second check, use the truncated
+	GB18030 charmap table, like for the first check.
+
 2002-04-24  Ulrich Drepper  <drepper@redhat.com>

 	* elf/dl-load.c (open_verify): Correct __lseek parameters.
--- a/localedata/charmaps/GB18030
+++ b/localedata/charmaps/GB18030
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@ -692,12 +692,8 @@ re_compile_internal (preg, pattern, length, syntax)
      return err;
    }

-  if (syntax & RE_ICASE)
-    err = re_string_construct_toupper (&regexp, pattern, length,
-                                       preg->translate);
-  else
-    err = re_string_construct (&regexp, pattern, length, preg->translate);
-
+  err = re_string_construct (&regexp, pattern, length, preg->translate,
+                             syntax & RE_ICASE);
  if (BE (err != REG_NOERROR, 0))
    {
      re_free (dfa);
--- a/posix/regex_internal.c
+++ b/posix/regex_internal.c
@ -58,14 +58,9 @@
 #include "regex_internal.h"

 static void re_string_construct_common (const unsigned char *str,
-                                        int len, re_string_t *pstr);
-#ifdef RE_ENABLE_I18N
-static reg_errcode_t build_wcs_buffer (re_string_t *pstr);
-static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr);
-#endif /* RE_ENABLE_I18N */
-static reg_errcode_t build_upper_buffer (re_string_t *pstr);
-static reg_errcode_t re_string_translate_buffer (re_string_t *pstr,
-						 RE_TRANSLATE_TYPE trans);
+                                        int len, re_string_t *pstr,
+                                        RE_TRANSLATE_TYPE trans, int icase);
+static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx);
 static re_dfastate_t *create_newstate_common (re_dfa_t *dfa,
                                              const re_node_set *nodes,
                                              unsigned int hash);
@ -83,278 +78,416 @@ static unsigned int inline calc_state_hash (const re_node_set *nodes,

 /* Functions for string operation.  */

-/* Construct string object.  */
+/* This function allocate the buffers.  It is necessary to call
+   re_string_reconstruct before using the object.  */
+
 static reg_errcode_t
-re_string_construct (pstr, str, len, trans)
+re_string_allocate (pstr, str, len, init_len, trans, icase)
     re_string_t *pstr;
     const unsigned char *str;
-     int len;
+     int len, init_len, icase;
     RE_TRANSLATE_TYPE trans;
 {
  reg_errcode_t ret;
-  re_string_construct_common (str, len, pstr);
-#ifdef RE_ENABLE_I18N
-  if (MB_CUR_MAX >1 && pstr->len > 0)
-    {
-      ret = build_wcs_buffer (pstr);
-      if (BE (ret != REG_NOERROR, 0))
-        return ret;
-    }
-#endif /* RE_ENABLE_I18N  */
-  pstr->mbs_case = str;
-  if (trans != NULL)
-    {
-      ret = re_string_translate_buffer (pstr, trans);
-      if (BE (ret != REG_NOERROR, 0))
-        return ret;
-    }
+  int init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
+  re_string_construct_common (str, len, pstr, trans, icase);
+
+  ret = re_string_realloc_buffers (pstr, init_buf_len);
+  if (BE (ret != REG_NOERROR, 0))
+    return ret;
+
+  pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
+                    : (unsigned char *)str);
+  pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
+  pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr)
+                     || MB_CUR_MAX > 1) ? pstr->valid_len : len;
  return REG_NOERROR;
 }

-/* Construct string object. We use this function instead of
-   re_string_construct for case insensitive mode.  */
+/* This function allocate the buffers, and initialize them.  */

 static reg_errcode_t
-re_string_construct_toupper (pstr, str, len, trans)
+re_string_construct (pstr, str, len, trans, icase)
     re_string_t *pstr;
     const unsigned char *str;
-     int len;
+     int len, icase;
     RE_TRANSLATE_TYPE trans;
 {
  reg_errcode_t ret;
-  /* Set case sensitive buffer.  */
-  re_string_construct_common (str, len, pstr);
-#ifdef RE_ENABLE_I18N
-  if (MB_CUR_MAX >1)
+  re_string_construct_common (str, len, pstr, trans, icase);
+  /* Set 0 so that this function can initialize whole buffers.  */
+  pstr->valid_len = 0;
+
+  if (len > 0)
    {
-      if (BE (pstr->len > 0, 1))
-        {
-          ret = build_wcs_upper_buffer (pstr);
-          if (BE (ret != REG_NOERROR, 0))
-            return ret;
-        }
+      ret = re_string_realloc_buffers (pstr, len + 1);
+      if (BE (ret != REG_NOERROR, 0))
+        return ret;
+    }
+  pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
+                    : (unsigned char *)str);
+  pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
+
+  if (icase)
+    {
+#ifdef RE_ENABLE_I18N
+      if (MB_CUR_MAX > 1)
+        build_wcs_upper_buffer (pstr);
+      else
+        build_upper_buffer (pstr);
+#endif /* RE_ENABLE_I18N  */
    }
  else
-#endif /* RE_ENABLE_I18N  */
    {
-      if (BE (pstr->len > 0, 1))
+#ifdef RE_ENABLE_I18N
+      if (MB_CUR_MAX > 1)
+        build_wcs_buffer (pstr);
+      else
+#endif /* RE_ENABLE_I18N  */
        {
-          ret = build_upper_buffer (pstr);
-          if (BE (ret != REG_NOERROR, 0))
-            return ret;
+          if (trans != NULL)
+            re_string_translate_buffer (pstr);
+          else
+            pstr->valid_len = len;
        }
    }
-  pstr->mbs_case = str;
-  if (trans != NULL)
-    {
-      ret = re_string_translate_buffer (pstr, trans);
-      if (BE (ret != REG_NOERROR, 0))
-        return ret;
-    }
+
+  /* Initialized whole buffers, then valid_len == bufs_len.  */
+  pstr->valid_len = pstr->bufs_len;
  return REG_NOERROR;
 }

-/* Helper functions for re_string_construct_*.  */
+/* Helper functions for re_string_allocate, and re_string_construct.  */
+
+static reg_errcode_t
+re_string_realloc_buffers (pstr, new_buf_len)
+     re_string_t *pstr;
+     int new_buf_len;
+{
+#ifdef RE_ENABLE_I18N
+  if (MB_CUR_MAX > 1)
+    {
+      pstr->wcs = re_realloc (pstr->wcs, wchar_t, new_buf_len);
+      if (BE (pstr->wcs == NULL, 0))
+        return REG_ESPACE;
+    }
+#endif /* RE_ENABLE_I18N  */
+  if (MBS_ALLOCATED (pstr))
+    {
+      pstr->mbs = re_realloc (pstr->mbs, unsigned char, new_buf_len);
+      if (BE (pstr->mbs == NULL, 0))
+        return REG_ESPACE;
+    }
+  if (MBS_CASE_ALLOCATED (pstr))
+    {
+      pstr->mbs_case = re_realloc (pstr->mbs_case, unsigned char, new_buf_len);
+      if (BE (pstr->mbs_case == NULL, 0))
+        return REG_ESPACE;
+      if (!MBS_ALLOCATED (pstr))
+        pstr->mbs = pstr->mbs_case;
+    }
+  pstr->bufs_len = new_buf_len;
+  return REG_NOERROR;
+}
+
+
 static void
-re_string_construct_common (str, len, pstr)
+re_string_construct_common (str, len, pstr, trans, icase)
     const unsigned char *str;
     int len;
     re_string_t *pstr;
+     RE_TRANSLATE_TYPE trans;
+     int icase;
 {
-  pstr->mbs = str;
-  pstr->cur_idx = 0;
+  memset (pstr, '\0', sizeof (re_string_t));
+  pstr->raw_mbs = str;
  pstr->len = len;
-#ifdef RE_ENABLE_I18N
-  pstr->wcs = NULL;
-#endif
-  pstr->mbs_case = NULL;
-  pstr->mbs_alloc = 0;
-  pstr->mbs_case_alloc = 0;
+  pstr->trans = trans;
+  pstr->icase = icase ? 1 : 0;
 }

 #ifdef RE_ENABLE_I18N

-/* Build wide character buffer for `pstr'.
+/* Build wide character buffer PSTR->WCS.
   If the byte sequence of the string are:
     <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
   Then wide character buffer will be:
     <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
   We use WEOF for padding, they indicate that the position isn't
-   a first byte of a multibyte character.  */
+   a first byte of a multibyte character.

-static reg_errcode_t
+   Note that this function assumes PSTR->VALID_LEN elements are already
+   built and starts from PSTR->VALID_LEN.  */
+
+static void
 build_wcs_buffer (pstr)
     re_string_t *pstr;
 {
-  mbstate_t state, prev_st;
-  wchar_t wc;
-  int char_idx, char_len, mbclen;
-
-  pstr->wcs = re_malloc (wchar_t, pstr->len + 1);
-  if (BE (pstr->wcs == NULL, 0))
-    return REG_ESPACE;
-
-  memset (&state, '\0', sizeof (mbstate_t));
-  char_len = pstr->len;
-  for (char_idx = 0; char_idx < char_len ;)
+  mbstate_t prev_st;
+  int byte_idx, end_idx, mbclen, remain_len;
+  /* Build the buffers from pstr->valid_len to either pstr->len or
+     pstr->bufs_len.  */
+  end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
+  for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
    {
-      int next_idx, remain_len = char_len - char_idx;
-      prev_st = state;
-      mbclen = mbrtowc (&wc, pstr->mbs + char_idx, remain_len, &state);
-      if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
-        /* We treat these cases as a singlebyte character.  */
+      wchar_t wc;
+      remain_len = end_idx - byte_idx;
+      prev_st = pstr->cur_state;
+      mbclen = mbrtowc (&wc, pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx,
+                        remain_len, &pstr->cur_state);
+      if (BE (mbclen == (size_t) -2, 0))
        {
+          /* The buffer doesn't have enough space, finish to build.  */
+          pstr->cur_state = prev_st;
+          break;
+        }
+      else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
+        {
+          /* We treat these cases as a singlebyte character.  */
          mbclen = 1;
-          wc = (wchar_t) pstr->mbs[char_idx++];
-          state = prev_st;
+          wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+          pstr->cur_state = prev_st;
+        }
+
+      /* Apply the translateion if we need.  */
+      if (pstr->trans != NULL && mbclen == 1)
+        {
+          int ch =  pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]];
+          pstr->mbs_case[byte_idx] = ch;
        }
      /* Write wide character and padding.  */
-      pstr->wcs[char_idx++] = wc;
-      for (next_idx = char_idx + mbclen - 1; char_idx < next_idx ;)
-        pstr->wcs[char_idx++] = WEOF;
+      pstr->wcs[byte_idx++] = wc;
+      /* Write paddings.  */
+      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+        pstr->wcs[byte_idx++] = WEOF;
    }
-  return REG_NOERROR;
+  pstr->valid_len = byte_idx;
 }

-static reg_errcode_t
+/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
+   but for REG_ICASE.  */
+
+static void
 build_wcs_upper_buffer (pstr)
     re_string_t *pstr;
 {
-  mbstate_t state, prev_st;
-  wchar_t wc;
-  unsigned char *mbs_upper;
-  int char_idx, char_len, mbclen;
-
-  pstr->wcs = re_malloc (wchar_t, pstr->len + 1);
-  mbs_upper = re_malloc (unsigned char, pstr->len + 1);
-  if (BE (pstr->wcs == NULL || mbs_upper == NULL, 0))
+  mbstate_t prev_st;
+  int byte_idx, end_idx, mbclen, remain_len;
+  /* Build the buffers from pstr->valid_len to either pstr->len or
+     pstr->bufs_len.  */
+  end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
+  for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
    {
-      pstr->wcs = NULL;
-      return REG_ESPACE;
-    }
-
-  memset (&state, '\0', sizeof (mbstate_t));
-  char_len = pstr->len;
-  for (char_idx = 0 ; char_idx < char_len ; char_idx += mbclen)
-    {
-      int byte_idx, remain_len = char_len - char_idx;
-      prev_st = state;
-      mbclen = mbrtowc (&wc, pstr->mbs + char_idx, remain_len, &state);
-      if (mbclen == 1)
+      wchar_t wc;
+      remain_len = end_idx - byte_idx;
+      prev_st = pstr->cur_state;
+      mbclen = mbrtowc (&wc, pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx,
+                        remain_len, &pstr->cur_state);
+      if (BE (mbclen == (size_t) -2, 0))
        {
-          pstr->wcs[char_idx] = wc;
-          if (islower (pstr->mbs[char_idx]))
-            mbs_upper[char_idx] = toupper (pstr->mbs[char_idx]);
-          else
-            mbs_upper[char_idx] = pstr->mbs[char_idx];
+          /* The buffer doesn't have enough space, finish to build.  */
+          pstr->cur_state = prev_st;
+          break;
        }
-      else if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1
-                   || mbclen == 0, 0))
-        /* We treat these cases as a singlebyte character.  */
+      else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0)
        {
-          mbclen = 1;
-          pstr->wcs[char_idx] = (wchar_t) pstr->mbs[char_idx];
-          mbs_upper[char_idx] = pstr->mbs[char_idx];
-          state = prev_st;
+          /* In case of a singlebyte character.  */
+          int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+          /* Apply the translateion if we need.  */
+          if (pstr->trans != NULL && mbclen == 1)
+            {
+              ch = pstr->trans[ch];
+              pstr->mbs_case[byte_idx] = ch;
+            }
+          pstr->wcs[byte_idx] = iswlower (wc) ? toupper (wc) : wc;
+          pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch;
+          if (BE (mbclen == (size_t) -1, 0))
+            pstr->cur_state = prev_st;
        }
      else /* mbclen > 1 */
        {
-          pstr->wcs[char_idx] = wc;
          if (iswlower (wc))
-            wcrtomb (mbs_upper + char_idx, towupper (wc), &prev_st);
+            wcrtomb (pstr->mbs + byte_idx, towupper (wc), &prev_st);
          else
-            memcpy (mbs_upper + char_idx, pstr->mbs + char_idx, mbclen);
-          for (byte_idx = 1 ; byte_idx < mbclen ; byte_idx++)
-            pstr->wcs[char_idx + byte_idx] = WEOF;
+            memcpy (pstr->mbs + byte_idx,
+                    pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
+          pstr->wcs[byte_idx++] = iswlower (wc) ? toupper (wc) : wc;
+          /* Write paddings.  */
+          for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+            pstr->wcs[byte_idx++] = WEOF;
        }
    }
-  pstr->mbs = mbs_upper;
-  pstr->mbs_alloc = 1;
-  return REG_NOERROR;
+  pstr->valid_len = byte_idx;
+}
+
+/* Skip characters until the index becomes greater than NEW_RAW_IDX.
+   Return the index.  */
+
+static int
+re_string_skip_chars (pstr, new_raw_idx)
+     re_string_t *pstr;
+     int new_raw_idx;
+{
+  mbstate_t prev_st;
+  int rawbuf_idx, mbclen;
+
+  /* Skip the characters which are not necessary to check.  */
+  for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_len;
+       rawbuf_idx < new_raw_idx;)
+    {
+      int remain_len = pstr->len - rawbuf_idx;
+      prev_st = pstr->cur_state;
+      mbclen = mbrlen (pstr->raw_mbs + rawbuf_idx, remain_len,
+                       &pstr->cur_state);
+      if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
+        {
+          /* We treat these cases as a singlebyte character.  */
+          mbclen = 1;
+          pstr->cur_state = prev_st;
+        }
+      /* Then proceed the next character.  */
+      rawbuf_idx += mbclen;
+    }
+  return rawbuf_idx;
 }
 #endif /* RE_ENABLE_I18N  */

-static reg_errcode_t
+/* Build the buffer PSTR->MBS, and apply the translation if we need.  
+   This function is used in case of REG_ICASE.  */
+
+static void
 build_upper_buffer (pstr)
     re_string_t *pstr;
 {
-  unsigned char *mbs_upper;
-  int char_idx, char_len;
+  int char_idx, end_idx;
+  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;

-  mbs_upper = re_malloc (unsigned char, pstr->len + 1);
-  if (BE (mbs_upper == NULL, 0))
-    return REG_ESPACE;
-
-  char_len = pstr->len;
-  for (char_idx = 0 ; char_idx < char_len ; char_idx ++)
+  for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
    {
-      if (islower (pstr->mbs[char_idx]))
-        mbs_upper[char_idx] = toupper (pstr->mbs[char_idx]);
+      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
+      if (pstr->trans != NULL)
+        {
+          ch =  pstr->trans[ch];
+          pstr->mbs_case[char_idx] = ch;
+        }
+      if (islower (ch))
+        pstr->mbs[char_idx] = toupper (ch);
      else
-        mbs_upper[char_idx] = pstr->mbs[char_idx];
+        pstr->mbs[char_idx] = ch;
    }
-  pstr->mbs = mbs_upper;
-  pstr->mbs_alloc = 1;
-  return REG_NOERROR;
+  pstr->valid_len = char_idx;
 }

-/* Apply TRANS to the buffer in PSTR.  We assume that wide char buffer
-   is already constructed if MB_CUR_MAX > 1.  */
+/* Apply TRANS to the buffer in PSTR.  */
+
+static void
+re_string_translate_buffer (pstr)
+     re_string_t *pstr;
+{
+  int buf_idx, end_idx;
+  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+
+  for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
+    {
+      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
+      pstr->mbs_case[buf_idx] = pstr->trans[ch];
+    }
+
+  pstr->valid_len = buf_idx;
+}
+
+/* This function re-construct the buffers.
+   Concretely, convert to wide character in case of MB_CUR_MAX > 1,
+   convert to upper case in case of REG_ICASE, apply translation.  */

 static reg_errcode_t
-re_string_translate_buffer (pstr, trans)
+re_string_reconstruct (pstr, idx, eflags, newline)
     re_string_t *pstr;
-     RE_TRANSLATE_TYPE trans;
+     int idx, eflags, newline;
 {
-  int buf_idx;
-  unsigned char *transed_buf, *transed_case_buf;
-#ifdef DEBUG
-  assert (trans != NULL);
-#endif
-  if (pstr->mbs_alloc)
+  int offset = idx - pstr->raw_mbs_idx;
+  if (offset < 0)
    {
-      transed_buf = (unsigned char *) pstr->mbs;
-      transed_case_buf = re_malloc (unsigned char, pstr->len + 1);
-      if (BE (transed_case_buf == NULL, 0))
-        return REG_ESPACE;
-      pstr->mbs_case_alloc = 1;
+      /* Reset buffer.  */
+      memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
+      pstr->valid_len = pstr->raw_mbs_idx = 0;
+      pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+                           : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
+      if (!MBS_CASE_ALLOCATED (pstr))
+        pstr->mbs_case = (unsigned char *)pstr->raw_mbs;
+      if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr))
+        pstr->mbs = (unsigned char *)pstr->raw_mbs;
+      offset = idx;
    }
-  else
+
+  if (offset != 0)
    {
-      transed_buf = re_malloc (unsigned char, pstr->len + 1);
-      if (BE (transed_buf == NULL, 0))
-        return REG_ESPACE;
-      transed_case_buf = NULL;
-      pstr->mbs_alloc = 1;
-    }
-  for (buf_idx = 0 ; buf_idx < pstr->len ; buf_idx++)
-    {
-#ifdef RE_ENABLE_I18N
-      if (MB_CUR_MAX > 1 && !re_string_is_single_byte_char (pstr, buf_idx))
-        transed_buf[buf_idx] = pstr->mbs[buf_idx];
-      else
-#endif
-        transed_buf[buf_idx] = trans[pstr->mbs[buf_idx]];
-      if (transed_case_buf)
+      pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags,
+                                                newline);
+      /* Are the characters which are already checked remain?  */
+      if (offset < pstr->valid_len)
        {
+          /* Yes, move them to the front of the buffer.  */
 #ifdef RE_ENABLE_I18N
-         if (MB_CUR_MAX > 1 && !re_string_is_single_byte_char (pstr, buf_idx))
-            transed_case_buf[buf_idx] = pstr->mbs_case[buf_idx];
-          else
+          if (MB_CUR_MAX > 1)
+            memmove (pstr->wcs, pstr->wcs + offset,
+                     (pstr->valid_len - offset) * sizeof (wchar_t));
+#endif /* RE_ENABLE_I18N */
+          if (MBS_ALLOCATED (pstr))
+            memmove (pstr->mbs, pstr->mbs + offset,
+                     pstr->valid_len - offset);
+          if (MBS_CASE_ALLOCATED (pstr))
+            memmove (pstr->mbs_case, pstr->mbs_case + offset,
+                     pstr->valid_len - offset);
+          pstr->valid_len -= offset;
+#if DEBUG
+          assert (pstr->valid_len > 0);
 #endif
-            transed_case_buf[buf_idx] = trans[pstr->mbs_case[buf_idx]];
+        }
+      else
+        {
+          /* No, skip all characters until IDX.  */
+          pstr->valid_len = 0;
+#ifdef RE_ENABLE_I18N
+          if (MB_CUR_MAX > 1)
+            {
+              int wcs_idx;
+              pstr->valid_len = re_string_skip_chars (pstr, idx) - idx;
+              for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
+                pstr->wcs[wcs_idx] = WEOF;
+            }
+#endif /* RE_ENABLE_I18N */
+        }
+      if (!MBS_CASE_ALLOCATED (pstr))
+        {
+          pstr->mbs_case += offset; 
+          /* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED.  */
+          if (!MBS_ALLOCATED (pstr))
+            pstr->mbs += offset; 
        }
    }
-  if (pstr->mbs_case_alloc == 1)
+  pstr->raw_mbs_idx = idx;
+  pstr->len -= offset;
+
+  /* Then build the buffers.  */
+#ifdef RE_ENABLE_I18N
+  if (MB_CUR_MAX > 1)
    {
-      pstr->mbs = transed_buf;
-      pstr->mbs_case = transed_case_buf;
+      if (pstr->icase)
+        build_wcs_upper_buffer (pstr);
+      else
+        build_wcs_buffer (pstr);
    }
  else
+#endif /* RE_ENABLE_I18N */
    {
-      pstr->mbs = transed_buf;
-      pstr->mbs_case = transed_buf;
+      if (pstr->icase)
+        build_upper_buffer (pstr);
+      else if (pstr->trans != NULL)
+        re_string_translate_buffer (pstr);
    }
+  pstr->cur_idx = 0;
+
  return REG_NOERROR;
 }

@ -365,13 +498,14 @@ re_string_destruct (pstr)
 #ifdef RE_ENABLE_I18N
  re_free (pstr->wcs);
 #endif /* RE_ENABLE_I18N  */
-  if (pstr->mbs_alloc)
-    re_free ((void *) pstr->mbs);
-  if (pstr->mbs_case_alloc)
-    re_free ((void *) pstr->mbs_case);
+  if (MBS_ALLOCATED (pstr))
+    re_free (pstr->mbs);
+  if (MBS_CASE_ALLOCATED (pstr))
+    re_free (pstr->mbs_case);
 }

 /* Return the context at IDX in INPUT.  */
+
 static unsigned int
 re_string_context_at (input, idx, eflags, newline_anchor)
     const re_string_t *input;
@ -380,17 +514,13 @@ re_string_context_at (input, idx, eflags, newline_anchor)
  int c;
  if (idx < 0 || idx == input->len)
    {
-      unsigned int context = 0;
      if (idx < 0)
-        context = CONTEXT_BEGBUF;
+        /* In this case, we use the value stored in input->tip_context,
+           since we can't know the character in input->mbs[-1] here.  */
+        return input->tip_context;
      else /* (idx == input->len) */
-        context = CONTEXT_ENDBUF;
-
-      if ((idx < 0 && !(eflags & REG_NOTBOL))
-          || (idx == input->len && !(eflags & REG_NOTEOL)))
-        return CONTEXT_NEWLINE | context;
-      else
-        return context;
+        return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
+                : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
    }
  c = re_string_byte_at (input, idx);
  if (IS_WORD_CHAR (c))
@ -737,6 +867,7 @@ re_node_set_insert (set, elem)
      if (set->nelem - idx > 0)
        memcpy (new_array + idx + 1, set->elems + idx,
 		sizeof (int) * (set->nelem - idx));
+      re_free (set->elems);
      set->elems = new_array;
    }
  else
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@ -201,33 +201,67 @@ typedef struct

 struct re_string_t
 {
+  /* Indicate the raw buffer which is the original string passed as an
+     argument of regexec(), re_search(), etc..  */
+  const unsigned char *raw_mbs;
+  /* Index in RAW_MBS.  Each character mbs[i] corresponds to
+     raw_mbs[raw_mbs_idx + i].  */
+  int raw_mbs_idx;
  /* Store the multibyte string.  In case of "case insensitive mode" like
-     REG_ICASE, upper cases of the string are stored.  */
-  const unsigned char *mbs;
+     REG_ICASE, upper cases of the string are stored, otherwise MBS points
+     the same address that RAW_MBS points.  */
+  unsigned char *mbs;
  /* Store the case sensitive multibyte string.  In case of
     "case insensitive mode", the original string are stored,
     otherwise MBS_CASE points the same address that MBS points.  */
-  const unsigned char *mbs_case;
-  int cur_idx;
-  int len;
+  unsigned char *mbs_case;
 #ifdef RE_ENABLE_I18N
  /* Store the wide character string which is corresponding to MBS.  */
  wchar_t *wcs;
+  mbstate_t cur_state;
 #endif
-  /* 1 if mbs is allocated by regex library.  */
-  unsigned int mbs_alloc : 1;
-  /* 1 if mbs_case is allocated by regex library.  */
-  unsigned int mbs_case_alloc : 1;
+  /* The length of the valid characters in the buffers.  */
+  int valid_len;
+  /* The length of the buffers MBS, MBS_CASE, and WCS.  */
+  int bufs_len;
+  /* The index in MBS, which is updated by re_string_fetch_byte.  */
+  int cur_idx;
+  /* This is length_of_RAW_MBS - RAW_MBS_IDX.  */
+  int len;
+  /* The context of mbs[0].  We store the context independently, since
+     the context of mbs[0] may be different from raw_mbs[0], which is
+     the beginning of the input string.  */
+  unsigned int tip_context;
+  /* The translation passed as a part of an argument of re_compile_pattern.  */
+  RE_TRANSLATE_TYPE trans;
+  /* 1 if REG_ICASE.  */
+  unsigned int icase : 1;
 };
 typedef struct re_string_t re_string_t;
+/* In case of REG_ICASE, we allocate the buffer dynamically for mbs.  */
+#define MBS_ALLOCATED(pstr) (pstr->icase)
+/* In case that we need translation, we allocate the buffer dynamically
+   for mbs_case.  Note that mbs == mbs_case if not REG_ICASE.  */
+#define MBS_CASE_ALLOCATED(pstr) (pstr->trans != NULL)

+
+static reg_errcode_t re_string_allocate (re_string_t *pstr,
+                                         const unsigned char *str, int len,
+                                         int init_len,
+                                         RE_TRANSLATE_TYPE trans, int icase);
 static reg_errcode_t re_string_construct (re_string_t *pstr,
 					  const unsigned char *str, int len,
-					  RE_TRANSLATE_TYPE trans);
-static reg_errcode_t re_string_construct_toupper (re_string_t *pstr,
-						  const unsigned char *str,
-						  int len,
-						  RE_TRANSLATE_TYPE trans);
+                                          RE_TRANSLATE_TYPE trans, int icase);
+static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx,
+                                            int eflags, int newline);
+static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
+                                                int new_buf_len);
+#ifdef RE_ENABLE_I18N
+static void build_wcs_buffer (re_string_t *pstr);
+static void build_wcs_upper_buffer (re_string_t *pstr);
+#endif /* RE_ENABLE_I18N */
+static void build_upper_buffer (re_string_t *pstr);
+static void re_string_translate_buffer (re_string_t *pstr);
 static void re_string_destruct (re_string_t *pstr);
 #ifdef RE_ENABLE_I18N
 static int re_string_elem_size_at (const re_string_t *pstr, int idx);
@ -253,8 +287,7 @@ static unsigned int re_string_context_at (const re_string_t *input, int idx,
 #define re_string_cur_idx(pstr) ((pstr)->cur_idx)
 #define re_string_get_buffer(pstr) ((pstr)->mbs)
 #define re_string_length(pstr) ((pstr)->len)
-#define re_string_byte_at(pstr,idx) \
-  ((pstr)->mbs[idx])
+#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
 #define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
 #define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))

@ -279,27 +312,6 @@ struct bin_tree_t
 };
 typedef struct bin_tree_t bin_tree_t;

-struct re_backref_cache_entry
-{
-  int node;
-  int from;
-  int to;
-  int flag;
-};
-
-typedef struct
-{
-  int eflags;
-  int match_first;
-  int match_last;
-  int state_log_top;
-  /* Back reference cache.  */
-  int nbkref_ents;
-  int abkref_ents;
-  struct re_backref_cache_entry *bkref_ents;
-  int max_bkref_len;
-} re_match_context_t;
-

 #define CONTEXT_WORD 1
 #define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
@ -363,6 +375,32 @@ struct re_state_table_entry
  re_dfastate_t **array;
 };

+struct re_backref_cache_entry
+{
+  int node;
+  int from;
+  int to;
+  int flag;
+};
+
+typedef struct
+{
+  /* EFLAGS of the argument of regexec.  */
+  int eflags;
+  /* Where the matching ends.  */
+  int match_last;
+  /* The string object corresponding to the input string.  */
+  re_string_t *input;
+  /* The state log used by the matcher.  */
+  re_dfastate_t **state_log;
+  int state_log_top;
+  /* Back reference cache.  */
+  int nbkref_ents;
+  int abkref_ents;
+  struct re_backref_cache_entry *bkref_ents;
+  int max_bkref_len;
+} re_match_context_t;
+
 struct re_dfa_t
 {
  re_bitset_ptr_t word_char;
--- a/posix/regexec.c
+++ b/posix/regexec.c