mirror of
git://sourceware.org/git/glibc.git
synced 2024-11-21 01:12:26 +08:00
Update.
2002-04-22 Isamu Hasegawa <isamu@yamato.ibm.com> * posix/regcomp.c (re_compile_internal): Adapt it to new interface of buffer building functions. * posix/regex_internal.c (re_string_allocate): New function. (re_string_realloc_buffers): New function. (re_string_skip_chars): New function. (re_string_reconstruct): New function. (re_string_construct): Adapt it to new interface of buffer building functions. (re_string_construct_common): Likewise. (build_wcs_buffer): Likewise. (build_wcs_upper_buffer): Likewise. (build_upper_buffer): Likewise. (re_string_translate_buffer): Likewise. (re_string_context_at): Adapt it to variable length buffers. * posix/regex_internal.h (re_string_t): Add new fields to handle variable length buffers. (re_match_context_t): Likewise. * posix/regexec.c (re_search_internal): Adapt it to new interface of re_string_t and re_match_context_t. (acquire_init_state_context): Likewise. (check_matching): Likewise. (check_halt_state_context): Likewise. (proceed_next_node): Likewise. (set_regs): Likewise. (sift_states_backward): Likewise. (clean_state_log_if_need): Likewise. (sift_states_iter_mb): Likewise. (sift_states_iter_bkref): Likewise. (add_epsilon_backreference): Likewise. (transit_state): Likewise. (transit_state_sb): Likewise. (transit_state_mb): Likewise. (transit_state_bkref): Likewise. (transit_state_bkref_loop): Likewise. (check_node_accept): Likewise. (match_ctx_init): Likewise. (extend_buffers): New function. 2002-04-21 Bruno Haible <bruno@clisp.org> * iconvdata/tst-table.sh: For the second check, use the truncated GB18030 charmap table, like for the first check.
This commit is contained in:
parent
be479a6dfe
commit
612546c60d
45
ChangeLog
45
ChangeLog
@ -1,3 +1,48 @@
|
||||
2002-04-22 Isamu Hasegawa <isamu@yamato.ibm.com>
|
||||
|
||||
* posix/regcomp.c (re_compile_internal): Adapt it to new interface
|
||||
of buffer building functions.
|
||||
* posix/regex_internal.c (re_string_allocate): New function.
|
||||
(re_string_realloc_buffers): New function.
|
||||
(re_string_skip_chars): New function.
|
||||
(re_string_reconstruct): New function.
|
||||
(re_string_construct): Adapt it to new interface of buffer building
|
||||
functions.
|
||||
(re_string_construct_common): Likewise.
|
||||
(build_wcs_buffer): Likewise.
|
||||
(build_wcs_upper_buffer): Likewise.
|
||||
(build_upper_buffer): Likewise.
|
||||
(re_string_translate_buffer): Likewise.
|
||||
(re_string_context_at): Adapt it to variable length buffers.
|
||||
* posix/regex_internal.h (re_string_t): Add new fields to handle
|
||||
variable length buffers.
|
||||
(re_match_context_t): Likewise.
|
||||
* posix/regexec.c (re_search_internal): Adapt it to new interface
|
||||
of re_string_t and re_match_context_t.
|
||||
(acquire_init_state_context): Likewise.
|
||||
(check_matching): Likewise.
|
||||
(check_halt_state_context): Likewise.
|
||||
(proceed_next_node): Likewise.
|
||||
(set_regs): Likewise.
|
||||
(sift_states_backward): Likewise.
|
||||
(clean_state_log_if_need): Likewise.
|
||||
(sift_states_iter_mb): Likewise.
|
||||
(sift_states_iter_bkref): Likewise.
|
||||
(add_epsilon_backreference): Likewise.
|
||||
(transit_state): Likewise.
|
||||
(transit_state_sb): Likewise.
|
||||
(transit_state_mb): Likewise.
|
||||
(transit_state_bkref): Likewise.
|
||||
(transit_state_bkref_loop): Likewise.
|
||||
(check_node_accept): Likewise.
|
||||
(match_ctx_init): Likewise.
|
||||
(extend_buffers): New function.
|
||||
|
||||
2002-04-21 Bruno Haible <bruno@clisp.org>
|
||||
|
||||
* iconvdata/tst-table.sh: For the second check, use the truncated
|
||||
GB18030 charmap table, like for the first check.
|
||||
|
||||
2002-04-24 Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
* elf/dl-load.c (open_verify): Correct __lseek parameters.
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -692,12 +692,8 @@ re_compile_internal (preg, pattern, length, syntax)
|
||||
return err;
|
||||
}
|
||||
|
||||
if (syntax & RE_ICASE)
|
||||
err = re_string_construct_toupper (&regexp, pattern, length,
|
||||
preg->translate);
|
||||
else
|
||||
err = re_string_construct (&regexp, pattern, length, preg->translate);
|
||||
|
||||
err = re_string_construct (&regexp, pattern, length, preg->translate,
|
||||
syntax & RE_ICASE);
|
||||
if (BE (err != REG_NOERROR, 0))
|
||||
{
|
||||
re_free (dfa);
|
||||
|
@ -58,14 +58,9 @@
|
||||
#include "regex_internal.h"
|
||||
|
||||
static void re_string_construct_common (const unsigned char *str,
|
||||
int len, re_string_t *pstr);
|
||||
#ifdef RE_ENABLE_I18N
|
||||
static reg_errcode_t build_wcs_buffer (re_string_t *pstr);
|
||||
static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr);
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
static reg_errcode_t build_upper_buffer (re_string_t *pstr);
|
||||
static reg_errcode_t re_string_translate_buffer (re_string_t *pstr,
|
||||
RE_TRANSLATE_TYPE trans);
|
||||
int len, re_string_t *pstr,
|
||||
RE_TRANSLATE_TYPE trans, int icase);
|
||||
static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx);
|
||||
static re_dfastate_t *create_newstate_common (re_dfa_t *dfa,
|
||||
const re_node_set *nodes,
|
||||
unsigned int hash);
|
||||
@ -83,278 +78,416 @@ static unsigned int inline calc_state_hash (const re_node_set *nodes,
|
||||
|
||||
/* Functions for string operation. */
|
||||
|
||||
/* Construct string object. */
|
||||
/* This function allocates the buffers. It is necessary to call
|
||||
re_string_reconstruct before using the object. */
|
||||
|
||||
static reg_errcode_t
|
||||
re_string_construct (pstr, str, len, trans)
|
||||
re_string_allocate (pstr, str, len, init_len, trans, icase)
|
||||
re_string_t *pstr;
|
||||
const unsigned char *str;
|
||||
int len;
|
||||
int len, init_len, icase;
|
||||
RE_TRANSLATE_TYPE trans;
|
||||
{
|
||||
reg_errcode_t ret;
|
||||
re_string_construct_common (str, len, pstr);
|
||||
#ifdef RE_ENABLE_I18N
|
||||
if (MB_CUR_MAX >1 && pstr->len > 0)
|
||||
{
|
||||
ret = build_wcs_buffer (pstr);
|
||||
if (BE (ret != REG_NOERROR, 0))
|
||||
return ret;
|
||||
}
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
pstr->mbs_case = str;
|
||||
if (trans != NULL)
|
||||
{
|
||||
ret = re_string_translate_buffer (pstr, trans);
|
||||
if (BE (ret != REG_NOERROR, 0))
|
||||
return ret;
|
||||
}
|
||||
int init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
|
||||
re_string_construct_common (str, len, pstr, trans, icase);
|
||||
|
||||
ret = re_string_realloc_buffers (pstr, init_buf_len);
|
||||
if (BE (ret != REG_NOERROR, 0))
|
||||
return ret;
|
||||
|
||||
pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
|
||||
: (unsigned char *)str);
|
||||
pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
|
||||
pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr)
|
||||
|| MB_CUR_MAX > 1) ? pstr->valid_len : len;
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
/* Construct string object. We use this function instead of
|
||||
re_string_construct for case insensitive mode. */
|
||||
/* This function allocates the buffers and initializes them. */
|
||||
|
||||
static reg_errcode_t
|
||||
re_string_construct_toupper (pstr, str, len, trans)
|
||||
re_string_construct (pstr, str, len, trans, icase)
|
||||
re_string_t *pstr;
|
||||
const unsigned char *str;
|
||||
int len;
|
||||
int len, icase;
|
||||
RE_TRANSLATE_TYPE trans;
|
||||
{
|
||||
reg_errcode_t ret;
|
||||
/* Set case sensitive buffer. */
|
||||
re_string_construct_common (str, len, pstr);
|
||||
#ifdef RE_ENABLE_I18N
|
||||
if (MB_CUR_MAX >1)
|
||||
re_string_construct_common (str, len, pstr, trans, icase);
|
||||
/* Set 0 so that this function can initialize whole buffers. */
|
||||
pstr->valid_len = 0;
|
||||
|
||||
if (len > 0)
|
||||
{
|
||||
if (BE (pstr->len > 0, 1))
|
||||
{
|
||||
ret = build_wcs_upper_buffer (pstr);
|
||||
if (BE (ret != REG_NOERROR, 0))
|
||||
return ret;
|
||||
}
|
||||
ret = re_string_realloc_buffers (pstr, len + 1);
|
||||
if (BE (ret != REG_NOERROR, 0))
|
||||
return ret;
|
||||
}
|
||||
pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
|
||||
: (unsigned char *)str);
|
||||
pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
|
||||
|
||||
if (icase)
|
||||
{
|
||||
#ifdef RE_ENABLE_I18N
|
||||
if (MB_CUR_MAX > 1)
|
||||
build_wcs_upper_buffer (pstr);
|
||||
else
|
||||
build_upper_buffer (pstr);
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
}
|
||||
else
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
{
|
||||
if (BE (pstr->len > 0, 1))
|
||||
#ifdef RE_ENABLE_I18N
|
||||
if (MB_CUR_MAX > 1)
|
||||
build_wcs_buffer (pstr);
|
||||
else
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
{
|
||||
ret = build_upper_buffer (pstr);
|
||||
if (BE (ret != REG_NOERROR, 0))
|
||||
return ret;
|
||||
if (trans != NULL)
|
||||
re_string_translate_buffer (pstr);
|
||||
else
|
||||
pstr->valid_len = len;
|
||||
}
|
||||
}
|
||||
pstr->mbs_case = str;
|
||||
if (trans != NULL)
|
||||
{
|
||||
ret = re_string_translate_buffer (pstr, trans);
|
||||
if (BE (ret != REG_NOERROR, 0))
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Initialized whole buffers, then valid_len == bufs_len. */
|
||||
pstr->valid_len = pstr->bufs_len;
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
/* Helper functions for re_string_construct_*. */
|
||||
/* Helper functions for re_string_allocate, and re_string_construct. */
|
||||
|
||||
static reg_errcode_t
|
||||
re_string_realloc_buffers (pstr, new_buf_len)
|
||||
re_string_t *pstr;
|
||||
int new_buf_len;
|
||||
{
|
||||
#ifdef RE_ENABLE_I18N
|
||||
if (MB_CUR_MAX > 1)
|
||||
{
|
||||
pstr->wcs = re_realloc (pstr->wcs, wchar_t, new_buf_len);
|
||||
if (BE (pstr->wcs == NULL, 0))
|
||||
return REG_ESPACE;
|
||||
}
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
if (MBS_ALLOCATED (pstr))
|
||||
{
|
||||
pstr->mbs = re_realloc (pstr->mbs, unsigned char, new_buf_len);
|
||||
if (BE (pstr->mbs == NULL, 0))
|
||||
return REG_ESPACE;
|
||||
}
|
||||
if (MBS_CASE_ALLOCATED (pstr))
|
||||
{
|
||||
pstr->mbs_case = re_realloc (pstr->mbs_case, unsigned char, new_buf_len);
|
||||
if (BE (pstr->mbs_case == NULL, 0))
|
||||
return REG_ESPACE;
|
||||
if (!MBS_ALLOCATED (pstr))
|
||||
pstr->mbs = pstr->mbs_case;
|
||||
}
|
||||
pstr->bufs_len = new_buf_len;
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
re_string_construct_common (str, len, pstr)
|
||||
re_string_construct_common (str, len, pstr, trans, icase)
|
||||
const unsigned char *str;
|
||||
int len;
|
||||
re_string_t *pstr;
|
||||
RE_TRANSLATE_TYPE trans;
|
||||
int icase;
|
||||
{
|
||||
pstr->mbs = str;
|
||||
pstr->cur_idx = 0;
|
||||
memset (pstr, '\0', sizeof (re_string_t));
|
||||
pstr->raw_mbs = str;
|
||||
pstr->len = len;
|
||||
#ifdef RE_ENABLE_I18N
|
||||
pstr->wcs = NULL;
|
||||
#endif
|
||||
pstr->mbs_case = NULL;
|
||||
pstr->mbs_alloc = 0;
|
||||
pstr->mbs_case_alloc = 0;
|
||||
pstr->trans = trans;
|
||||
pstr->icase = icase ? 1 : 0;
|
||||
}
|
||||
|
||||
#ifdef RE_ENABLE_I18N
|
||||
|
||||
/* Build wide character buffer for `pstr'.
|
||||
/* Build wide character buffer PSTR->WCS.
|
||||
If the byte sequence of the string is:
|
||||
<mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
|
||||
Then wide character buffer will be:
|
||||
<wc1> , WEOF , <wc2> , WEOF , <wc3>
|
||||
We use WEOF for padding, they indicate that the position isn't
|
||||
a first byte of a multibyte character. */
|
||||
a first byte of a multibyte character.
|
||||
|
||||
static reg_errcode_t
|
||||
Note that this function assumes PSTR->VALID_LEN elements are already
|
||||
built and starts from PSTR->VALID_LEN. */
|
||||
|
||||
static void
|
||||
build_wcs_buffer (pstr)
|
||||
re_string_t *pstr;
|
||||
{
|
||||
mbstate_t state, prev_st;
|
||||
wchar_t wc;
|
||||
int char_idx, char_len, mbclen;
|
||||
|
||||
pstr->wcs = re_malloc (wchar_t, pstr->len + 1);
|
||||
if (BE (pstr->wcs == NULL, 0))
|
||||
return REG_ESPACE;
|
||||
|
||||
memset (&state, '\0', sizeof (mbstate_t));
|
||||
char_len = pstr->len;
|
||||
for (char_idx = 0; char_idx < char_len ;)
|
||||
mbstate_t prev_st;
|
||||
int byte_idx, end_idx, mbclen, remain_len;
|
||||
/* Build the buffers from pstr->valid_len to either pstr->len or
|
||||
pstr->bufs_len. */
|
||||
end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
|
||||
for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
|
||||
{
|
||||
int next_idx, remain_len = char_len - char_idx;
|
||||
prev_st = state;
|
||||
mbclen = mbrtowc (&wc, pstr->mbs + char_idx, remain_len, &state);
|
||||
if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
|
||||
/* We treat these cases as a singlebyte character. */
|
||||
wchar_t wc;
|
||||
remain_len = end_idx - byte_idx;
|
||||
prev_st = pstr->cur_state;
|
||||
mbclen = mbrtowc (&wc, pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx,
|
||||
remain_len, &pstr->cur_state);
|
||||
if (BE (mbclen == (size_t) -2, 0))
|
||||
{
|
||||
/* The buffer doesn't have enough space; stop building here. */
|
||||
pstr->cur_state = prev_st;
|
||||
break;
|
||||
}
|
||||
else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
|
||||
{
|
||||
/* We treat these cases as a singlebyte character. */
|
||||
mbclen = 1;
|
||||
wc = (wchar_t) pstr->mbs[char_idx++];
|
||||
state = prev_st;
|
||||
wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
|
||||
pstr->cur_state = prev_st;
|
||||
}
|
||||
|
||||
/* Apply the translation if needed. */
|
||||
if (pstr->trans != NULL && mbclen == 1)
|
||||
{
|
||||
int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]];
|
||||
pstr->mbs_case[byte_idx] = ch;
|
||||
}
|
||||
/* Write wide character and padding. */
|
||||
pstr->wcs[char_idx++] = wc;
|
||||
for (next_idx = char_idx + mbclen - 1; char_idx < next_idx ;)
|
||||
pstr->wcs[char_idx++] = WEOF;
|
||||
pstr->wcs[byte_idx++] = wc;
|
||||
/* Write paddings. */
|
||||
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
|
||||
pstr->wcs[byte_idx++] = WEOF;
|
||||
}
|
||||
return REG_NOERROR;
|
||||
pstr->valid_len = byte_idx;
|
||||
}
|
||||
|
||||
static reg_errcode_t
|
||||
/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
|
||||
but for REG_ICASE. */
|
||||
|
||||
static void
|
||||
build_wcs_upper_buffer (pstr)
|
||||
re_string_t *pstr;
|
||||
{
|
||||
mbstate_t state, prev_st;
|
||||
wchar_t wc;
|
||||
unsigned char *mbs_upper;
|
||||
int char_idx, char_len, mbclen;
|
||||
|
||||
pstr->wcs = re_malloc (wchar_t, pstr->len + 1);
|
||||
mbs_upper = re_malloc (unsigned char, pstr->len + 1);
|
||||
if (BE (pstr->wcs == NULL || mbs_upper == NULL, 0))
|
||||
mbstate_t prev_st;
|
||||
int byte_idx, end_idx, mbclen, remain_len;
|
||||
/* Build the buffers from pstr->valid_len to either pstr->len or
|
||||
pstr->bufs_len. */
|
||||
end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
|
||||
for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
|
||||
{
|
||||
pstr->wcs = NULL;
|
||||
return REG_ESPACE;
|
||||
}
|
||||
|
||||
memset (&state, '\0', sizeof (mbstate_t));
|
||||
char_len = pstr->len;
|
||||
for (char_idx = 0 ; char_idx < char_len ; char_idx += mbclen)
|
||||
{
|
||||
int byte_idx, remain_len = char_len - char_idx;
|
||||
prev_st = state;
|
||||
mbclen = mbrtowc (&wc, pstr->mbs + char_idx, remain_len, &state);
|
||||
if (mbclen == 1)
|
||||
wchar_t wc;
|
||||
remain_len = end_idx - byte_idx;
|
||||
prev_st = pstr->cur_state;
|
||||
mbclen = mbrtowc (&wc, pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx,
|
||||
remain_len, &pstr->cur_state);
|
||||
if (BE (mbclen == (size_t) -2, 0))
|
||||
{
|
||||
pstr->wcs[char_idx] = wc;
|
||||
if (islower (pstr->mbs[char_idx]))
|
||||
mbs_upper[char_idx] = toupper (pstr->mbs[char_idx]);
|
||||
else
|
||||
mbs_upper[char_idx] = pstr->mbs[char_idx];
|
||||
/* The buffer doesn't have enough space; stop building here. */
|
||||
pstr->cur_state = prev_st;
|
||||
break;
|
||||
}
|
||||
else if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1
|
||||
|| mbclen == 0, 0))
|
||||
/* We treat these cases as a singlebyte character. */
|
||||
else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0)
|
||||
{
|
||||
mbclen = 1;
|
||||
pstr->wcs[char_idx] = (wchar_t) pstr->mbs[char_idx];
|
||||
mbs_upper[char_idx] = pstr->mbs[char_idx];
|
||||
state = prev_st;
|
||||
/* In case of a singlebyte character. */
|
||||
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
|
||||
/* Apply the translation if needed. */
|
||||
if (pstr->trans != NULL && mbclen == 1)
|
||||
{
|
||||
ch = pstr->trans[ch];
|
||||
pstr->mbs_case[byte_idx] = ch;
|
||||
}
|
||||
pstr->wcs[byte_idx] = iswlower (wc) ? toupper (wc) : wc;
|
||||
pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch;
|
||||
if (BE (mbclen == (size_t) -1, 0))
|
||||
pstr->cur_state = prev_st;
|
||||
}
|
||||
else /* mbclen > 1 */
|
||||
{
|
||||
pstr->wcs[char_idx] = wc;
|
||||
if (iswlower (wc))
|
||||
wcrtomb (mbs_upper + char_idx, towupper (wc), &prev_st);
|
||||
wcrtomb (pstr->mbs + byte_idx, towupper (wc), &prev_st);
|
||||
else
|
||||
memcpy (mbs_upper + char_idx, pstr->mbs + char_idx, mbclen);
|
||||
for (byte_idx = 1 ; byte_idx < mbclen ; byte_idx++)
|
||||
pstr->wcs[char_idx + byte_idx] = WEOF;
|
||||
memcpy (pstr->mbs + byte_idx,
|
||||
pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
|
||||
pstr->wcs[byte_idx++] = iswlower (wc) ? toupper (wc) : wc;
|
||||
/* Write paddings. */
|
||||
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
|
||||
pstr->wcs[byte_idx++] = WEOF;
|
||||
}
|
||||
}
|
||||
pstr->mbs = mbs_upper;
|
||||
pstr->mbs_alloc = 1;
|
||||
return REG_NOERROR;
|
||||
pstr->valid_len = byte_idx;
|
||||
}
|
||||
|
||||
/* Skip characters until the index becomes greater than NEW_RAW_IDX.
|
||||
Return the index. */
|
||||
|
||||
static int
|
||||
re_string_skip_chars (pstr, new_raw_idx)
|
||||
re_string_t *pstr;
|
||||
int new_raw_idx;
|
||||
{
|
||||
mbstate_t prev_st;
|
||||
int rawbuf_idx, mbclen;
|
||||
|
||||
/* Skip the characters which are not necessary to check. */
|
||||
for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_len;
|
||||
rawbuf_idx < new_raw_idx;)
|
||||
{
|
||||
int remain_len = pstr->len - rawbuf_idx;
|
||||
prev_st = pstr->cur_state;
|
||||
mbclen = mbrlen (pstr->raw_mbs + rawbuf_idx, remain_len,
|
||||
&pstr->cur_state);
|
||||
if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
|
||||
{
|
||||
/* We treat these cases as a singlebyte character. */
|
||||
mbclen = 1;
|
||||
pstr->cur_state = prev_st;
|
||||
}
|
||||
/* Then proceed to the next character. */
|
||||
rawbuf_idx += mbclen;
|
||||
}
|
||||
return rawbuf_idx;
|
||||
}
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
|
||||
static reg_errcode_t
|
||||
/* Build the buffer PSTR->MBS, and apply the translation if we need.
|
||||
This function is used in case of REG_ICASE. */
|
||||
|
||||
static void
|
||||
build_upper_buffer (pstr)
|
||||
re_string_t *pstr;
|
||||
{
|
||||
unsigned char *mbs_upper;
|
||||
int char_idx, char_len;
|
||||
int char_idx, end_idx;
|
||||
end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
|
||||
|
||||
mbs_upper = re_malloc (unsigned char, pstr->len + 1);
|
||||
if (BE (mbs_upper == NULL, 0))
|
||||
return REG_ESPACE;
|
||||
|
||||
char_len = pstr->len;
|
||||
for (char_idx = 0 ; char_idx < char_len ; char_idx ++)
|
||||
for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
|
||||
{
|
||||
if (islower (pstr->mbs[char_idx]))
|
||||
mbs_upper[char_idx] = toupper (pstr->mbs[char_idx]);
|
||||
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
|
||||
if (pstr->trans != NULL)
|
||||
{
|
||||
ch = pstr->trans[ch];
|
||||
pstr->mbs_case[char_idx] = ch;
|
||||
}
|
||||
if (islower (ch))
|
||||
pstr->mbs[char_idx] = toupper (ch);
|
||||
else
|
||||
mbs_upper[char_idx] = pstr->mbs[char_idx];
|
||||
pstr->mbs[char_idx] = ch;
|
||||
}
|
||||
pstr->mbs = mbs_upper;
|
||||
pstr->mbs_alloc = 1;
|
||||
return REG_NOERROR;
|
||||
pstr->valid_len = char_idx;
|
||||
}
|
||||
|
||||
/* Apply TRANS to the buffer in PSTR. We assume that wide char buffer
|
||||
is already constructed if MB_CUR_MAX > 1. */
|
||||
/* Apply TRANS to the buffer in PSTR. */
|
||||
|
||||
static void
|
||||
re_string_translate_buffer (pstr)
|
||||
re_string_t *pstr;
|
||||
{
|
||||
int buf_idx, end_idx;
|
||||
end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
|
||||
|
||||
for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
|
||||
{
|
||||
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
|
||||
pstr->mbs_case[buf_idx] = pstr->trans[ch];
|
||||
}
|
||||
|
||||
pstr->valid_len = buf_idx;
|
||||
}
|
||||
|
||||
/* This function reconstructs the buffers.
|
||||
Concretely, convert to wide character in case of MB_CUR_MAX > 1,
|
||||
convert to upper case in case of REG_ICASE, apply translation. */
|
||||
|
||||
static reg_errcode_t
|
||||
re_string_translate_buffer (pstr, trans)
|
||||
re_string_reconstruct (pstr, idx, eflags, newline)
|
||||
re_string_t *pstr;
|
||||
RE_TRANSLATE_TYPE trans;
|
||||
int idx, eflags, newline;
|
||||
{
|
||||
int buf_idx;
|
||||
unsigned char *transed_buf, *transed_case_buf;
|
||||
#ifdef DEBUG
|
||||
assert (trans != NULL);
|
||||
#endif
|
||||
if (pstr->mbs_alloc)
|
||||
int offset = idx - pstr->raw_mbs_idx;
|
||||
if (offset < 0)
|
||||
{
|
||||
transed_buf = (unsigned char *) pstr->mbs;
|
||||
transed_case_buf = re_malloc (unsigned char, pstr->len + 1);
|
||||
if (BE (transed_case_buf == NULL, 0))
|
||||
return REG_ESPACE;
|
||||
pstr->mbs_case_alloc = 1;
|
||||
/* Reset buffer. */
|
||||
memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
|
||||
pstr->valid_len = pstr->raw_mbs_idx = 0;
|
||||
pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
|
||||
: CONTEXT_NEWLINE | CONTEXT_BEGBUF);
|
||||
if (!MBS_CASE_ALLOCATED (pstr))
|
||||
pstr->mbs_case = (unsigned char *)pstr->raw_mbs;
|
||||
if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr))
|
||||
pstr->mbs = (unsigned char *)pstr->raw_mbs;
|
||||
offset = idx;
|
||||
}
|
||||
else
|
||||
|
||||
if (offset != 0)
|
||||
{
|
||||
transed_buf = re_malloc (unsigned char, pstr->len + 1);
|
||||
if (BE (transed_buf == NULL, 0))
|
||||
return REG_ESPACE;
|
||||
transed_case_buf = NULL;
|
||||
pstr->mbs_alloc = 1;
|
||||
}
|
||||
for (buf_idx = 0 ; buf_idx < pstr->len ; buf_idx++)
|
||||
{
|
||||
#ifdef RE_ENABLE_I18N
|
||||
if (MB_CUR_MAX > 1 && !re_string_is_single_byte_char (pstr, buf_idx))
|
||||
transed_buf[buf_idx] = pstr->mbs[buf_idx];
|
||||
else
|
||||
#endif
|
||||
transed_buf[buf_idx] = trans[pstr->mbs[buf_idx]];
|
||||
if (transed_case_buf)
|
||||
pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags,
|
||||
newline);
|
||||
/* Do any of the already-checked characters remain? */
|
||||
if (offset < pstr->valid_len)
|
||||
{
|
||||
/* Yes, move them to the front of the buffer. */
|
||||
#ifdef RE_ENABLE_I18N
|
||||
if (MB_CUR_MAX > 1 && !re_string_is_single_byte_char (pstr, buf_idx))
|
||||
transed_case_buf[buf_idx] = pstr->mbs_case[buf_idx];
|
||||
else
|
||||
if (MB_CUR_MAX > 1)
|
||||
memmove (pstr->wcs, pstr->wcs + offset,
|
||||
(pstr->valid_len - offset) * sizeof (wchar_t));
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
if (MBS_ALLOCATED (pstr))
|
||||
memmove (pstr->mbs, pstr->mbs + offset,
|
||||
pstr->valid_len - offset);
|
||||
if (MBS_CASE_ALLOCATED (pstr))
|
||||
memmove (pstr->mbs_case, pstr->mbs_case + offset,
|
||||
pstr->valid_len - offset);
|
||||
pstr->valid_len -= offset;
|
||||
#if DEBUG
|
||||
assert (pstr->valid_len > 0);
|
||||
#endif
|
||||
transed_case_buf[buf_idx] = trans[pstr->mbs_case[buf_idx]];
|
||||
}
|
||||
else
|
||||
{
|
||||
/* No, skip all characters until IDX. */
|
||||
pstr->valid_len = 0;
|
||||
#ifdef RE_ENABLE_I18N
|
||||
if (MB_CUR_MAX > 1)
|
||||
{
|
||||
int wcs_idx;
|
||||
pstr->valid_len = re_string_skip_chars (pstr, idx) - idx;
|
||||
for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
|
||||
pstr->wcs[wcs_idx] = WEOF;
|
||||
}
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
}
|
||||
if (!MBS_CASE_ALLOCATED (pstr))
|
||||
{
|
||||
pstr->mbs_case += offset;
|
||||
/* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED. */
|
||||
if (!MBS_ALLOCATED (pstr))
|
||||
pstr->mbs += offset;
|
||||
}
|
||||
}
|
||||
if (pstr->mbs_case_alloc == 1)
|
||||
pstr->raw_mbs_idx = idx;
|
||||
pstr->len -= offset;
|
||||
|
||||
/* Then build the buffers. */
|
||||
#ifdef RE_ENABLE_I18N
|
||||
if (MB_CUR_MAX > 1)
|
||||
{
|
||||
pstr->mbs = transed_buf;
|
||||
pstr->mbs_case = transed_case_buf;
|
||||
if (pstr->icase)
|
||||
build_wcs_upper_buffer (pstr);
|
||||
else
|
||||
build_wcs_buffer (pstr);
|
||||
}
|
||||
else
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
{
|
||||
pstr->mbs = transed_buf;
|
||||
pstr->mbs_case = transed_buf;
|
||||
if (pstr->icase)
|
||||
build_upper_buffer (pstr);
|
||||
else if (pstr->trans != NULL)
|
||||
re_string_translate_buffer (pstr);
|
||||
}
|
||||
pstr->cur_idx = 0;
|
||||
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
@ -365,13 +498,14 @@ re_string_destruct (pstr)
|
||||
#ifdef RE_ENABLE_I18N
|
||||
re_free (pstr->wcs);
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
if (pstr->mbs_alloc)
|
||||
re_free ((void *) pstr->mbs);
|
||||
if (pstr->mbs_case_alloc)
|
||||
re_free ((void *) pstr->mbs_case);
|
||||
if (MBS_ALLOCATED (pstr))
|
||||
re_free (pstr->mbs);
|
||||
if (MBS_CASE_ALLOCATED (pstr))
|
||||
re_free (pstr->mbs_case);
|
||||
}
|
||||
|
||||
/* Return the context at IDX in INPUT. */
|
||||
|
||||
static unsigned int
|
||||
re_string_context_at (input, idx, eflags, newline_anchor)
|
||||
const re_string_t *input;
|
||||
@ -380,17 +514,13 @@ re_string_context_at (input, idx, eflags, newline_anchor)
|
||||
int c;
|
||||
if (idx < 0 || idx == input->len)
|
||||
{
|
||||
unsigned int context = 0;
|
||||
if (idx < 0)
|
||||
context = CONTEXT_BEGBUF;
|
||||
/* In this case, we use the value stored in input->tip_context,
|
||||
since we can't know the character in input->mbs[-1] here. */
|
||||
return input->tip_context;
|
||||
else /* (idx == input->len) */
|
||||
context = CONTEXT_ENDBUF;
|
||||
|
||||
if ((idx < 0 && !(eflags & REG_NOTBOL))
|
||||
|| (idx == input->len && !(eflags & REG_NOTEOL)))
|
||||
return CONTEXT_NEWLINE | context;
|
||||
else
|
||||
return context;
|
||||
return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
|
||||
: CONTEXT_NEWLINE | CONTEXT_ENDBUF);
|
||||
}
|
||||
c = re_string_byte_at (input, idx);
|
||||
if (IS_WORD_CHAR (c))
|
||||
@ -737,6 +867,7 @@ re_node_set_insert (set, elem)
|
||||
if (set->nelem - idx > 0)
|
||||
memcpy (new_array + idx + 1, set->elems + idx,
|
||||
sizeof (int) * (set->nelem - idx));
|
||||
re_free (set->elems);
|
||||
set->elems = new_array;
|
||||
}
|
||||
else
|
||||
|
@ -201,33 +201,67 @@ typedef struct
|
||||
|
||||
struct re_string_t
|
||||
{
|
||||
/* Indicate the raw buffer which is the original string passed as an
|
||||
argument of regexec(), re_search(), etc.. */
|
||||
const unsigned char *raw_mbs;
|
||||
/* Index in RAW_MBS. Each character mbs[i] corresponds to
|
||||
raw_mbs[raw_mbs_idx + i]. */
|
||||
int raw_mbs_idx;
|
||||
/* Store the multibyte string. In case of "case insensitive mode" like
|
||||
REG_ICASE, upper cases of the string are stored. */
|
||||
const unsigned char *mbs;
|
||||
REG_ICASE, upper cases of the string are stored, otherwise MBS points
|
||||
the same address that RAW_MBS points. */
|
||||
unsigned char *mbs;
|
||||
/* Store the case sensitive multibyte string. In case of
|
||||
"case insensitive mode", the original string is stored,
|
||||
otherwise MBS_CASE points the same address that MBS points. */
|
||||
const unsigned char *mbs_case;
|
||||
int cur_idx;
|
||||
int len;
|
||||
unsigned char *mbs_case;
|
||||
#ifdef RE_ENABLE_I18N
|
||||
/* Store the wide character string which is corresponding to MBS. */
|
||||
wchar_t *wcs;
|
||||
mbstate_t cur_state;
|
||||
#endif
|
||||
/* 1 if mbs is allocated by regex library. */
|
||||
unsigned int mbs_alloc : 1;
|
||||
/* 1 if mbs_case is allocated by regex library. */
|
||||
unsigned int mbs_case_alloc : 1;
|
||||
/* The length of the valid characters in the buffers. */
|
||||
int valid_len;
|
||||
/* The length of the buffers MBS, MBS_CASE, and WCS. */
|
||||
int bufs_len;
|
||||
/* The index in MBS, which is updated by re_string_fetch_byte. */
|
||||
int cur_idx;
|
||||
/* This is length_of_RAW_MBS - RAW_MBS_IDX. */
|
||||
int len;
|
||||
/* The context of mbs[0]. We store the context independently, since
|
||||
the context of mbs[0] may be different from raw_mbs[0], which is
|
||||
the beginning of the input string. */
|
||||
unsigned int tip_context;
|
||||
/* The translation passed as a part of an argument of re_compile_pattern. */
|
||||
RE_TRANSLATE_TYPE trans;
|
||||
/* 1 if REG_ICASE. */
|
||||
unsigned int icase : 1;
|
||||
};
|
||||
typedef struct re_string_t re_string_t;
|
||||
/* In case of REG_ICASE, we allocate the buffer dynamically for mbs. */
|
||||
#define MBS_ALLOCATED(pstr) (pstr->icase)
|
||||
/* In case that we need translation, we allocate the buffer dynamically
|
||||
for mbs_case. Note that mbs == mbs_case if not REG_ICASE. */
|
||||
#define MBS_CASE_ALLOCATED(pstr) (pstr->trans != NULL)
|
||||
|
||||
|
||||
static reg_errcode_t re_string_allocate (re_string_t *pstr,
|
||||
const unsigned char *str, int len,
|
||||
int init_len,
|
||||
RE_TRANSLATE_TYPE trans, int icase);
|
||||
static reg_errcode_t re_string_construct (re_string_t *pstr,
|
||||
const unsigned char *str, int len,
|
||||
RE_TRANSLATE_TYPE trans);
|
||||
static reg_errcode_t re_string_construct_toupper (re_string_t *pstr,
|
||||
const unsigned char *str,
|
||||
int len,
|
||||
RE_TRANSLATE_TYPE trans);
|
||||
RE_TRANSLATE_TYPE trans, int icase);
|
||||
static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx,
|
||||
int eflags, int newline);
|
||||
static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
|
||||
int new_buf_len);
|
||||
#ifdef RE_ENABLE_I18N
|
||||
static void build_wcs_buffer (re_string_t *pstr);
|
||||
static void build_wcs_upper_buffer (re_string_t *pstr);
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
static void build_upper_buffer (re_string_t *pstr);
|
||||
static void re_string_translate_buffer (re_string_t *pstr);
|
||||
static void re_string_destruct (re_string_t *pstr);
|
||||
#ifdef RE_ENABLE_I18N
|
||||
static int re_string_elem_size_at (const re_string_t *pstr, int idx);
|
||||
@ -253,8 +287,7 @@ static unsigned int re_string_context_at (const re_string_t *input, int idx,
|
||||
#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
|
||||
#define re_string_get_buffer(pstr) ((pstr)->mbs)
|
||||
#define re_string_length(pstr) ((pstr)->len)
|
||||
#define re_string_byte_at(pstr,idx) \
|
||||
((pstr)->mbs[idx])
|
||||
#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
|
||||
#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
|
||||
#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
|
||||
|
||||
@ -279,27 +312,6 @@ struct bin_tree_t
|
||||
};
|
||||
typedef struct bin_tree_t bin_tree_t;
|
||||
|
||||
struct re_backref_cache_entry
|
||||
{
|
||||
int node;
|
||||
int from;
|
||||
int to;
|
||||
int flag;
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int eflags;
|
||||
int match_first;
|
||||
int match_last;
|
||||
int state_log_top;
|
||||
/* Back reference cache. */
|
||||
int nbkref_ents;
|
||||
int abkref_ents;
|
||||
struct re_backref_cache_entry *bkref_ents;
|
||||
int max_bkref_len;
|
||||
} re_match_context_t;
|
||||
|
||||
|
||||
#define CONTEXT_WORD 1
|
||||
#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
|
||||
@ -363,6 +375,32 @@ struct re_state_table_entry
|
||||
re_dfastate_t **array;
|
||||
};
|
||||
|
||||
struct re_backref_cache_entry
|
||||
{
|
||||
int node;
|
||||
int from;
|
||||
int to;
|
||||
int flag;
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/* EFLAGS of the argument of regexec. */
|
||||
int eflags;
|
||||
/* Where the matching ends. */
|
||||
int match_last;
|
||||
/* The string object corresponding to the input string. */
|
||||
re_string_t *input;
|
||||
/* The state log used by the matcher. */
|
||||
re_dfastate_t **state_log;
|
||||
int state_log_top;
|
||||
/* Back reference cache. */
|
||||
int nbkref_ents;
|
||||
int abkref_ents;
|
||||
struct re_backref_cache_entry *bkref_ents;
|
||||
int max_bkref_len;
|
||||
} re_match_context_t;
|
||||
|
||||
struct re_dfa_t
|
||||
{
|
||||
re_bitset_ptr_t word_char;
|
||||
|
631
posix/regexec.c
631
posix/regexec.c
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user