mirror of
git://sourceware.org/git/glibc.git
synced 2025-03-31 14:01:18 +08:00
Update.
* misc/mntent_r.c (decode_name): Fix decoding of tab, add decoding of newline. * manual/sysinfo.texi (mtab): Adjust description accordingly. Reported by Andries.Brouwer@cwi.nl.
This commit is contained in:
parent
bb3f4825c4
commit
37369d1cef
@ -1,5 +1,10 @@
|
||||
2003-11-28 Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
* misc/mntent_r.c (decode_name): Fix decoding of tab, add decoding
|
||||
of newline.
|
||||
* manual/sysinfo.texi (mtab): Adjust description accordingly.
|
||||
Reported by Andries.Brouwer@cwi.nl.
|
||||
|
||||
* sysdeps/x86_64/fpu/libm-test-ulps: Add some more minor changes
|
||||
to compensate other setup.
|
||||
|
||||
|
@ -673,12 +673,13 @@ filled with the information from the next entry from the file currently
|
||||
read.
|
||||
|
||||
The file format used prescribes the use of spaces or tab characters to
|
||||
separate the fields. This makes it harder to use name containing one of
|
||||
these characters (e.g., mount points using spaces). Therefore these
|
||||
characters are encoded in the files and the @code{getmntent} function
|
||||
takes care of the decoding while reading the entries back in.
|
||||
@code{'\040'} is used to encode a space character, @code{'\012'} to
|
||||
encode a tab character and @code{'\\'} to encode a backslash.
|
||||
separate the fields. This makes it harder to use names containing one
|
||||
of these characters (e.g., mount points using spaces). Therefore
|
||||
these characters are encoded in the files and the @code{getmntent}
|
||||
function takes care of the decoding while reading the entries back in.
|
||||
@code{'\040'} is used to encode a space character, @code{'\011'} to
|
||||
encode a tab character, @code{'\012'} to encode a newline character,
|
||||
and @code{'\\'} to encode a backslash.
|
||||
|
||||
If there was an error or the end of the file is reached the return value
|
||||
is @code{NULL}.
|
||||
|
@ -84,12 +84,18 @@ decode_name (char *buf)
|
||||
*wp++ = ' ';
|
||||
rp += 3;
|
||||
}
|
||||
else if (rp[0] == '\\' && rp[1] == '0' && rp[2] == '1' && rp[3] == '2')
|
||||
else if (rp[0] == '\\' && rp[1] == '0' && rp[2] == '1' && rp[3] == '1')
|
||||
{
|
||||
/* \011 is a TAB. */
|
||||
*wp++ = '\t';
|
||||
rp += 3;
|
||||
}
|
||||
else if (rp[0] == '\\' && rp[1] == '0' && rp[2] == '1' && rp[3] == '2')
|
||||
{
|
||||
/* \012 is a NEWLINE. */
|
||||
*wp++ = '\n';
|
||||
rp += 3;
|
||||
}
|
||||
else if (rp[0] == '\\' && rp[1] == '\\')
|
||||
{
|
||||
/* We have to escape \\ to be able to represent all characters. */
|
||||
|
@ -62,17 +62,14 @@ re_string_allocate (pstr, str, len, init_len, trans, icase, dfa)
|
||||
init_len = dfa->mb_cur_max;
|
||||
init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
|
||||
re_string_construct_common (str, len, pstr, trans, icase, dfa);
|
||||
pstr->stop = pstr->len;
|
||||
|
||||
ret = re_string_realloc_buffers (pstr, init_buf_len);
|
||||
if (BE (ret != REG_NOERROR, 0))
|
||||
return ret;
|
||||
|
||||
pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
|
||||
: (unsigned char *) str);
|
||||
pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
|
||||
pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr)
|
||||
|| dfa->mb_cur_max > 1) ? pstr->valid_len : len;
|
||||
pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
|
||||
pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
|
||||
pstr->valid_raw_len = pstr->valid_len;
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
@ -88,9 +85,6 @@ re_string_construct (pstr, str, len, trans, icase, dfa)
|
||||
{
|
||||
reg_errcode_t ret;
|
||||
re_string_construct_common (str, len, pstr, trans, icase, dfa);
|
||||
pstr->stop = pstr->len;
|
||||
/* Set 0 so that this function can initialize whole buffers. */
|
||||
pstr->valid_len = 0;
|
||||
|
||||
if (len > 0)
|
||||
{
|
||||
@ -98,15 +92,27 @@ re_string_construct (pstr, str, len, trans, icase, dfa)
|
||||
if (BE (ret != REG_NOERROR, 0))
|
||||
return ret;
|
||||
}
|
||||
pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
|
||||
: (unsigned char *) str);
|
||||
pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
|
||||
pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
|
||||
|
||||
if (icase)
|
||||
{
|
||||
#ifdef RE_ENABLE_I18N
|
||||
if (dfa->mb_cur_max > 1)
|
||||
build_wcs_upper_buffer (pstr);
|
||||
{
|
||||
while (1)
|
||||
{
|
||||
ret = build_wcs_upper_buffer (pstr);
|
||||
if (BE (ret != REG_NOERROR, 0))
|
||||
return ret;
|
||||
if (pstr->valid_raw_len >= len)
|
||||
break;
|
||||
if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
|
||||
break;
|
||||
ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
|
||||
if (BE (ret != REG_NOERROR, 0))
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
build_upper_buffer (pstr);
|
||||
@ -122,12 +128,13 @@ re_string_construct (pstr, str, len, trans, icase, dfa)
|
||||
if (trans != NULL)
|
||||
re_string_translate_buffer (pstr);
|
||||
else
|
||||
pstr->valid_len = len;
|
||||
{
|
||||
pstr->valid_len = pstr->bufs_len;
|
||||
pstr->valid_raw_len = pstr->bufs_len;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Initialized whole buffers, then valid_len == bufs_len. */
|
||||
pstr->valid_len = pstr->bufs_len;
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
@ -145,9 +152,16 @@ re_string_realloc_buffers (pstr, new_buf_len)
|
||||
if (BE (new_array == NULL, 0))
|
||||
return REG_ESPACE;
|
||||
pstr->wcs = new_array;
|
||||
if (pstr->offsets != NULL)
|
||||
{
|
||||
int *new_array = re_realloc (pstr->offsets, int, new_buf_len);
|
||||
if (BE (new_array == NULL, 0))
|
||||
return REG_ESPACE;
|
||||
pstr->offsets = new_array;
|
||||
}
|
||||
}
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
if (MBS_ALLOCATED (pstr))
|
||||
if (pstr->mbs_allocated)
|
||||
{
|
||||
unsigned char *new_array = re_realloc (pstr->mbs, unsigned char,
|
||||
new_buf_len);
|
||||
@ -155,16 +169,6 @@ re_string_realloc_buffers (pstr, new_buf_len)
|
||||
return REG_ESPACE;
|
||||
pstr->mbs = new_array;
|
||||
}
|
||||
if (MBS_CASE_ALLOCATED (pstr))
|
||||
{
|
||||
unsigned char *new_array = re_realloc (pstr->mbs_case, unsigned char,
|
||||
new_buf_len);
|
||||
if (BE (new_array == NULL, 0))
|
||||
return REG_ESPACE;
|
||||
pstr->mbs_case = new_array;
|
||||
if (!MBS_ALLOCATED (pstr))
|
||||
pstr->mbs = pstr->mbs_case;
|
||||
}
|
||||
pstr->bufs_len = new_buf_len;
|
||||
return REG_NOERROR;
|
||||
}
|
||||
@ -182,11 +186,15 @@ re_string_construct_common (str, len, pstr, trans, icase, dfa)
|
||||
memset (pstr, '\0', sizeof (re_string_t));
|
||||
pstr->raw_mbs = (const unsigned char *) str;
|
||||
pstr->len = len;
|
||||
pstr->raw_len = len;
|
||||
pstr->trans = trans;
|
||||
pstr->icase = icase ? 1 : 0;
|
||||
pstr->mbs_allocated = (trans != NULL || icase);
|
||||
pstr->mb_cur_max = dfa->mb_cur_max;
|
||||
pstr->is_utf8 = dfa->is_utf8;
|
||||
pstr->map_notascii = dfa->map_notascii;
|
||||
pstr->stop = pstr->len;
|
||||
pstr->raw_stop = pstr->stop;
|
||||
}
|
||||
|
||||
#ifdef RE_ENABLE_I18N
|
||||
@ -206,18 +214,39 @@ static void
|
||||
build_wcs_buffer (pstr)
|
||||
re_string_t *pstr;
|
||||
{
|
||||
#ifdef _LIBC
|
||||
unsigned char buf[pstr->mb_cur_max];
|
||||
#else
|
||||
unsigned char buf[64];
|
||||
#endif
|
||||
mbstate_t prev_st;
|
||||
int byte_idx, end_idx, mbclen, remain_len;
|
||||
|
||||
/* Build the buffers from pstr->valid_len to either pstr->len or
|
||||
pstr->bufs_len. */
|
||||
end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
|
||||
end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
|
||||
for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
|
||||
{
|
||||
wchar_t wc;
|
||||
const char *p;
|
||||
|
||||
remain_len = end_idx - byte_idx;
|
||||
prev_st = pstr->cur_state;
|
||||
mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
|
||||
+ byte_idx), remain_len, &pstr->cur_state);
|
||||
/* Apply the translation if we need. */
|
||||
if (BE (pstr->trans != NULL, 0))
|
||||
{
|
||||
int i, ch;
|
||||
|
||||
for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
|
||||
{
|
||||
ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
|
||||
buf[i] = pstr->trans[ch];
|
||||
}
|
||||
p = (const char *) buf;
|
||||
}
|
||||
else
|
||||
p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
|
||||
mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
|
||||
if (BE (mbclen == (size_t) -2, 0))
|
||||
{
|
||||
/* The buffer doesn't have enough space, stop building here. */
|
||||
@ -229,15 +258,11 @@ build_wcs_buffer (pstr)
|
||||
/* We treat these cases as a singlebyte character. */
|
||||
mbclen = 1;
|
||||
wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
|
||||
if (BE (pstr->trans != NULL, 0))
|
||||
wc = pstr->trans[wc];
|
||||
pstr->cur_state = prev_st;
|
||||
}
|
||||
|
||||
/* Apply the translation if we need. */
|
||||
if (pstr->trans != NULL && mbclen == 1)
|
||||
{
|
||||
int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]];
|
||||
pstr->mbs_case[byte_idx] = ch;
|
||||
}
|
||||
/* Write wide character and padding. */
|
||||
pstr->wcs[byte_idx++] = wc;
|
||||
/* Write paddings. */
|
||||
@ -245,61 +270,83 @@ build_wcs_buffer (pstr)
|
||||
pstr->wcs[byte_idx++] = WEOF;
|
||||
}
|
||||
pstr->valid_len = byte_idx;
|
||||
pstr->valid_raw_len = byte_idx;
|
||||
}
|
||||
|
||||
/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
|
||||
but for REG_ICASE. */
|
||||
|
||||
static void
|
||||
static int
|
||||
build_wcs_upper_buffer (pstr)
|
||||
re_string_t *pstr;
|
||||
{
|
||||
mbstate_t prev_st;
|
||||
int byte_idx, end_idx, mbclen, remain_len;
|
||||
/* Build the buffers from pstr->valid_len to either pstr->len or
|
||||
pstr->bufs_len. */
|
||||
end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
|
||||
int src_idx, byte_idx, end_idx, mbclen, remain_len;
|
||||
#ifdef _LIBC
|
||||
unsigned char buf[pstr->mb_cur_max];
|
||||
#else
|
||||
unsigned char buf[64];
|
||||
#endif
|
||||
|
||||
byte_idx = pstr->valid_len;
|
||||
end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
|
||||
|
||||
#ifdef _LIBC
|
||||
/* The following optimization assumes that the wchar_t encoding is
|
||||
always ISO 10646. */
|
||||
if (! pstr->map_notascii && pstr->trans == NULL)
|
||||
for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
|
||||
if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
|
||||
&& mbsinit (&pstr->cur_state))
|
||||
{
|
||||
/* In case of a singlebyte character. */
|
||||
pstr->mbs[byte_idx]
|
||||
= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
|
||||
/* The next step uses the assumption that wchar_t is encoded
|
||||
with ISO 10646: all ASCII values can be converted like this. */
|
||||
pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
|
||||
++byte_idx;
|
||||
}
|
||||
else
|
||||
if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
|
||||
{
|
||||
while (byte_idx < end_idx)
|
||||
{
|
||||
wchar_t wc;
|
||||
|
||||
if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
|
||||
&& mbsinit (&pstr->cur_state))
|
||||
{
|
||||
/* In case of a singlebyte character. */
|
||||
pstr->mbs[byte_idx]
|
||||
= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
|
||||
/* The next step uses the assumption that wchar_t is encoded
|
||||
with ISO 10646: all ASCII values can be converted like
|
||||
this. */
|
||||
pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
|
||||
++byte_idx;
|
||||
continue;
|
||||
}
|
||||
|
||||
remain_len = end_idx - byte_idx;
|
||||
prev_st = pstr->cur_state;
|
||||
mbclen = mbrtowc (&wc,
|
||||
((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
|
||||
+ byte_idx), remain_len, &pstr->cur_state);
|
||||
if (BE (mbclen > 1, 1))
|
||||
if (BE (mbclen > 0, 1))
|
||||
{
|
||||
wchar_t wcu = wc;
|
||||
if (iswlower (wc))
|
||||
wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc),
|
||||
&prev_st);
|
||||
{
|
||||
int mbcdlen;
|
||||
|
||||
wcu = towupper (wc);
|
||||
mbcdlen = wcrtomb (buf, wcu, &prev_st);
|
||||
if (BE (mbclen == mbcdlen, 1))
|
||||
memcpy (pstr->mbs + byte_idx, buf, mbclen);
|
||||
else
|
||||
{
|
||||
src_idx = byte_idx;
|
||||
goto offsets_needed;
|
||||
}
|
||||
}
|
||||
else
|
||||
memcpy (pstr->mbs + byte_idx,
|
||||
pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
|
||||
pstr->wcs[byte_idx++] = towupper (wc);
|
||||
pstr->wcs[byte_idx++] = wcu;
|
||||
/* Write paddings. */
|
||||
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
|
||||
pstr->wcs[byte_idx++] = WEOF;
|
||||
}
|
||||
else if (mbclen == (size_t) -1 || mbclen == 0)
|
||||
{
|
||||
/* It is an invalid character. Just use the byte. */
|
||||
/* It is an invalid character or '\0'. Just use the byte. */
|
||||
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
|
||||
pstr->mbs[byte_idx] = ch;
|
||||
/* And also cast it to wide char. */
|
||||
@ -314,48 +361,116 @@ build_wcs_upper_buffer (pstr)
|
||||
break;
|
||||
}
|
||||
}
|
||||
pstr->valid_len = byte_idx;
|
||||
pstr->valid_raw_len = byte_idx;
|
||||
return REG_NOERROR;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
|
||||
for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
|
||||
{
|
||||
wchar_t wc;
|
||||
const char *p;
|
||||
offsets_needed:
|
||||
remain_len = end_idx - byte_idx;
|
||||
prev_st = pstr->cur_state;
|
||||
mbclen = mbrtowc (&wc,
|
||||
((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
|
||||
+ byte_idx), remain_len, &pstr->cur_state);
|
||||
if (mbclen == 1)
|
||||
if (BE (pstr->trans != NULL, 0))
|
||||
{
|
||||
/* In case of a singlebyte character. */
|
||||
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
|
||||
/* Apply the translation if we need. */
|
||||
if (BE (pstr->trans != NULL, 0) && mbclen == 1)
|
||||
int i, ch;
|
||||
|
||||
for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
|
||||
{
|
||||
ch = pstr->trans[ch];
|
||||
pstr->mbs_case[byte_idx] = ch;
|
||||
ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
|
||||
buf[i] = pstr->trans[ch];
|
||||
}
|
||||
pstr->wcs[byte_idx] = towupper (wc);
|
||||
pstr->mbs[byte_idx++] = toupper (ch);
|
||||
if (BE (mbclen == (size_t) -1, 0))
|
||||
pstr->cur_state = prev_st;
|
||||
p = (const char *) buf;
|
||||
}
|
||||
else if (BE (mbclen != (size_t) -2, 1))
|
||||
else
|
||||
p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
|
||||
mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
|
||||
if (BE (mbclen > 0, 1))
|
||||
{
|
||||
wchar_t wcu = wc;
|
||||
if (iswlower (wc))
|
||||
wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st);
|
||||
{
|
||||
int mbcdlen;
|
||||
|
||||
wcu = towupper (wc);
|
||||
mbcdlen = wcrtomb (buf, wcu, &prev_st);
|
||||
if (BE (mbclen == mbcdlen, 1))
|
||||
memcpy (pstr->mbs + byte_idx, buf, mbclen);
|
||||
else
|
||||
{
|
||||
int i;
|
||||
|
||||
if (byte_idx + mbcdlen > pstr->bufs_len)
|
||||
{
|
||||
pstr->cur_state = prev_st;
|
||||
break;
|
||||
}
|
||||
|
||||
if (pstr->offsets == NULL)
|
||||
{
|
||||
pstr->offsets = re_malloc (int, pstr->bufs_len);
|
||||
|
||||
if (pstr->offsets == NULL)
|
||||
return REG_ESPACE;
|
||||
}
|
||||
if (!pstr->offsets_needed)
|
||||
{
|
||||
for (i = 0; i < byte_idx; ++i)
|
||||
pstr->offsets[i] = i;
|
||||
pstr->offsets_needed = 1;
|
||||
}
|
||||
|
||||
memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
|
||||
pstr->wcs[byte_idx] = wcu;
|
||||
pstr->offsets[byte_idx] = src_idx;
|
||||
for (i = 1; i < mbcdlen; ++i)
|
||||
{
|
||||
pstr->offsets[byte_idx + i]
|
||||
= src_idx + (i < mbclen ? i : mbclen - 1);
|
||||
pstr->wcs[byte_idx + i] = WEOF;
|
||||
}
|
||||
pstr->len += mbcdlen - mbclen;
|
||||
if (pstr->raw_stop > src_idx)
|
||||
pstr->stop += mbcdlen - mbclen;
|
||||
end_idx = (pstr->bufs_len > pstr->len)
|
||||
? pstr->len : pstr->bufs_len;
|
||||
byte_idx += mbcdlen;
|
||||
src_idx += mbclen;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else
|
||||
memcpy (pstr->mbs + byte_idx,
|
||||
pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
|
||||
pstr->wcs[byte_idx++] = towupper (wc);
|
||||
memcpy (pstr->mbs + byte_idx, p, mbclen);
|
||||
|
||||
if (BE (pstr->offsets_needed != 0, 0))
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < mbclen; ++i)
|
||||
pstr->offsets[byte_idx + i] = src_idx + i;
|
||||
}
|
||||
src_idx += mbclen;
|
||||
|
||||
pstr->wcs[byte_idx++] = wcu;
|
||||
/* Write paddings. */
|
||||
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
|
||||
pstr->wcs[byte_idx++] = WEOF;
|
||||
}
|
||||
else if (mbclen == (size_t) -1 || mbclen == 0)
|
||||
{
|
||||
/* It is an invalid character. Just use the byte. */
|
||||
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
|
||||
/* It is an invalid character or '\0'. Just use the byte. */
|
||||
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
|
||||
|
||||
if (BE (pstr->trans != NULL, 0))
|
||||
ch = pstr->trans [ch];
|
||||
pstr->mbs[byte_idx] = ch;
|
||||
|
||||
if (BE (pstr->offsets_needed != 0, 0))
|
||||
pstr->offsets[byte_idx] = src_idx;
|
||||
++src_idx;
|
||||
|
||||
/* And also cast it to wide char. */
|
||||
pstr->wcs[byte_idx++] = (wchar_t) ch;
|
||||
if (BE (mbclen == (size_t) -1, 0))
|
||||
@ -369,6 +484,8 @@ build_wcs_upper_buffer (pstr)
|
||||
}
|
||||
}
|
||||
pstr->valid_len = byte_idx;
|
||||
pstr->valid_raw_len = src_idx;
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
/* Skip characters until the index becomes greater than NEW_RAW_IDX.
|
||||
@ -385,7 +502,7 @@ re_string_skip_chars (pstr, new_raw_idx, last_wc)
|
||||
wchar_t wc = 0;
|
||||
|
||||
/* Skip the characters which are not necessary to check. */
|
||||
for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_len;
|
||||
for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
|
||||
rawbuf_idx < new_raw_idx;)
|
||||
{
|
||||
int remain_len;
|
||||
@ -420,17 +537,15 @@ build_upper_buffer (pstr)
|
||||
for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
|
||||
{
|
||||
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
|
||||
if (pstr->trans != NULL)
|
||||
{
|
||||
ch = pstr->trans[ch];
|
||||
pstr->mbs_case[char_idx] = ch;
|
||||
}
|
||||
if (BE (pstr->trans != NULL, 0))
|
||||
ch = pstr->trans[ch];
|
||||
if (islower (ch))
|
||||
pstr->mbs[char_idx] = toupper (ch);
|
||||
else
|
||||
pstr->mbs[char_idx] = ch;
|
||||
}
|
||||
pstr->valid_len = char_idx;
|
||||
pstr->valid_raw_len = char_idx;
|
||||
}
|
||||
|
||||
/* Apply TRANS to the buffer in PSTR. */
|
||||
@ -445,10 +560,11 @@ re_string_translate_buffer (pstr)
|
||||
for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
|
||||
{
|
||||
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
|
||||
pstr->mbs_case[buf_idx] = pstr->trans[ch];
|
||||
pstr->mbs[buf_idx] = pstr->trans[ch];
|
||||
}
|
||||
|
||||
pstr->valid_len = buf_idx;
|
||||
pstr->valid_raw_len = buf_idx;
|
||||
}
|
||||
|
||||
/* This function re-constructs the buffers.
|
||||
@ -468,14 +584,15 @@ re_string_reconstruct (pstr, idx, eflags, newline)
|
||||
if (pstr->mb_cur_max > 1)
|
||||
memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
pstr->len += pstr->raw_mbs_idx;
|
||||
pstr->stop += pstr->raw_mbs_idx;
|
||||
pstr->valid_len = pstr->raw_mbs_idx = 0;
|
||||
pstr->len = pstr->raw_len;
|
||||
pstr->stop = pstr->raw_stop;
|
||||
pstr->valid_len = 0;
|
||||
pstr->raw_mbs_idx = 0;
|
||||
pstr->valid_raw_len = 0;
|
||||
pstr->offsets_needed = 0;
|
||||
pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
|
||||
: CONTEXT_NEWLINE | CONTEXT_BEGBUF);
|
||||
if (!MBS_CASE_ALLOCATED (pstr))
|
||||
pstr->mbs_case = (unsigned char *) pstr->raw_mbs;
|
||||
if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr))
|
||||
if (!pstr->mbs_allocated)
|
||||
pstr->mbs = (unsigned char *) pstr->raw_mbs;
|
||||
offset = idx;
|
||||
}
|
||||
@ -483,7 +600,13 @@ re_string_reconstruct (pstr, idx, eflags, newline)
|
||||
if (offset != 0)
|
||||
{
|
||||
/* Do any of the already-checked characters remain? */
|
||||
if (offset < pstr->valid_len)
|
||||
if (offset < pstr->valid_raw_len
|
||||
#ifdef RE_ENABLE_I18N
|
||||
/* Handling this would enlarge the code too much.
|
||||
Accept a slowdown in that case. */
|
||||
&& pstr->offsets_needed == 0
|
||||
#endif
|
||||
)
|
||||
{
|
||||
/* Yes, move them to the front of the buffer. */
|
||||
pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags,
|
||||
@ -493,13 +616,11 @@ re_string_reconstruct (pstr, idx, eflags, newline)
|
||||
memmove (pstr->wcs, pstr->wcs + offset,
|
||||
(pstr->valid_len - offset) * sizeof (wint_t));
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
if (MBS_ALLOCATED (pstr))
|
||||
if (pstr->mbs_allocated)
|
||||
memmove (pstr->mbs, pstr->mbs + offset,
|
||||
pstr->valid_len - offset);
|
||||
if (MBS_CASE_ALLOCATED (pstr))
|
||||
memmove (pstr->mbs_case, pstr->mbs_case + offset,
|
||||
pstr->valid_len - offset);
|
||||
pstr->valid_len -= offset;
|
||||
pstr->valid_raw_len -= offset;
|
||||
#if DEBUG
|
||||
assert (pstr->valid_len > 0);
|
||||
#endif
|
||||
@ -507,16 +628,26 @@ re_string_reconstruct (pstr, idx, eflags, newline)
|
||||
else
|
||||
{
|
||||
/* No, skip all characters until IDX. */
|
||||
#ifdef RE_ENABLE_I18N
|
||||
if (BE (pstr->offsets_needed, 0))
|
||||
{
|
||||
pstr->len = pstr->raw_len - idx + offset;
|
||||
pstr->stop = pstr->raw_stop - idx + offset;
|
||||
pstr->offsets_needed = 0;
|
||||
}
|
||||
#endif
|
||||
pstr->valid_len = 0;
|
||||
pstr->valid_raw_len = 0;
|
||||
#ifdef RE_ENABLE_I18N
|
||||
if (pstr->mb_cur_max > 1)
|
||||
{
|
||||
int wcs_idx;
|
||||
wint_t wc = WEOF;
|
||||
|
||||
#ifdef _LIBC
|
||||
if (pstr->is_utf8)
|
||||
{
|
||||
const unsigned char *raw, *p, *end;
|
||||
const unsigned char *raw, *p, *q, *end;
|
||||
|
||||
/* Special case UTF-8. Multi-byte chars start with any
|
||||
byte other than 0x80 - 0xbf. */
|
||||
@ -527,13 +658,22 @@ re_string_reconstruct (pstr, idx, eflags, newline)
|
||||
{
|
||||
mbstate_t cur_state;
|
||||
wchar_t wc2;
|
||||
int mlen;
|
||||
int mlen = raw + pstr->len - p;
|
||||
unsigned char buf[6];
|
||||
|
||||
q = p;
|
||||
if (BE (pstr->trans != NULL, 0))
|
||||
{
|
||||
int i = mlen < 6 ? mlen : 6;
|
||||
while (--i >= 0)
|
||||
buf[i] = pstr->trans[p[i]];
|
||||
q = buf;
|
||||
}
|
||||
/* XXX Don't use mbrtowc, we know which conversion
|
||||
to use (UTF-8 -> UCS4). */
|
||||
memset (&cur_state, 0, sizeof (cur_state));
|
||||
mlen = mbrtowc (&wc2, p, raw + pstr->len - p,
|
||||
&cur_state) - (raw + offset - p);
|
||||
mlen = mbrtowc (&wc2, p, mlen, &cur_state)
|
||||
- (raw + offset - p);
|
||||
if (mlen >= 0)
|
||||
{
|
||||
memset (&pstr->cur_state, '\0',
|
||||
@ -544,12 +684,17 @@ re_string_reconstruct (pstr, idx, eflags, newline)
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (wc == WEOF)
|
||||
pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
|
||||
for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
|
||||
pstr->wcs[wcs_idx] = WEOF;
|
||||
if (pstr->trans && wc <= 0xff)
|
||||
wc = pstr->trans[wc];
|
||||
if (BE (pstr->valid_len, 0))
|
||||
{
|
||||
for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
|
||||
pstr->wcs[wcs_idx] = WEOF;
|
||||
if (pstr->mbs_allocated)
|
||||
memset (pstr->mbs, 255, pstr->valid_len);
|
||||
}
|
||||
pstr->valid_raw_len = pstr->valid_len;
|
||||
pstr->tip_context = (IS_WIDE_WORD_CHAR (wc) ? CONTEXT_WORD
|
||||
: ((newline && IS_WIDE_NEWLINE (wc))
|
||||
? CONTEXT_NEWLINE : 0));
|
||||
@ -565,13 +710,8 @@ re_string_reconstruct (pstr, idx, eflags, newline)
|
||||
? CONTEXT_NEWLINE : 0));
|
||||
}
|
||||
}
|
||||
if (!MBS_CASE_ALLOCATED (pstr))
|
||||
{
|
||||
pstr->mbs_case += offset;
|
||||
/* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED. */
|
||||
if (!MBS_ALLOCATED (pstr))
|
||||
pstr->mbs += offset;
|
||||
}
|
||||
if (!pstr->mbs_allocated)
|
||||
pstr->mbs += offset;
|
||||
}
|
||||
pstr->raw_mbs_idx = idx;
|
||||
pstr->len -= offset;
|
||||
@ -582,7 +722,11 @@ re_string_reconstruct (pstr, idx, eflags, newline)
|
||||
if (pstr->mb_cur_max > 1)
|
||||
{
|
||||
if (pstr->icase)
|
||||
build_wcs_upper_buffer (pstr);
|
||||
{
|
||||
int ret = build_wcs_upper_buffer (pstr);
|
||||
if (BE (ret != REG_NOERROR, 0))
|
||||
return ret;
|
||||
}
|
||||
else
|
||||
build_wcs_buffer (pstr);
|
||||
}
|
||||
@ -601,17 +745,95 @@ re_string_reconstruct (pstr, idx, eflags, newline)
|
||||
return REG_NOERROR;
|
||||
}
|
||||
|
||||
static unsigned char
|
||||
re_string_peek_byte_case (const re_string_t *pstr,
|
||||
int idx)
|
||||
{
|
||||
int ch, off;
|
||||
|
||||
/* Handle the common (easiest) cases first. */
|
||||
if (BE (!pstr->icase, 1))
|
||||
return re_string_peek_byte (pstr, idx);
|
||||
|
||||
#ifdef RE_ENABLE_I18N
|
||||
if (pstr->mb_cur_max > 1
|
||||
&& ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
|
||||
return re_string_peek_byte (pstr, idx);
|
||||
#endif
|
||||
|
||||
off = pstr->cur_idx + idx;
|
||||
#ifdef RE_ENABLE_I18N
|
||||
if (pstr->offsets_needed)
|
||||
off = pstr->offsets[off];
|
||||
#endif
|
||||
|
||||
ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
|
||||
if (pstr->trans)
|
||||
ch = pstr->trans[ch];
|
||||
|
||||
#ifdef RE_ENABLE_I18N
|
||||
/* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
|
||||
this function returns CAPITAL LETTER I instead of first byte of
|
||||
DOTLESS SMALL LETTER I. The latter would confuse the parser,
|
||||
since peek_byte_case doesn't advance cur_idx in any way. */
|
||||
if (pstr->offsets_needed && !isascii (ch))
|
||||
return re_string_peek_byte (pstr, idx);
|
||||
#endif
|
||||
|
||||
return ch;
|
||||
}
|
||||
|
||||
static unsigned char
|
||||
re_string_fetch_byte_case (re_string_t *pstr)
|
||||
{
|
||||
int ch, off;
|
||||
|
||||
if (BE (!pstr->icase, 1))
|
||||
return re_string_fetch_byte (pstr);
|
||||
|
||||
#ifdef RE_ENABLE_I18N
|
||||
if (pstr->offsets_needed)
|
||||
{
|
||||
/* For tr_TR.UTF-8 [[:islower:]] there is
|
||||
[[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
|
||||
in that case the whole multi-byte character and return
|
||||
the original letter. On the other side, with
|
||||
[[: DOTLESS SMALL LETTER I return [[:I, as doing
|
||||
anything else would complicate things too much. */
|
||||
|
||||
if (!re_string_first_byte (pstr, pstr->cur_idx))
|
||||
return re_string_fetch_byte (pstr);
|
||||
|
||||
off = pstr->offsets[pstr->cur_idx];
|
||||
ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
|
||||
if (pstr->trans)
|
||||
ch = pstr->trans[ch];
|
||||
|
||||
if (! isascii (ch))
|
||||
return re_string_fetch_byte (pstr);
|
||||
|
||||
re_string_skip_bytes (pstr,
|
||||
re_string_char_size_at (pstr, pstr->cur_idx));
|
||||
return ch;
|
||||
}
|
||||
#endif
|
||||
|
||||
ch = pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
|
||||
if (pstr->trans)
|
||||
ch = pstr->trans[ch];
|
||||
return ch;
|
||||
}
|
||||
|
||||
static void
|
||||
re_string_destruct (pstr)
|
||||
re_string_t *pstr;
|
||||
{
|
||||
#ifdef RE_ENABLE_I18N
|
||||
re_free (pstr->wcs);
|
||||
re_free (pstr->offsets);
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
if (MBS_ALLOCATED (pstr))
|
||||
if (pstr->mbs_allocated)
|
||||
re_free (pstr->mbs);
|
||||
if (MBS_CASE_ALLOCATED (pstr))
|
||||
re_free (pstr->mbs_case);
|
||||
}
|
||||
|
||||
/* Return the context at IDX in INPUT. */
|
||||
|
Loading…
x
Reference in New Issue
Block a user