* posix/regex_internal.c (re_string_reconstruct): Handle

offset < pstr->valid_raw_len && pstr->offsets_needed case.
	Ensure no bytes read before raw_mbs array.  Pass a saved copy of
	pstr->valid_len - 1 rather than pstr->valid_raw_len - 1 to
	re_string_context_at.
	* posix/Makefile: Add rules to build and run bug-regex26 test.
	* posix/bug-regex26.c: New test.
This commit is contained in:
Jakub Jelinek 2006-09-07 10:42:04 +00:00
parent 8021fca541
commit eaa2ea12a2
5 changed files with 141 additions and 25 deletions

View File

@ -1,5 +1,13 @@
2006-09-06 Jakub Jelinek <jakub@redhat.com>
* posix/regex_internal.c (re_string_reconstruct): Handle
offset < pstr->valid_raw_len && pstr->offsets_needed case.
Ensure no bytes read before raw_mbs array. Pass a saved copy of
pstr->valid_len - 1 rather than pstr->valid_raw_len - 1 to
re_string_context_at.
* posix/Makefile: Add rules to build and run bug-regex26 test.
* posix/bug-regex26.c: New test.
* locale/programs/ld-collate.c (collate_read): Goto sym_equiv_free
rather than col_sym_free. Move seqp declaration earlier.

View File

@ -1455,10 +1455,13 @@ rm -f *.filelist*
%changelog
* Thu Sep 7 2006 Jakub Jelinek <jakub@redhat.com> 2.4.90-30
- fix or_IN February name (#204730)
- fix pthread_create called from cancellation handlers (BZ#3124)
- add librtkaio, to use it add /%{lib}/rtkaio to your
LD_LIBRARY_PATH or /etc/ld.so.conf
- fix or_IN February name (#204730)
- fix pthread_create called from cancellation handlers (BZ#3124)
- fix regex case insensitive searches with characters where upper
and lower case multibyte representations have different length
(e.g. I and dotless i, #202991)
* Tue Sep 5 2006 Jakub Jelinek <jakub@redhat.com> 2.4.90-29
- randomize resolver query ids before use instead of after use (#205113)

View File

@ -81,7 +81,7 @@ tests := tstgetopt testfnm runtests runptests \
bug-regex13 bug-regex14 bug-regex15 bug-regex16 \
bug-regex17 bug-regex18 bug-regex19 bug-regex20 \
bug-regex21 bug-regex22 bug-regex23 bug-regex24 \
bug-regex25 tst-nice tst-nanosleep tst-regex2 \
bug-regex25 bug-regex26 tst-nice tst-nanosleep tst-regex2 \
transbug tst-rxspencer tst-pcre tst-boost \
bug-ga1 tst-vfork1 tst-vfork2 tst-waitid \
tst-getaddrinfo2 bug-glob1 bug-glob2 tst-sysconf \
@ -190,6 +190,7 @@ bug-regex20-ENV = LOCPATH=$(common-objpfx)localedata
bug-regex22-ENV = LOCPATH=$(common-objpfx)localedata
bug-regex23-ENV = LOCPATH=$(common-objpfx)localedata
bug-regex25-ENV = LOCPATH=$(common-objpfx)localedata
bug-regex26-ENV = LOCPATH=$(common-objpfx)localedata
tst-rxspencer-ARGS = --utf8 rxspencer/tests
tst-rxspencer-ENV = LOCPATH=$(common-objpfx)localedata
tst-pcre-ARGS = PCRE.tests

38
posix/bug-regex26.c Normal file
View File

@ -0,0 +1,38 @@
/* Test re_search with dotless i.
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Jakub Jelinek <jakub@redhat.com>, 2006.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <locale.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Run a case-insensitive re_search for "insert into" over a binary
   subject that contains the UTF-8 encoding of dotless i (0xC4 0xB1)
   in a UTF-8 locale.  The match result itself is irrelevant; the test
   only requires that the search completes without an internal error.

   Returns 0 on success, 1 if the locale cannot be selected, the pattern
   does not compile, or re_search reports an internal error.  */
int
main (void)
{
  /* The subject holds an embedded NUL and invalid UTF-8 bytes, so the
     lengths are derived from the array sizes instead of strlen or
     hand-counted constants.  */
  static const char pattern[] = "insert into";
  static const char subject[]
    = "\xFF\0\x12\xA2\xAA\xC4\xB1,K\x12\xC4\xB1*\xACK";
  const int subject_len = (int) (sizeof (subject) - 1);

  struct re_pattern_buffer r;
  struct re_registers s;
  const char *err;
  int result = 0;

  /* Without the UTF-8 locale the multibyte code path is never taken and
     the test would pass without testing anything.  */
  if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
    {
      puts ("cannot set locale");
      return 1;
    }

  /* Zeroed buffers: no fastmap, no translate table, and registers in
     the "unallocated" state so that re_search allocates them.  */
  memset (&r, 0, sizeof (r));
  memset (&s, 0, sizeof (s));
  re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE | RE_ICASE);

  /* re_compile_pattern returns NULL on success, otherwise a message.  */
  err = re_compile_pattern (pattern, sizeof (pattern) - 1, &r);
  if (err != NULL)
    {
      printf ("re_compile_pattern failed: %s\n", err);
      return 1;
    }

  /* -1 (no match) and >= 0 (match position) are both acceptable;
     -2 signals an internal error in the matcher.  */
  if (re_search (&r, subject, subject_len, 0, subject_len, &s) == -2)
    {
      puts ("re_search reported an internal error");
      result = 1;
    }

  /* The register arrays stay NULL when nothing matched; free (NULL) is
     a no-op, so no guard is needed.  */
  free (s.start);
  free (s.end);
  regfree (&r);

  return result;
}

View File

@ -585,34 +585,98 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
if (BE (offset != 0, 1))
{
/* Are the characters which are already checked remain? */
if (BE (offset < pstr->valid_raw_len, 1)
#ifdef RE_ENABLE_I18N
/* Handling this would enlarge the code too much.
Accept a slowdown in that case. */
&& pstr->offsets_needed == 0
#endif
)
/* Should the already checked characters be kept? */
if (BE (offset < pstr->valid_raw_len, 1))
{
/* Yes, move them to the front of the buffer. */
pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags);
#ifdef RE_ENABLE_I18N
if (pstr->mb_cur_max > 1)
memmove (pstr->wcs, pstr->wcs + offset,
(pstr->valid_len - offset) * sizeof (wint_t));
#endif /* RE_ENABLE_I18N */
if (BE (pstr->mbs_allocated, 0))
memmove (pstr->mbs, pstr->mbs + offset,
pstr->valid_len - offset);
pstr->valid_len -= offset;
pstr->valid_raw_len -= offset;
#if DEBUG
assert (pstr->valid_len > 0);
if (BE (pstr->offsets_needed, 0))
{
int low = 0, high = pstr->valid_len, mid;
do
{
mid = (high + low) / 2;
if (pstr->offsets[mid] > offset)
high = mid;
else if (pstr->offsets[mid] < offset)
low = mid + 1;
else
break;
}
while (low < high);
if (pstr->offsets[mid] < offset)
++mid;
pstr->tip_context = re_string_context_at (pstr, mid - 1,
eflags);
/* This can be quite complicated, so handle specially
only the common and easy case where the character with
different length representation of lower and upper
case is present at or after offset. */
if (pstr->valid_len > offset
&& mid == offset && pstr->offsets[mid] == offset)
{
memmove (pstr->wcs, pstr->wcs + offset,
(pstr->valid_len - offset) * sizeof (wint_t));
memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
pstr->valid_len -= offset;
pstr->valid_raw_len -= offset;
for (low = 0; low < pstr->valid_len; low++)
pstr->offsets[low] = pstr->offsets[low + offset] - offset;
}
else
{
/* Otherwise, just find out how long the partial multibyte
character at offset is and fill it with WEOF/255. */
pstr->len = pstr->raw_len - idx + offset;
pstr->stop = pstr->raw_stop - idx + offset;
pstr->offsets_needed = 0;
while (mid > 0 && pstr->offsets[mid - 1] == offset)
--mid;
while (mid < pstr->valid_len)
if (pstr->wcs[mid] != WEOF)
break;
else
++mid;
if (mid == pstr->valid_len)
pstr->valid_len = 0;
else
{
pstr->valid_len = pstr->offsets[mid] - offset;
if (pstr->valid_len)
{
for (low = 0; low < pstr->valid_len; ++low)
pstr->wcs[low] = WEOF;
memset (pstr->mbs, 255, pstr->valid_len);
}
}
pstr->valid_raw_len = pstr->valid_len;
}
}
else
#endif
{
pstr->tip_context = re_string_context_at (pstr, offset - 1,
eflags);
#ifdef RE_ENABLE_I18N
if (pstr->mb_cur_max > 1)
memmove (pstr->wcs, pstr->wcs + offset,
(pstr->valid_len - offset) * sizeof (wint_t));
#endif /* RE_ENABLE_I18N */
if (BE (pstr->mbs_allocated, 0))
memmove (pstr->mbs, pstr->mbs + offset,
pstr->valid_len - offset);
pstr->valid_len -= offset;
pstr->valid_raw_len -= offset;
#if DEBUG
assert (pstr->valid_len > 0);
#endif
}
}
else
{
/* No, skip all characters until IDX. */
int prev_valid_len = pstr->valid_len;
#ifdef RE_ENABLE_I18N
if (BE (pstr->offsets_needed, 0))
{
@ -636,6 +700,8 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
byte other than 0x80 - 0xbf. */
raw = pstr->raw_mbs + pstr->raw_mbs_idx;
end = raw + (offset - pstr->mb_cur_max);
if (end < pstr->raw_mbs)
end = pstr->raw_mbs;
p = raw + offset - 1;
#ifdef _LIBC
/* We know the wchar_t encoding is UCS4, so for the simple
@ -643,7 +709,7 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
if (isascii (*p) && BE (pstr->trans == NULL, 1))
{
memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
pstr->valid_len = 0;
/* pstr->valid_len = 0; */
wc = (wchar_t) *p;
}
else
@ -686,7 +752,7 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
if (wc == WEOF)
pstr->tip_context
= re_string_context_at (pstr, pstr->valid_raw_len - 1, eflags);
= re_string_context_at (pstr, prev_valid_len - 1, eflags);
else
pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
&& IS_WIDE_WORD_CHAR (wc))