diff --git a/ChangeLog b/ChangeLog index 300a096115..7426d6d709 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2002-04-18 Bruno Haible + + * iconvdata/euc-jisx0213.c (EMIT_SHIFT_TO_INIT, BODY for + FROM_DIRECTION): Make the FROM direction stateless. + * iconvdata/shift_jisx0213.c (EMIT_SHIFT_TO_INIT, BODY for + FROM_DIRECTION): Likewise. + 2002-04-15 Bruno Haible * iconvdata/JISX0213.TXT: New file. @@ -45,6 +52,7 @@ (__cns11643l2*_to_ucs4_tab, __cns11643_from_ucs4p0_tab, __cns11643_from_ucs4p2_tab): Regenerated. (__cns11643_from_ucs4p2c_tab): New array. + * iconvdata/cns11643l1.c: Update comments. * iconvdata/EUC-TW.irreversible: Remove two entries. 2002-04-15 Bruno Haible @@ -96,6 +104,7 @@ * sysdeps/unix/sysv/linux/hppa/syscalls.list: Likewise. * sysdeps/unix/sysv/linux/ia64/syscalls.list: Likewise. * sysdeps/unix/sysv/linux/mips/syscalls.list: Likewise. + * sysdeps/unix/sysv/linux/s390/s390-64/syscalls.list: Likewise. * sysdeps/unix/sysv/linux/sparc/sparc64/syscalls.list: Likewise. * sysdeps/unix/sysv/linux/x86_64/syscalls.list: Likewise. diff --git a/NEWS b/NEWS index 6bd0f4b563..3a693d0f2b 100644 --- a/NEWS +++ b/NEWS @@ -28,6 +28,9 @@ Version 2.3 * Isamu Hasegawa contributed a completely new and POSIX conforming implementation of regex. + +* Bruno Haible upgraded the iconv and locale implementation to support + Unicode 3.2. Version 2.2.5 diff --git a/iconvdata/SHIFT_JISX0213.irreversible b/iconvdata/SHIFT_JISX0213.irreversible new file mode 100644 index 0000000000..cf99b7265e --- /dev/null +++ b/iconvdata/SHIFT_JISX0213.irreversible @@ -0,0 +1,2 @@ +0x5C 0x005C +0x7E 0x007E diff --git a/iconvdata/cns11643l1.c b/iconvdata/cns11643l1.c index 7e179e36c6..1ea1297204 100644 --- a/iconvdata/cns11643l1.c +++ b/iconvdata/cns11643l1.c @@ -1,5 +1,5 @@ /* Mapping tables for CNS 11643, plane 1 handling. - Copyright (C) 1998, 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 1998, 2000, 2001, 2002 Free Software Foundation, Inc. 
This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1998. @@ -20,9 +20,9 @@ #include -/* To generate a Unicode 3.1 CNS11643.TXT, take +/* To generate a Unicode 3.2 CNS11643.TXT, take http://www.unicode.org/Public/Mappings/EASTASIA/OTHER/CNS11643.TXT - and add the following lines (see Unicode 3.1 UNIHAN.TXT): + and add the following lines (see Unicode 3.2 UNIHAN.TXT): 0x12728 0x4EA0 # 0x1272F 0x51AB # 0x12734 0x52F9 # diff --git a/iconvdata/euc-jisx0213.c b/iconvdata/euc-jisx0213.c index b0836b173d..c696f94ee8 100644 --- a/iconvdata/euc-jisx0213.c +++ b/iconvdata/euc-jisx0213.c @@ -62,9 +62,7 @@ *statep = saved_state -/* During EUC-JISX0213 to UCS-4 conversion, the COUNT element of the state - contains the last UCS-4 character, shifted by 3 bits. - During UCS-4 to EUC-JISX0213 conversion, the COUNT element of the state +/* During UCS-4 to EUC-JISX0213 conversion, the COUNT element of the state contains the last two bytes to be output, shifted by 3 bits. */ /* Since this is a stateful encoding we have to provide code which resets @@ -74,17 +72,8 @@ if (data->__statep->__count != 0) \ { \ if (FROM_DIRECTION) \ - { \ - if (__builtin_expect (outbuf + 4 <= outend, 1)) \ - { \ - /* Write out the last character. */ \ - *((uint32_t *) outbuf)++ = data->__statep->__count >> 3; \ - data->__statep->__count = 0; \ - } \ - else \ - /* We don't have enough room in the output buffer. */ \ - status = __GCONV_FULL_OUTPUT; \ - } \ + /* We don't use shift states in the FROM_DIRECTION. */ \ + data->__statep->__count = 0; \ else \ { \ if (__builtin_expect (outbuf + 2 <= outend, 1)) \ @@ -109,33 +98,44 @@ #define LOOPFCT FROM_LOOP #define BODY \ { \ - uint32_t ch; \ + uint32_t ch = *inptr; \ \ - /* Determine whether there is a buffered character pending. */ \ - ch = *statep >> 3; \ - if (__builtin_expect (ch == 0, 1)) \ + if (ch < 0x80) \ + /* Plain ASCII character. 
*/ \ + ++inptr; \ + else if ((ch >= 0xa1 && ch <= 0xfe) || ch == 0x8e || ch == 0x8f) \ { \ - /* No - so look at the next input byte. */ \ - ch = *inptr; \ - if (ch < 0x80) \ - /* Plain ASCII character. */ \ - ++inptr; \ - else if ((ch >= 0xa1 && ch <= 0xfe) || ch == 0x8e || ch == 0x8f) \ - { \ - /* Two or three byte character. */ \ - uint32_t ch2; \ + /* Two or three byte character. */ \ + uint32_t ch2; \ \ - if (__builtin_expect (inptr + 1 >= inend, 0)) \ + if (__builtin_expect (inptr + 1 >= inend, 0)) \ + { \ + /* The second byte is not available. */ \ + result = __GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ + \ + ch2 = inptr[1]; \ + \ + /* The second byte must be >= 0xa1 and <= 0xfe. */ \ + if (__builtin_expect (ch2 < 0xa1 || ch2 > 0xfe, 0)) \ + { \ + /* This is an illegal character. */ \ + if (! ignore_errors_p ()) \ { \ - /* The second byte is not available. */ \ - result = __GCONV_INCOMPLETE_INPUT; \ + result = __GCONV_ILLEGAL_INPUT; \ break; \ } \ \ - ch2 = inptr[1]; \ + ++inptr; \ + ++*irreversible; \ + break; \ + } \ \ - /* The second byte must be >= 0xa1 and <= 0xfe. */ \ - if (__builtin_expect (ch2 < 0xa1 || ch2 > 0xfe, 0)) \ + if (ch == 0x8e) \ + { \ + /* Half-width katakana. */ \ + if (__builtin_expect (ch2 > 0xdf, 0)) \ { \ /* This is an illegal character. */ \ if (! ignore_errors_p ()) \ @@ -149,107 +149,89 @@ break; \ } \ \ - if (ch == 0x8e) \ - { \ - /* Half-width katakana. */ \ - if (__builtin_expect (ch2 > 0xdf, 0)) \ - { \ - /* This is an illegal character. */ \ - if (! ignore_errors_p ()) \ - { \ - result = __GCONV_ILLEGAL_INPUT; \ - break; \ - } \ - \ - ++inptr; \ - ++*irreversible; \ - break; \ - } \ - \ - ch = ch2 + 0xfec0; \ - inptr += 2; \ - } \ - else \ - { \ - const unsigned char *endp; \ - \ - if (ch == 0x8f) \ - { \ - /* JISX 0213 plane 2. */ \ - uint32_t ch3; \ - \ - if (__builtin_expect (inptr + 2 >= inend, 0)) \ - { \ - /* The third byte is not available. 
*/ \ - result = __GCONV_INCOMPLETE_INPUT; \ - break; \ - } \ - \ - ch3 = inptr[2]; \ - endp = inptr + 3; \ - \ - ch = jisx0213_to_ucs4 (0x200 - 0x80 + ch2, ch3 ^ 0x80); \ - } \ - else \ - { \ - /* JISX 0213 plane 1. */ \ - endp = inptr + 2; \ - \ - ch = jisx0213_to_ucs4 (0x100 - 0x80 + ch, ch2 ^ 0x80); \ - } \ - \ - if (ch == 0) \ - { \ - /* This is an illegal character. */ \ - if (! ignore_errors_p ()) \ - { \ - result = __GCONV_ILLEGAL_INPUT; \ - break; \ - } \ - \ - ++inptr; \ - ++*irreversible; \ - break; \ - } \ - \ - inptr = endp; \ - \ - if (ch < 0x80) \ - { \ - /* It's a combining character. */ \ - uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \ - uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \ - \ - /* See whether we have room for two characters. */ \ - if (outptr + 8 <= outend) \ - { \ - put32 (outptr, u1); \ - outptr += 4; \ - put32 (outptr, u2); \ - outptr += 4; \ - continue; \ - } \ - \ - /* Otherwise store only the first character now, and \ - put the second one into the queue. */ \ - ch = u1; \ - *statep = u2 << 3; \ - } \ - } \ + ch = ch2 + 0xfec0; \ + inptr += 2; \ } \ else \ { \ - /* This is illegal. */ \ - if (! ignore_errors_p ()) \ + const unsigned char *endp; \ + \ + if (ch == 0x8f) \ { \ - result = __GCONV_ILLEGAL_INPUT; \ + /* JISX 0213 plane 2. */ \ + uint32_t ch3; \ + \ + if (__builtin_expect (inptr + 2 >= inend, 0)) \ + { \ + /* The third byte is not available. */ \ + result = __GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ + \ + ch3 = inptr[2]; \ + endp = inptr + 3; \ + \ + ch = jisx0213_to_ucs4 (0x200 - 0x80 + ch2, ch3 ^ 0x80); \ + } \ + else \ + { \ + /* JISX 0213 plane 1. */ \ + endp = inptr + 2; \ + \ + ch = jisx0213_to_ucs4 (0x100 - 0x80 + ch, ch2 ^ 0x80); \ + } \ + \ + if (ch == 0) \ + { \ + /* This is an illegal character. */ \ + if (! 
ignore_errors_p ()) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + ++inptr; \ + ++*irreversible; \ break; \ } \ \ - ++inptr; \ - ++*irreversible; \ - continue; \ + inptr = endp; \ + \ + if (ch < 0x80) \ + { \ + /* It's a combining character. */ \ + uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \ + uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \ + \ + /* See whether we have room for two characters. */ \ + if (outptr + 8 <= outend) \ + { \ + put32 (outptr, u1); \ + outptr += 4; \ + put32 (outptr, u2); \ + outptr += 4; \ + continue; \ + } \ + else \ + { \ + result = __GCONV_FULL_OUTPUT; \ + break; \ + } \ + } \ } \ + } \ + else \ + { \ + /* This is illegal. */ \ + if (! ignore_errors_p ()) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + ++inptr; \ + ++*irreversible; \ + continue; \ } \ \ put32 (outptr, ch); \ diff --git a/iconvdata/shift_jisx0213.c b/iconvdata/shift_jisx0213.c index 81fef70ac0..0c1ca722c3 100644 --- a/iconvdata/shift_jisx0213.c +++ b/iconvdata/shift_jisx0213.c @@ -62,9 +62,7 @@ *statep = saved_state -/* During Shift_JISX0213 to UCS-4 conversion, the COUNT element of the state - contains the last UCS-4 character, shifted by 3 bits. - During UCS-4 to Shift_JISX0213 conversion, the COUNT element of the state +/* During UCS-4 to Shift_JISX0213 conversion, the COUNT element of the state contains the last two bytes to be output, shifted by 3 bits. */ /* Since this is a stateful encoding we have to provide code which resets @@ -74,17 +72,8 @@ if (data->__statep->__count != 0) \ { \ if (FROM_DIRECTION) \ - { \ - if (__builtin_expect (outbuf + 4 <= outend, 1)) \ - { \ - /* Write out the last character. */ \ - *((uint32_t *) outbuf)++ = data->__statep->__count >> 3; \ - data->__statep->__count = 0; \ - } \ - else \ - /* We don't have enough room in the output buffer. */ \ - status = __GCONV_FULL_OUTPUT; \ - } \ + /* We don't use shift states in the FROM_DIRECTION. 
*/ \ + data->__statep->__count = 0; \ else \ { \ if (__builtin_expect (outbuf + 2 <= outend, 1)) \ @@ -109,126 +98,41 @@ #define LOOPFCT FROM_LOOP #define BODY \ { \ - uint32_t ch; \ + uint32_t ch = *inptr; \ \ - /* Determine whether there is a buffered character pending. */ \ - ch = *statep >> 3; \ - if (__builtin_expect (ch == 0, 1)) \ + if (ch < 0x80) \ { \ - /* No - so look at the next input byte. */ \ - ch = *inptr; \ - if (ch < 0x80) \ + /* Plain ISO646-JP character. */ \ + if (__builtin_expect (ch == 0x5c, 0)) \ + ch = 0xa5; \ + else if (__builtin_expect (ch == 0x7e, 0)) \ + ch = 0x203e; \ + ++inptr; \ + } \ + else if (ch >= 0xa1 && ch <= 0xdf) \ + { \ + /* Half-width katakana. */ \ + ch += 0xfec0; \ + ++inptr; \ + } \ + else if ((ch >= 0x81 && ch <= 0x9f) || (ch >= 0xe0 && ch <= 0xfc)) \ + { \ + /* Two byte character. */ \ + uint32_t ch2; \ + \ + if (__builtin_expect (inptr + 1 >= inend, 0)) \ { \ - /* Plain ISO646-JP character. */ \ - if (__builtin_expect (ch == 0x5c, 0)) \ - ch = 0xa5; \ - else if (__builtin_expect (ch == 0x7e, 0)) \ - ch = 0x203e; \ - ++inptr; \ + /* The second byte is not available. */ \ + result = __GCONV_INCOMPLETE_INPUT; \ + break; \ } \ - else if (ch >= 0xa1 && ch <= 0xdf) \ + \ + ch2 = inptr[1]; \ + \ + /* The second byte must be in the range 0x{40..7E,80..FC}. */ \ + if (__builtin_expect (ch2 < 0x40 || ch2 == 0x7f || ch2 > 0xfc, 0)) \ { \ - /* Half-width katakana. */ \ - ch += 0xfec0; \ - ++inptr; \ - } \ - else if ((ch >= 0x81 && ch <= 0x9f) || (ch >= 0xe0 && ch <= 0xfc)) \ - { \ - /* Two byte character. */ \ - uint32_t ch2; \ - \ - if (__builtin_expect (inptr + 1 >= inend, 0)) \ - { \ - /* The second byte is not available. */ \ - result = __GCONV_INCOMPLETE_INPUT; \ - break; \ - } \ - \ - ch2 = inptr[1]; \ - \ - /* The second byte must be in the range 0x{40..7E,80..FC}. */ \ - if (__builtin_expect (ch2 < 0x40 || ch2 == 0x7f || ch2 > 0xfc, 0))\ - { \ - /* This is an illegal character. */ \ - if (! 
ignore_errors_p ()) \ - { \ - result = __GCONV_ILLEGAL_INPUT; \ - break; \ - } \ - \ - ++inptr; \ - ++*irreversible; \ - break; \ - } \ - \ - /* Convert to row and column. */ \ - if (ch < 0xe0) \ - ch -= 0x81; \ - else \ - ch -= 0xc1; \ - if (ch2 < 0x80) \ - ch2 -= 0x40; \ - else \ - ch2 -= 0x41; \ - /* Now 0 <= ch <= 0x3b, 0 <= ch2 <= 0xbb. */ \ - ch = 2 * ch; \ - if (ch2 >= 0x5e) \ - ch2 -= 0x5e, ch++; \ - ch2 += 0x21; \ - if (ch >= 0x5e) \ - { \ - /* Handling of JISX 0213 plane 2 rows. */ \ - if (ch >= 0x67) \ - ch += 230; \ - else if (ch >= 0x63 || ch == 0x5f) \ - ch += 168; \ - else \ - ch += 162; \ - } \ - \ - ch = jisx0213_to_ucs4 (0x121 + ch, ch2); \ - \ - if (ch == 0) \ - { \ - /* This is an illegal character. */ \ - if (! ignore_errors_p ()) \ - { \ - result = __GCONV_ILLEGAL_INPUT; \ - break; \ - } \ - \ - ++inptr; \ - ++*irreversible; \ - break; \ - } \ - \ - inptr += 2; \ - \ - if (ch < 0x80) \ - { \ - /* It's a combining character. */ \ - uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \ - uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \ - \ - /* See whether we have room for two characters. */ \ - if (outptr + 8 <= outend) \ - { \ - put32 (outptr, u1); \ - outptr += 4; \ - put32 (outptr, u2); \ - outptr += 4; \ - continue; \ - } \ - \ - /* Otherwise store only the first character now, and \ - put the second one into the queue. */ \ - ch = u1; \ - *statep = u2 << 3; \ - } \ - } \ - else \ - { \ - /* This is illegal. */ \ + /* This is an illegal character. */ \ if (! ignore_errors_p ()) \ { \ result = __GCONV_ILLEGAL_INPUT; \ @@ -237,8 +141,86 @@ \ ++inptr; \ ++*irreversible; \ - continue; \ + break; \ } \ + \ + /* Convert to row and column. */ \ + if (ch < 0xe0) \ + ch -= 0x81; \ + else \ + ch -= 0xc1; \ + if (ch2 < 0x80) \ + ch2 -= 0x40; \ + else \ + ch2 -= 0x41; \ + /* Now 0 <= ch <= 0x3b, 0 <= ch2 <= 0xbb. 
*/ \ + ch = 2 * ch; \ + if (ch2 >= 0x5e) \ + ch2 -= 0x5e, ch++; \ + ch2 += 0x21; \ + if (ch >= 0x5e) \ + { \ + /* Handling of JISX 0213 plane 2 rows. */ \ + if (ch >= 0x67) \ + ch += 230; \ + else if (ch >= 0x63 || ch == 0x5f) \ + ch += 168; \ + else \ + ch += 162; \ + } \ + \ + ch = jisx0213_to_ucs4 (0x121 + ch, ch2); \ + \ + if (ch == 0) \ + { \ + /* This is an illegal character. */ \ + if (! ignore_errors_p ()) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + ++inptr; \ + ++*irreversible; \ + break; \ + } \ + \ + inptr += 2; \ + \ + if (ch < 0x80) \ + { \ + /* It's a combining character. */ \ + uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \ + uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \ + \ + /* See whether we have room for two characters. */ \ + if (outptr + 8 <= outend) \ + { \ + put32 (outptr, u1); \ + outptr += 4; \ + put32 (outptr, u2); \ + outptr += 4; \ + continue; \ + } \ + else \ + { \ + result = __GCONV_FULL_OUTPUT; \ + break; \ + } \ + } \ + } \ + else \ + { \ + /* This is illegal. */ \ + if (! ignore_errors_p ()) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + ++inptr; \ + ++*irreversible; \ + continue; \ } \ \ put32 (outptr, ch); \ diff --git a/localedata/ChangeLog b/localedata/ChangeLog index 5ac53d5c60..ec4936fe0e 100644 --- a/localedata/ChangeLog +++ b/localedata/ChangeLog @@ -1,3 +1,129 @@ +2002-04-15 Bruno Haible + + * charmaps/EUC-JISX0213: New file. + * charmaps/SHIFT_JISX0213: New file. + +2002-04-15 Bruno Haible + + * charmaps/BIG5-HKSCS: Update to Unicode 3.2. + +2002-04-15 Bruno Haible + + * charmaps/EUC-TW: Add many mappings for characters introduced in + Unicode 3.1 and 3.2. + +2002-04-15 Bruno Haible + + * charmaps/GB18030: Update for Unicode 3.2: + Add , , .., .., + , .., .., .., + .., .., .., , + .., .., .., .., + .., .., .., , + .., , .., , .., + .., .., .., .., + , .., .., .., + .., .., .., .., + .., .., .., .., + .., .., .., , + .., .., .., .., + , , , .., , .., + .., , ... 
Update width table. + +2002-04-15 Bruno Haible + + * charmaps/UTF-8: Update for Unicode 3.2: + Add , , .., .., + , .., .., .., + .., .., .., , + .., .., .., .., + .., .., .., , + .., , .., , .., + .., .., .., .., + , .., .., .., + .., .., .., .., + .., .., .., .., + .., .., .., , + .., .., .., .., + , , , .., , .., + .., , ... Update width table. + Assign width 2 to .., .., ... + +2002-04-15 Bruno Haible + + Update to Unicode 3.2. + * locales/translit_circle: Add .., ... + * locales/translit_cjk_compat: Change . Add ... + * locales/translit_combining: Add more combining characters. + * locales/translit_compat: Add , , , , + ... + * locales/translit_font: Add .., ... + * locales/translit_neutral: Add .., , + ... + * locales/translit_small: Add small Hiragana letters and small + Katakana letters. + * locales/translit_wide: Add ... + +2002-04-15 Bruno Haible + + * locales/i18n (LC_CTYPE): Update to Unicode 3.2. In detail: + (upper): Add , , , , , , + , , , , , , , . + (lower): Add , , , , , , + , , , , , , , . + (alpha): Add , .., .., + .., .., .., .., + .., , , .., .., + .., .., .., .., + , , , .., .., + .., .., , , .., + .., . + (space): Add . + (punct): Add , .., , .., + .., .., .., , + .., , .., .., + .., .., .., .., + .., , .., .., + .., .., .., .., + .., .., .., .., + .., , , .., .., + .., , , , , .., + ... Remove , , . + (graph): Add , , .., .., + , .., .., .., + .., .., .., , + .., .., .., .., + .., .., .., , + .., , .., , .., + .., .., .., .., + , .., .., .., + .., .., .., .., + .., .., .., .., + .., .., .., , + .., .., .., .., + , , , .., , .., + .., , ... + (print): Likewise. Also add . + (blank): Add . + (toupper): Add (,), (,), (,), + (,), (,), (,), + (,), (,), (,), + (,), (,), (,), + (,), (,). + (totitle): Likewise. + (tolower): Add (,), (,), (,), + (,), (,), (,), + (,), (,), (,), + (,), (,), (,), + (,), (,). + (combining): Add , .., .., + .., .., .., .., + .., ... Remove , . + (combining_level3): Add , .., .., + .., .., .., .., + , ... Remove , . 
+ * locales/tr_TR: Likewise. + 2002-04-15 Bruno Haible * charmaps/IBM856: New file. diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/syscalls.list b/sysdeps/unix/sysv/linux/s390/s390-64/syscalls.list index bb5fdccca4..1dbd61f2df 100644 --- a/sysdeps/unix/sysv/linux/s390/s390-64/syscalls.list +++ b/sysdeps/unix/sysv/linux/s390/s390-64/syscalls.list @@ -27,7 +27,7 @@ semctl - semctl i:iiii __semctl semctl # proper socket implementations: accept - accept i:iBN __libc_accept __accept accept bind - bind i:ipi __bind bind -connect - connect i:ipi __libc_connect __connect connect +connect - connect i:ipi __libc_connect __connect_internal __connect connect getpeername - getpeername i:ipp __getpeername getpeername getsockname - getsockname i:ipp __getsockname getsockname getsockopt - getsockopt i:iiiBN __getsockopt getsockopt