mirror of
git://sourceware.org/git/glibc.git
synced 2024-11-27 03:41:23 +08:00
Update.
1999-12-31 Ulrich Drepper <drepper@cygnus.com> * locale/langinfo.h: Add constants for wide character collation symbol table. * locale/categories.def: Add appropriate entries for collate symbol table entries. * locale/C-collate.c: Add initializers for new entries. Remove commented out code. * locale/elem-hash.h: New file. * locale/Makefile (distribute): Add elem-hash.h. * locale/programs/ld-collate.c: Implement output of collate symbol table. * posix/regex.c: Implement collation class handling.
This commit is contained in:
parent
6fec974edd
commit
3216711f5d
15
ChangeLog
15
ChangeLog
@ -1,3 +1,18 @@
|
||||
1999-12-31 Ulrich Drepper <drepper@cygnus.com>
|
||||
|
||||
* locale/langinfo.h: Add constants for wide character collation
|
||||
symbol table.
|
||||
* locale/categories.def: Add appropriate entries for collate symbol
|
||||
table entries.
|
||||
* locale/C-collate.c: Add initializers for new entries.
|
||||
Remove commented out code.
|
||||
* locale/elem-hash.h: New file.
|
||||
* locale/Makefile (distribute): Add elem-hash.h.
|
||||
* locale/programs/ld-collate.c: Implement output of collate symbol
|
||||
table.
|
||||
|
||||
* posix/regex.c: Implement collation class handling.
|
||||
|
||||
1999-12-30 Ulrich Drepper <drepper@cygnus.com>
|
||||
|
||||
* posix/regex.c (regex_compile): Implement equivalence class handling.
|
||||
|
@ -20,137 +20,12 @@
|
||||
#include <endian.h>
|
||||
#include "localeinfo.h"
|
||||
|
||||
#if 0
|
||||
/* These tables' entries contain values which make the function behave
|
||||
according to POSIX.2 Table 2-8 ``LC_COLLATE Category Definition in
|
||||
the POSIX Locale''. */
|
||||
|
||||
const uint32_t _nl_C_LC_COLLATE_symbol_hash[446] =
|
||||
{
|
||||
0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu,
|
||||
0xffffffffu, 0xffffffffu, 0x00000154u, 0x00000060u, 0xffffffffu, 0xffffffffu,
|
||||
0x0000004fu, 0x0000001au, 0x00000085u, 0x00000030u, 0xffffffffu, 0xffffffffu,
|
||||
0x000002beu, 0x000000fau, 0xffffffffu, 0xffffffffu, 0x0000014eu, 0x0000005eu,
|
||||
0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x000000bbu, 0x00000044u,
|
||||
0xffffffffu, 0xffffffffu, 0x000000efu, 0x0000004cu, 0x00000147u, 0x0000005cu,
|
||||
0x000000a0u, 0x0000003eu, 0x00000000u, 0x00000000u, 0x00000038u, 0x00000016u,
|
||||
0x00000094u, 0x00000038u, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu,
|
||||
0xffffffffu, 0xffffffffu, 0x00000140u, 0x0000005au, 0x0000018cu, 0x00000076u,
|
||||
0x0000007du, 0x0000002cu, 0xffffffffu, 0xffffffffu, 0x00000115u, 0x00000052u,
|
||||
0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x00000285u, 0x000000deu,
|
||||
0x00000171u, 0x0000006cu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu,
|
||||
0x00000289u, 0x000000e2u, 0x000002d8u, 0x000000feu, 0xffffffffu, 0xffffffffu,
|
||||
0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x00000022u, 0x00000010u,
|
||||
0x0000028fu, 0x000000e8u, 0x00000069u, 0x00000022u, 0x0000006du, 0x00000024u,
|
||||
0x00000071u, 0x00000026u, 0x00000075u, 0x00000028u, 0xffffffffu, 0xffffffffu,
|
||||
0x00000295u, 0x000000eeu, 0xffffffffu, 0xffffffffu, 0x00000297u, 0x000000f0u,
|
||||
0xffffffffu, 0xffffffffu, 0x00000299u, 0x000000f2u, 0xffffffffu, 0xffffffffu,
|
||||
0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu,
|
||||
0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x00000213u, 0x000000b6u,
|
||||
0xffffffffu, 0xffffffffu, 0x00000014u, 0x0000000au, 0xffffffffu, 0xffffffffu,
|
||||
0xffffffffu, 0xffffffffu, 0x00000227u, 0x000000b8u, 0xffffffffu, 0xffffffffu,
|
||||
0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x0000015du, 0x00000064u,
|
||||
0xffffffffu, 0xffffffffu, 0x000001ffu, 0x000000a2u, 0xffffffffu, 0xffffffffu,
|
||||
0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x0000013au, 0x00000058u,
|
||||
0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu,
|
||||
0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x00000010u, 0x00000008u,
|
||||
0x000001dfu, 0x00000082u, 0x000001e1u, 0x00000084u, 0x00000167u, 0x00000068u,
|
||||
0x00000004u, 0x00000002u, 0x000001e7u, 0x0000008au, 0x00000186u, 0x00000074u,
|
||||
0x000001ebu, 0x0000008eu, 0x000001edu, 0x00000090u, 0x000001efu, 0x00000092u,
|
||||
0x000001f1u, 0x00000094u, 0x000001f3u, 0x00000096u, 0x000001f5u, 0x00000098u,
|
||||
0x000001f7u, 0x0000009au, 0x000001f9u, 0x0000009cu, 0x000001a5u, 0x0000007au,
|
||||
0x000001fdu, 0x000000a0u, 0x00000030u, 0x00000014u, 0x00000201u, 0x000000a4u,
|
||||
0x00000203u, 0x000000a6u, 0x00000205u, 0x000000a8u, 0x00000207u, 0x000000aau,
|
||||
0x00000209u, 0x000000acu, 0x0000020bu, 0x000000aeu, 0x0000020du, 0x000000b0u,
|
||||
0x0000020fu, 0x000000b2u, 0x00000211u, 0x000000b4u, 0xffffffffu, 0xffffffffu,
|
||||
0x0000009cu, 0x0000003cu, 0xffffffffu, 0xffffffffu, 0x00000098u, 0x0000003au,
|
||||
0x0000016cu, 0x0000006au, 0xffffffffu, 0xffffffffu, 0x00000269u, 0x000000c2u,
|
||||
0x0000026bu, 0x000000c4u, 0x0000026du, 0x000000c6u, 0x0000026fu, 0x000000c8u,
|
||||
0x00000271u, 0x000000cau, 0x00000273u, 0x000000ccu, 0x00000275u, 0x000000ceu,
|
||||
0x00000277u, 0x000000d0u, 0x00000279u, 0x000000d2u, 0x0000027bu, 0x000000d4u,
|
||||
0x0000027du, 0x000000d6u, 0x0000027fu, 0x000000d8u, 0x00000281u, 0x000000dau,
|
||||
0x00000283u, 0x000000dcu, 0x00000090u, 0x00000036u, 0x00000287u, 0x000000e0u,
|
||||
0x0000005fu, 0x0000001cu, 0x0000028bu, 0x000000e4u, 0x0000028du, 0x000000e6u,
|
||||
0x00000089u, 0x00000032u, 0x000001c3u, 0x0000007eu, 0x00000293u, 0x000000ecu,
|
||||
0x00000062u, 0x0000001eu, 0x000001b1u, 0x0000007cu, 0x00000130u, 0x00000056u,
|
||||
0x0000029bu, 0x000000f4u, 0x00000196u, 0x00000078u, 0xffffffffu, 0xffffffffu,
|
||||
0xffffffffu, 0xffffffffu, 0x00000081u, 0x0000002eu, 0x00000251u, 0x000000beu,
|
||||
0x00000079u, 0x0000002au, 0x0000029du, 0x000000f6u, 0xffffffffu, 0xffffffffu,
|
||||
0x0000025cu, 0x000000c0u, 0xffffffffu, 0xffffffffu, 0x0000002cu, 0x00000012u,
|
||||
0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu,
|
||||
0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x000000a4u, 0x00000040u,
|
||||
0xffffffffu, 0xffffffffu, 0x000002b0u, 0x000000f8u, 0xffffffffu, 0xffffffffu,
|
||||
0x000000f9u, 0x0000004eu, 0xffffffffu, 0xffffffffu, 0x0000001cu, 0x0000000eu,
|
||||
0xffffffffu, 0xffffffffu, 0x0000017bu, 0x00000070u, 0x0000000cu, 0x00000006u,
|
||||
0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x000001e3u, 0x00000086u,
|
||||
0xffffffffu, 0xffffffffu, 0x000001e5u, 0x00000088u, 0xffffffffu, 0xffffffffu,
|
||||
0xffffffffu, 0xffffffffu, 0x000001d1u, 0x00000080u, 0x000001e9u, 0x0000008cu,
|
||||
0x0000008cu, 0x00000034u, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu,
|
||||
0xffffffffu, 0xffffffffu, 0x00000291u, 0x000000eau, 0xffffffffu, 0xffffffffu,
|
||||
0x00000008u, 0x00000004u, 0xffffffffu, 0xffffffffu, 0x00000181u, 0x00000072u,
|
||||
0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x00000231u, 0x000000bau,
|
||||
0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x000000cau, 0x00000046u,
|
||||
0x00000246u, 0x000000bcu, 0xffffffffu, 0xffffffffu, 0x000001fbu, 0x0000009eu,
|
||||
0x000000d6u, 0x00000048u, 0x00000018u, 0x0000000cu, 0xffffffffu, 0xffffffffu,
|
||||
0x00000159u, 0x00000062u, 0xffffffffu, 0xffffffffu, 0x000000aau, 0x00000042u,
|
||||
0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu,
|
||||
0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x000000e2u, 0x0000004au,
|
||||
0x00000175u, 0x0000006eu, 0xffffffffu, 0xffffffffu, 0x00000104u, 0x00000050u,
|
||||
0x00000065u, 0x00000020u, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu,
|
||||
0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0x000002d2u, 0x000000fcu,
|
||||
0xffffffffu, 0xffffffffu, 0x00000161u, 0x00000066u, 0x00000045u, 0x00000018u,
|
||||
0xffffffffu, 0xffffffffu, 0x00000127u, 0x00000054u, 0xffffffffu, 0xffffffffu,
|
||||
0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu,
|
||||
0xffffffffu, 0xffffffffu
|
||||
};
|
||||
|
||||
const char _nl_C_LC_COLLATE_symbol_strings[732] =
|
||||
"NUL\0" "SOH\0" "STX\0" "ETX\0" "EOT\0" "ENQ\0" "ACK\0" "alert\0"
|
||||
"backspace\0" "tab\0" "newline\0" "vertical-tab\0" "form-feed\0"
|
||||
"carriage-return\0" "SI\0" "SO\0" "DLE\0" "DC1\0" "DC2\0" "DC3\0" "DC4\0"
|
||||
"NAK\0" "SYN\0" "ETB\0" "CAN\0" "EM\0" "SUB\0" "ESC\0" "IS4\0" "IS3\0"
|
||||
"IS2\0" "IS1\0" "space\0" "exclamation-mark\0" "quotation-mark\0"
|
||||
"number-sign\0" "dollar-sign\0" "percent-sign\0" "ampersand\0"
|
||||
"apostrophe\0" "left-parenthesis\0" "right-parenthesis\0" "asterisk\0"
|
||||
"plus-sign\0" "comma\0" "hyphen\0" "period\0" "slash\0" "zero\0" "one\0"
|
||||
"two\0" "three\0" "four\0" "five\0" "six\0" "seven\0" "eight\0" "nine\0"
|
||||
"colon\0" "semicolon\0" "less-than-sign\0" "equals-sign\0"
|
||||
"greater-than-sign\0" "question-mark\0" "commercial-at\0" "A\0" "B\0" "C\0"
|
||||
"D\0" "E\0" "F\0" "G\0" "H\0" "I\0" "J\0" "K\0" "L\0" "M\0" "N\0" "O\0"
|
||||
"P\0" "Q\0" "R\0" "S\0" "T\0" "U\0" "V\0" "W\0" "X\0" "Y\0" "Z\0"
|
||||
"left-square-bracket\0" "backslash\0" "right-square-bracket\0"
|
||||
"circumflex\0" "underscore\0" "grave-accent\0" "a\0" "b\0" "c\0" "d\0" "e\0"
|
||||
"f\0" "g\0" "h\0" "i\0" "j\0" "k\0" "l\0" "m\0" "n\0" "o\0" "p\0" "q\0"
|
||||
"r\0" "s\0" "t\0" "u\0" "v\0" "w\0" "x\0" "y\0" "z\0" "left-curly-bracket\0"
|
||||
"vertical-line\0" "right-curly-bracket\0" "tilde\0" "DEL\0";
|
||||
|
||||
const uint32_t _nl_C_LC_COLLATE_symbol_classes[256] =
|
||||
{
|
||||
1, 0, 1, 1, 1, 2, 1, 3, 1, 4, 1, 5, 1, 6, 1, 7,
|
||||
1, 8, 1, 9, 1, 10, 1, 11, 1, 12, 1, 13, 1, 14, 1, 15,
|
||||
1, 16, 1, 17, 1, 18, 1, 19, 1, 20, 1, 21, 1, 22, 1, 23,
|
||||
1, 24, 1, 25, 1, 26, 1, 27, 1, 28, 1, 29, 1, 30, 1, 31,
|
||||
1, 32, 1, 33, 1, 34, 1, 35, 1, 36, 1, 37, 1, 38, 1, 39,
|
||||
1, 40, 1, 41, 1, 42, 1, 43, 1, 44, 1, 45, 1, 46, 1, 47,
|
||||
1, 48, 1, 49, 1, 50, 1, 51, 1, 52, 1, 53, 1, 54, 1, 55,
|
||||
1, 56, 1, 57, 1, 58, 1, 59, 1, 60, 1, 61, 1, 62, 1, 63,
|
||||
1, 64, 1, 65, 1, 66, 1, 67, 1, 68, 1, 69, 1, 70, 1, 71,
|
||||
1, 72, 1, 73, 1, 74, 1, 75, 1, 76, 1, 77, 1, 78, 1, 79,
|
||||
1, 80, 1, 81, 1, 82, 1, 83, 1, 84, 1, 85, 1, 86, 1, 87,
|
||||
1, 88, 1, 89, 1, 90, 1, 91, 1, 92, 1, 93, 1, 94, 1, 95,
|
||||
1, 96, 1, 97, 1, 98, 1, 99, 1, 100, 1, 101, 1, 102, 1, 103,
|
||||
1, 104, 1, 105, 1, 106, 1, 107, 1, 108, 1, 109, 1, 110, 1, 111,
|
||||
1, 112, 1, 113, 1, 114, 1, 115, 1, 116, 1, 117, 1, 118, 1, 119,
|
||||
1, 120, 1, 121, 1, 122, 1, 123, 1, 124, 1, 125, 1, 126, 1, 127
|
||||
};
|
||||
#endif
|
||||
|
||||
const struct locale_data _nl_C_LC_COLLATE =
|
||||
{
|
||||
_nl_C_name,
|
||||
NULL, 0, 0, /* no file mapped */
|
||||
UNDELETABLE,
|
||||
13,
|
||||
16,
|
||||
{
|
||||
{ word: 0 },
|
||||
{ string: NULL },
|
||||
@ -164,6 +39,9 @@ const struct locale_data _nl_C_LC_COLLATE =
|
||||
{ string: NULL },
|
||||
{ string: NULL },
|
||||
{ string: NULL },
|
||||
{ string: NULL },
|
||||
{ word: 0 },
|
||||
{ string: NULL },
|
||||
{ string: NULL }
|
||||
}
|
||||
};
|
||||
|
@ -23,7 +23,7 @@ subdir := locale
|
||||
|
||||
headers = locale.h langinfo.h xlocale.h
|
||||
distribute = localeinfo.h categories.def iso-639.def iso-3166.def \
|
||||
iso-4217.def weight.h strlen-hash.h \
|
||||
iso-4217.def weight.h strlen-hash.h elem-hash.h \
|
||||
$(addprefix programs/, \
|
||||
locale.c localedef.c \
|
||||
$(localedef-modules:=.c) $(locale-modules:=.c) \
|
||||
|
@ -42,19 +42,22 @@ DEFINE_CATEGORY
|
||||
(
|
||||
LC_COLLATE, "LC_COLLATE",
|
||||
(
|
||||
DEFINE_ELEMENT (_NL_COLLATE_NRULES, "collate-nrules", std, word)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_RULESETS, "collate-rulesets", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_TABLEMB, "collate-tablemb", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_WEIGHTMB, "collate-weightmb", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_EXTRAMB, "collate-extramb", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_INDIRECTMB, "collate-indirectmb", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_HASH_SIZE, "collate-hash-size", std, word)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_HASH_LAYERS, "collate-hash-layers", std, word)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_NAMES, "collate-names", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_TABLEWC, "collate-tablewc", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_WEIGHTWC, "collate-weightwc", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_EXTRAWC, "collate-extrawc", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_INDIRECTWC, "collate-indirectwc", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_NRULES, "collate-nrules", std, word)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_RULESETS, "collate-rulesets", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_TABLEMB, "collate-tablemb", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_WEIGHTMB, "collate-weightmb", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_EXTRAMB, "collate-extramb", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_INDIRECTMB, "collate-indirectmb", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_HASH_SIZE, "collate-hash-size", std, word)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_HASH_LAYERS, "collate-hash-layers", std, word)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_NAMES, "collate-names", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_TABLEWC, "collate-tablewc", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_WEIGHTWC, "collate-weightwc", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_EXTRAWC, "collate-extrawc", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_INDIRECTWC, "collate-indirectwc", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_SYMB_HASH_SIZEMB, "collate-symb-hash-sizemb", std, word)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_SYMB_TABLEMB, "collate-symb-tablemb", std, string)
|
||||
DEFINE_ELEMENT (_NL_COLLATE_SYMB_EXTRAMB, "collate-symb-extramb", std, string)
|
||||
), NO_POSTLOAD)
|
||||
|
||||
|
||||
|
34
locale/elem-hash.h
Normal file
34
locale/elem-hash.h
Normal file
@ -0,0 +1,34 @@
|
||||
/* Copyright (C) 1999 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Written by Ulrich Drepper, <drepper@cygnus.com>.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with the GNU C Library; see the file COPYING.LIB. If not,
|
||||
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
|
||||
/* The hashing function used for the table with collation symbols. */
|
||||
static inline int32_t
|
||||
elem_hash (const char *str, int_fast32_t n)
|
||||
{
|
||||
int32_t result = n;
|
||||
|
||||
while (n > 0)
|
||||
{
|
||||
n <<= 3;
|
||||
n += *str++;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
@ -243,6 +243,9 @@ enum
|
||||
_NL_COLLATE_WEIGHTWC,
|
||||
_NL_COLLATE_EXTRAWC,
|
||||
_NL_COLLATE_INDIRECTWC,
|
||||
_NL_COLLATE_SYMB_HASH_SIZEMB,
|
||||
_NL_COLLATE_SYMB_TABLEMB,
|
||||
_NL_COLLATE_SYMB_EXTRAMB,
|
||||
_NL_NUM_LC_COLLATE,
|
||||
|
||||
/* LC_CTYPE category: character classification.
|
||||
|
@ -25,12 +25,14 @@
|
||||
#include <error.h>
|
||||
#include <stdlib.h>
|
||||
#include <wchar.h>
|
||||
#include <sys/param.h>
|
||||
|
||||
#include "charmap.h"
|
||||
#include "localeinfo.h"
|
||||
#include "linereader.h"
|
||||
#include "locfile.h"
|
||||
#include "localedef.h"
|
||||
#include "elem-hash.h"
|
||||
|
||||
/* Uncomment the following line in the production version. */
|
||||
/* #define NDEBUG 1 */
|
||||
@ -88,11 +90,13 @@ struct element_t
|
||||
we changed if necessary but I doubt this is necessary. */
|
||||
unsigned int used_in_level;
|
||||
|
||||
struct element_list_t *weights;
|
||||
/* Index in the `weight' table in the output file for the character. */
|
||||
int32_t weights_idx;
|
||||
|
||||
/* Nonzero if this is a real character definition. */
|
||||
int is_character;
|
||||
|
||||
struct element_list_t *weights;
|
||||
|
||||
/* Where does the definition come from. */
|
||||
const char *file;
|
||||
size_t line;
|
||||
@ -297,6 +301,7 @@ new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
|
||||
|
||||
/* Will be allocated later. */
|
||||
newp->weights = NULL;
|
||||
newp->weights_idx = 0;
|
||||
|
||||
newp->file = NULL;
|
||||
newp->line = 0;
|
||||
@ -1804,6 +1809,9 @@ output_weight (struct obstack *pool, struct locale_collate_t *collate,
|
||||
obstack_grow (pool, buf, len);
|
||||
}
|
||||
|
||||
/* Remember the index. */
|
||||
elem->weights_idx = retval;
|
||||
|
||||
return retval | ((elem->section->ruleidx & 0x7f) << 24);
|
||||
}
|
||||
|
||||
@ -1866,7 +1874,10 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
|
||||
uint32_t *names;
|
||||
uint32_t *tablewc;
|
||||
size_t table_size;
|
||||
uint32_t elem_size;
|
||||
uint32_t *elem_table;
|
||||
int i;
|
||||
struct element_t *runp;
|
||||
|
||||
data.magic = LIMAGIC (LC_COLLATE);
|
||||
data.n = nelems;
|
||||
@ -2381,6 +2392,110 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
|
||||
++cnt;
|
||||
|
||||
|
||||
/* Finally write the table with collation element names out. It is
|
||||
a hash table with a simple function which gets the name of the
|
||||
character as the input. One character might have many names. The
|
||||
value associated with the name is an index into the weight table
|
||||
where we are then interested in the first-level weight value.
|
||||
|
||||
To determine how large the table should be we are counting the
|
||||
elements have to put in. Since we are using internal chaining
|
||||
using a secondary hash function we have to make the table a bit
|
||||
larger to avoid extremely long search times. We can achieve
|
||||
good results with a 40% larger table than there are entries. */
|
||||
elem_size = 0;
|
||||
runp = collate->start;
|
||||
while (runp != NULL)
|
||||
{
|
||||
if (runp->mbs != NULL && runp->weights != NULL)
|
||||
/* Yep, the element really counts. */
|
||||
++elem_size;
|
||||
|
||||
runp = runp->next;
|
||||
}
|
||||
/* Add 40% and find the next prime number. */
|
||||
elem_size = MIN (next_prime (elem_size * 1.4), 257);
|
||||
|
||||
/* Allocate the table. Each entry consists of two words: the hash
|
||||
value and an index in a secondary table which provides the index
|
||||
into the weight table and the string itself (so that a match can
|
||||
be determined). */
|
||||
elem_table = (uint32_t *) obstack_alloc (&extrapool,
|
||||
elem_size * 2 * sizeof (uint32_t));
|
||||
memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
|
||||
|
||||
/* Now add the elements. */
|
||||
runp = collate->start;
|
||||
while (runp != NULL)
|
||||
{
|
||||
if (runp->mbs != NULL && runp->weights != NULL)
|
||||
{
|
||||
/* Compute the hash value of the name. */
|
||||
uint32_t namelen = strlen (runp->name);
|
||||
uint32_t hash = elem_hash (runp->name, namelen);
|
||||
size_t idx = hash % elem_size;
|
||||
|
||||
if (elem_table[idx * 2] != 0)
|
||||
{
|
||||
/* The spot is already take. Try iterating using the value
|
||||
from the secondary hashing function. */
|
||||
size_t iter = hash % (elem_size - 2);
|
||||
|
||||
do
|
||||
{
|
||||
idx += iter;
|
||||
if (idx >= elem_size)
|
||||
idx -= elem_size;
|
||||
}
|
||||
while (elem_table[idx * 2] != 0);
|
||||
|
||||
/* This is the spot where we will insert the value. */
|
||||
elem_table[idx * 2] = hash;
|
||||
elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
|
||||
|
||||
/* Now add the index into the weights table. We know the
|
||||
address is always 32bit aligned. */
|
||||
if (sizeof (int) == sizeof (int32_t))
|
||||
obstack_int_grow (&extrapool, runp->weights_idx);
|
||||
else
|
||||
obstack_grow (&extrapool, &runp->weights_idx,
|
||||
sizeof (int32_t));
|
||||
|
||||
/* The the string itself including length. */
|
||||
obstack_1grow (&extrapool, namelen);
|
||||
obstack_grow (&extrapool, runp->name, namelen);
|
||||
|
||||
/* And align again to 32 bits. */
|
||||
if ((1 + namelen) % sizeof (int32_t) != 0)
|
||||
obstack_grow (&extrapool, "\0\0",
|
||||
(sizeof (int32_t)
|
||||
- (1 + namelen) % sizeof (int32_t)));
|
||||
}
|
||||
}
|
||||
|
||||
runp = runp->next;
|
||||
}
|
||||
|
||||
/* Prepare to write out this data. */
|
||||
assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB));
|
||||
iov[2 + cnt].iov_base = &elem_size;
|
||||
iov[2 + cnt].iov_len = sizeof (int32_t);
|
||||
idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
|
||||
++cnt;
|
||||
|
||||
assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_TABLEMB));
|
||||
iov[2 + cnt].iov_base = elem_table;
|
||||
iov[2 + cnt].iov_len = elem_size * 2 * sizeof (int32_t);
|
||||
idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
|
||||
++cnt;
|
||||
|
||||
assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB));
|
||||
iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
|
||||
iov[2 + cnt].iov_base = obstack_finish (&extrapool);
|
||||
idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
|
||||
++cnt;
|
||||
|
||||
|
||||
assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
|
||||
|
||||
write_locale_data (output_path, "LC_COLLATE", 2 + cnt, iov);
|
||||
|
168
posix/regex.c
168
posix/regex.c
@ -82,6 +82,7 @@
|
||||
|
||||
/* We are also using some library internals. */
|
||||
# include <locale/localeinfo.h>
|
||||
# include <locale/elem-hash.h>
|
||||
# include <langinfo.h>
|
||||
#endif
|
||||
|
||||
@ -2378,12 +2379,13 @@ regex_compile (pattern, size, syntax, bufp)
|
||||
had_char_class = false;
|
||||
}
|
||||
}
|
||||
#ifdef _LIBC
|
||||
else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
|
||||
{
|
||||
unsigned char str[MB_LEN_MAX + 1];
|
||||
#ifdef _LIBC
|
||||
uint32_t nrules =
|
||||
_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
|
||||
#endif
|
||||
|
||||
PATFETCH (c);
|
||||
c1 = 0;
|
||||
@ -2412,7 +2414,9 @@ regex_compile (pattern, size, syntax, bufp)
|
||||
character set and therefore we cannot have character
|
||||
with more than one byte in the multibyte
|
||||
representation. */
|
||||
#ifdef _LIBC
|
||||
if (nrules == 0)
|
||||
#endif
|
||||
{
|
||||
if (c1 != 1)
|
||||
FREE_STACK_RETURN (REG_ECOLLATE);
|
||||
@ -2424,6 +2428,7 @@ regex_compile (pattern, size, syntax, bufp)
|
||||
/* Set the bit for the character. */
|
||||
SET_LIST_BIT (str[0]);
|
||||
}
|
||||
#ifdef _LIBC
|
||||
else
|
||||
{
|
||||
/* Try to match the byte sequence in `str' against
|
||||
@ -2495,8 +2500,168 @@ regex_compile (pattern, size, syntax, bufp)
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
had_char_class = true;
|
||||
}
|
||||
}
|
||||
else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
|
||||
{
|
||||
unsigned char str[128]; /* Should be large enough. */
|
||||
#ifdef _LIBC
|
||||
uint32_t nrules =
|
||||
_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
|
||||
#endif
|
||||
|
||||
PATFETCH (c);
|
||||
c1 = 0;
|
||||
|
||||
/* If pattern is `[[='. */
|
||||
if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
PATFETCH (c);
|
||||
if ((c == '.' && *p == ']') || p == pend)
|
||||
break;
|
||||
if (c1 < sizeof (str))
|
||||
str[c1++] = c;
|
||||
else
|
||||
/* This is in any case an invalid class name. */
|
||||
str[0] = '\0';
|
||||
}
|
||||
str[c1] = '\0';
|
||||
|
||||
if (c == '.' && *p == ']' && str[0] != '\0')
|
||||
{
|
||||
/* If we have no collation data we use the default
|
||||
collation in which each character is the name
|
||||
for its own class which contains only the one
|
||||
character. It also means that ASCII is the
|
||||
character set and therefore we cannot have character
|
||||
with more than one byte in the multibyte
|
||||
representation. */
|
||||
#ifdef _LIBC
|
||||
if (nrules == 0)
|
||||
#endif
|
||||
{
|
||||
if (c1 != 1)
|
||||
FREE_STACK_RETURN (REG_ECOLLATE);
|
||||
|
||||
/* Throw away the ] at the end of the equivalence
|
||||
class. */
|
||||
PATFETCH (c);
|
||||
|
||||
/* Set the bit for the character. */
|
||||
SET_LIST_BIT (str[0]);
|
||||
}
|
||||
#ifdef _LIBC
|
||||
else
|
||||
{
|
||||
/* Try to match the byte sequence in `str' against
|
||||
those known to the collate implementation.
|
||||
First find out whether the bytes in `str' are
|
||||
actually from exactly one character. */
|
||||
const unsigned char *weights;
|
||||
int32_t table_size;
|
||||
const int32_t *table;
|
||||
const int32_t *symb_table;
|
||||
const unsigned char *extra;
|
||||
int32_t idx;
|
||||
int32_t elem;
|
||||
const unsigned char *cp = str;
|
||||
int32_t weight;
|
||||
int32_t second;
|
||||
int32_t hash;
|
||||
int ch;
|
||||
|
||||
table = (const int32_t *)
|
||||
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
|
||||
weights = (const unsigned char *)
|
||||
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
|
||||
table_size =
|
||||
_NL_CURRENT_WORD (LC_COLLATE,
|
||||
_NL_COLLATE_SYMB_HASH_SIZEMB);
|
||||
symb_table = (const int32_t *)
|
||||
_NL_CURRENT (LC_COLLATE,
|
||||
_NL_COLLATE_SYMB_TABLEMB);
|
||||
extra = (const unsigned char *)
|
||||
_NL_CURRENT (LC_COLLATE,
|
||||
_NL_COLLATE_SYMB_EXTRAMB);
|
||||
|
||||
/* Locate the character in the hashing table. */
|
||||
hash = elem_hash (str, c1);
|
||||
|
||||
idx = 0;
|
||||
elem = hash % table_size;
|
||||
second = hash % (table_size - 2);
|
||||
while (symb_table[2 * elem] != 0)
|
||||
{
|
||||
/* First compare the hashing value. */
|
||||
if (symb_table[2 * elem] == hash
|
||||
&& (c1 == extra[symb_table[2 * elem + 1]
|
||||
+ sizeof (int32_t)])
|
||||
&& memcmp (str,
|
||||
&extra[symb_table[2 * elem + 1]
|
||||
+ sizeof (int32_t) + 1],
|
||||
c1) == 0)
|
||||
{
|
||||
/* Yep, this is the entry. */
|
||||
idx = *((int32_t *)
|
||||
(extra
|
||||
+ symb_table[2 * elem + 1]));
|
||||
break;
|
||||
}
|
||||
|
||||
/* Next entry. */
|
||||
elem += second;
|
||||
}
|
||||
|
||||
if (symb_table[2 * elem] == 0)
|
||||
/* This is no valid character. */
|
||||
FREE_STACK_RETURN (REG_ECOLLATE);
|
||||
|
||||
/* Throw away the ] at the end of the equivalence
|
||||
class. */
|
||||
PATFETCH (c);
|
||||
|
||||
/* Now we have to go throught the whole table
|
||||
and find all characters which have the same
|
||||
first level weight.
|
||||
|
||||
XXX Note that this is not entirely correct.
|
||||
we would have to match multibyte sequences
|
||||
but this is not possible with the current
|
||||
implementation. */
|
||||
for (ch = 1; ch < 256; ++ch)
|
||||
/* XXX This test would have to be changed if we
|
||||
would allow matching multibyte sequences. */
|
||||
if (table[ch] > 0)
|
||||
{
|
||||
int32_t idx2 = table[ch];
|
||||
size_t len = weights[idx2];
|
||||
|
||||
/* Test whether the lenghts match. */
|
||||
if (weights[idx] == len)
|
||||
{
|
||||
/* They do. New compare the bytes of
|
||||
the weight. */
|
||||
size_t cnt = 0;
|
||||
|
||||
while (cnt < len
|
||||
&& (weights[idx + 1 + cnt]
|
||||
== weights[idx2 + 1 + cnt]))
|
||||
++len;
|
||||
|
||||
if (cnt == len)
|
||||
/* They match. Mark the character as
|
||||
acceptable. */
|
||||
SET_LIST_BIT (ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
had_char_class = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
c1++;
|
||||
@ -2507,7 +2672,6 @@ regex_compile (pattern, size, syntax, bufp)
|
||||
had_char_class = false;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
else
|
||||
{
|
||||
had_char_class = false;
|
||||
|
Loading…
Reference in New Issue
Block a user