mirror of
git://sourceware.org/git/glibc.git
synced 2025-04-24 14:41:06 +08:00
Correct handling of different collation sections.
This commit is contained in:
parent
eacc655548
commit
ab80bec0cb
@ -47,6 +47,7 @@ struct element_t;
|
|||||||
/* Data type for list of strings. */
|
/* Data type for list of strings. */
|
||||||
struct section_list
|
struct section_list
|
||||||
{
|
{
|
||||||
|
struct section_list *def_next;
|
||||||
struct section_list *next;
|
struct section_list *next;
|
||||||
/* Name of the section. */
|
/* Name of the section. */
|
||||||
const char *name;
|
const char *name;
|
||||||
@ -144,6 +145,8 @@ struct locale_collate_t
|
|||||||
int cur_weight_max;
|
int cur_weight_max;
|
||||||
|
|
||||||
/* List of known scripts. */
|
/* List of known scripts. */
|
||||||
|
struct section_list *known_sections;
|
||||||
|
/* List of used sections. */
|
||||||
struct section_list *sections;
|
struct section_list *sections;
|
||||||
/* Current section using definition. */
|
/* Current section using definition. */
|
||||||
struct section_list *current_section;
|
struct section_list *current_section;
|
||||||
@ -151,6 +154,9 @@ struct locale_collate_t
|
|||||||
struct section_list unnamed_section;
|
struct section_list unnamed_section;
|
||||||
/* To make handling of errors easier we have another section. */
|
/* To make handling of errors easier we have another section. */
|
||||||
struct section_list error_section;
|
struct section_list error_section;
|
||||||
|
/* Sometimes we are defining the values for collating symbols before
|
||||||
|
the first actual section. */
|
||||||
|
struct section_list symbol_section;
|
||||||
|
|
||||||
/* Start of the order list. */
|
/* Start of the order list. */
|
||||||
struct element_t *start;
|
struct element_t *start;
|
||||||
@ -562,7 +568,7 @@ read_directions (struct linereader *ldfile, struct token *arg,
|
|||||||
|
|
||||||
static struct element_t *
|
static struct element_t *
|
||||||
find_element (struct linereader *ldfile, struct locale_collate_t *collate,
|
find_element (struct linereader *ldfile, struct locale_collate_t *collate,
|
||||||
const char *str, size_t len, uint32_t *wcstr)
|
const char *str, size_t len)
|
||||||
{
|
{
|
||||||
struct element_t *result = NULL;
|
struct element_t *result = NULL;
|
||||||
|
|
||||||
@ -668,13 +674,26 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
|
|||||||
elem->weights[weight_cnt].w[0] = NULL;
|
elem->weights[weight_cnt].w[0] = NULL;
|
||||||
elem->weights[weight_cnt].cnt = 1;
|
elem->weights[weight_cnt].cnt = 1;
|
||||||
}
|
}
|
||||||
else if (arg->tok == tok_bsymbol)
|
else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
|
||||||
{
|
{
|
||||||
struct element_t *val = find_element (ldfile, collate,
|
char ucs4str[10];
|
||||||
arg->val.str.startmb,
|
struct element_t *val;
|
||||||
arg->val.str.lenmb,
|
char *symstr;
|
||||||
arg->val.str.startwc);
|
size_t symlen;
|
||||||
|
|
||||||
|
if (arg->tok == tok_bsymbol)
|
||||||
|
{
|
||||||
|
symstr = arg->val.str.startmb;
|
||||||
|
symlen = arg->val.str.lenmb;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
|
||||||
|
symstr = ucs4str;
|
||||||
|
symlen = 9;
|
||||||
|
}
|
||||||
|
|
||||||
|
val = find_element (ldfile, collate, symstr, symlen);
|
||||||
if (val == NULL)
|
if (val == NULL)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -720,7 +739,7 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
|
|||||||
}
|
}
|
||||||
|
|
||||||
charelem = find_element (ldfile, collate, startp,
|
charelem = find_element (ldfile, collate, startp,
|
||||||
cp - startp, NULL);
|
cp - startp);
|
||||||
++cp;
|
++cp;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -731,7 +750,7 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
|
|||||||
string as if that would be bsymbols. Otherwise we
|
string as if that would be bsymbols. Otherwise we
|
||||||
would have to match back to bsymbols somehow and this
|
would have to match back to bsymbols somehow and this
|
||||||
is not what people normally expect. */
|
is not what people normally expect. */
|
||||||
charelem = find_element (ldfile, collate, cp++, 1, NULL);
|
charelem = find_element (ldfile, collate, cp++, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (charelem == NULL)
|
if (charelem == NULL)
|
||||||
@ -1349,7 +1368,7 @@ static void
|
|||||||
collate_startup (struct linereader *ldfile, struct localedef_t *locale,
|
collate_startup (struct linereader *ldfile, struct localedef_t *locale,
|
||||||
struct localedef_t *copy_locale, int ignore_content)
|
struct localedef_t *copy_locale, int ignore_content)
|
||||||
{
|
{
|
||||||
if (!ignore_content)
|
if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
|
||||||
{
|
{
|
||||||
struct locale_collate_t *collate;
|
struct locale_collate_t *collate;
|
||||||
|
|
||||||
@ -1432,8 +1451,9 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
|
|||||||
or in none. */
|
or in none. */
|
||||||
for (i = 0; i < nrules; ++i)
|
for (i = 0; i < nrules; ++i)
|
||||||
for (sect = collate->sections; sect != NULL; sect = sect->next)
|
for (sect = collate->sections; sect != NULL; sect = sect->next)
|
||||||
if ((sect->rules[i] & sort_position)
|
if (sect->rules != NULL
|
||||||
!= (collate->sections->rules[i] & sort_position))
|
&& ((sect->rules[i] & sort_position)
|
||||||
|
!= (collate->sections->rules[i] & sort_position)))
|
||||||
{
|
{
|
||||||
error (0, 0, _("\
|
error (0, 0, _("\
|
||||||
%s: `position' must be used for a specific level in all sections or none"),
|
%s: `position' must be used for a specific level in all sections or none"),
|
||||||
@ -1771,7 +1791,10 @@ Computing table size for collation table might take a while..."),
|
|||||||
{
|
{
|
||||||
if (need_undefined)
|
if (need_undefined)
|
||||||
{
|
{
|
||||||
error (0, 0, _("no definition of `UNDEFINED'"));
|
/* This seems not to be enforced by recent standards. Don't
|
||||||
|
emit an error, simply append UNDEFINED at the end. */
|
||||||
|
if (0)
|
||||||
|
error (0, 0, _("no definition of `UNDEFINED'"));
|
||||||
|
|
||||||
/* Add UNDEFINED at the end. */
|
/* Add UNDEFINED at the end. */
|
||||||
collate->undefined.mborder =
|
collate->undefined.mborder =
|
||||||
@ -1793,6 +1816,8 @@ Computing table size for collation table might take a while..."),
|
|||||||
ruleset the same index. Since there are never many sections we can
|
ruleset the same index. Since there are never many sections we can
|
||||||
use an O(n^2) algorithm here. */
|
use an O(n^2) algorithm here. */
|
||||||
sect = collate->sections;
|
sect = collate->sections;
|
||||||
|
while (sect != NULL && sect->rules == NULL)
|
||||||
|
sect = sect->next;
|
||||||
assert (sect != NULL);
|
assert (sect != NULL);
|
||||||
ruleidx = 0;
|
ruleidx = 0;
|
||||||
do
|
do
|
||||||
@ -1800,7 +1825,8 @@ Computing table size for collation table might take a while..."),
|
|||||||
struct section_list *osect = collate->sections;
|
struct section_list *osect = collate->sections;
|
||||||
|
|
||||||
while (osect != sect)
|
while (osect != sect)
|
||||||
if (memcmp (osect->rules, sect->rules, nrules) == 0)
|
if (osect->rules != NULL
|
||||||
|
&& memcmp (osect->rules, sect->rules, nrules) == 0)
|
||||||
break;
|
break;
|
||||||
else
|
else
|
||||||
osect = osect->next;
|
osect = osect->next;
|
||||||
@ -1811,7 +1837,9 @@ Computing table size for collation table might take a while..."),
|
|||||||
sect->ruleidx = osect->ruleidx;
|
sect->ruleidx = osect->ruleidx;
|
||||||
|
|
||||||
/* Next section. */
|
/* Next section. */
|
||||||
sect = sect->next;
|
do
|
||||||
|
sect = sect->next;
|
||||||
|
while (sect != NULL && sect->rules == NULL);
|
||||||
}
|
}
|
||||||
while (sect != NULL);
|
while (sect != NULL);
|
||||||
/* We are currently not prepared for more than 256 rulesets. But this
|
/* We are currently not prepared for more than 256 rulesets. But this
|
||||||
@ -1993,7 +2021,7 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
|
|||||||
|
|
||||||
/* Prepare the ruleset table. */
|
/* Prepare the ruleset table. */
|
||||||
for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
|
for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
|
||||||
if (sect->ruleidx == i)
|
if (sect->rules != NULL && sect->ruleidx == i)
|
||||||
{
|
{
|
||||||
int j;
|
int j;
|
||||||
|
|
||||||
@ -2670,7 +2698,7 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
|
|||||||
|
|
||||||
/* Get the locale definition. */
|
/* Get the locale definition. */
|
||||||
copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
|
copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
|
||||||
repertoire_name, charmap);
|
repertoire_name, charmap, NULL);
|
||||||
if ((copy_locale->avail & COLLATE_LOCALE) == 0)
|
if ((copy_locale->avail & COLLATE_LOCALE) == 0)
|
||||||
{
|
{
|
||||||
/* Not yet loaded. So do it now. */
|
/* Not yet loaded. So do it now. */
|
||||||
@ -2708,6 +2736,19 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
|
|||||||
|
|
||||||
switch (nowtok)
|
switch (nowtok)
|
||||||
{
|
{
|
||||||
|
case tok_copy:
|
||||||
|
/* Allow copying other locales. */
|
||||||
|
now = lr_token (ldfile, charmap, NULL);
|
||||||
|
if (now->tok != tok_string)
|
||||||
|
goto err_label;
|
||||||
|
|
||||||
|
if (! ignore_content)
|
||||||
|
load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
|
||||||
|
charmap, result);
|
||||||
|
|
||||||
|
lr_ignore_rest (ldfile, 1);
|
||||||
|
break;
|
||||||
|
|
||||||
case tok_coll_weight_max:
|
case tok_coll_weight_max:
|
||||||
/* Ignore the rest of the line if we don't need the input of
|
/* Ignore the rest of the line if we don't need the input of
|
||||||
this line. */
|
this line. */
|
||||||
@ -2751,8 +2792,11 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
|
|||||||
/* Check whether this section is already known. */
|
/* Check whether this section is already known. */
|
||||||
struct section_list *known = collate->sections;
|
struct section_list *known = collate->sections;
|
||||||
while (known != NULL)
|
while (known != NULL)
|
||||||
if (strcmp (known->name, arg->val.str.startmb) == 0)
|
{
|
||||||
break;
|
if (strcmp (known->name, arg->val.str.startmb) == 0)
|
||||||
|
break;
|
||||||
|
known = known->next;
|
||||||
|
}
|
||||||
|
|
||||||
if (known != NULL)
|
if (known != NULL)
|
||||||
{
|
{
|
||||||
@ -2822,15 +2866,12 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
|
|||||||
repertoire, symbol, symbol_len))
|
repertoire, symbol, symbol_len))
|
||||||
goto col_elem_free;
|
goto col_elem_free;
|
||||||
|
|
||||||
if (insert_entry (&collate->elem_table,
|
insert_entry (&collate->elem_table, symbol, symbol_len,
|
||||||
symbol, symbol_len,
|
new_element (collate,
|
||||||
new_element (collate,
|
arg->val.str.startmb,
|
||||||
arg->val.str.startmb,
|
arg->val.str.lenmb - 1,
|
||||||
arg->val.str.lenmb - 1,
|
arg->val.str.startwc,
|
||||||
arg->val.str.startwc,
|
symbol, symbol_len, 0));
|
||||||
symbol, symbol_len, 0)) < 0)
|
|
||||||
lr_error (ldfile, _("\
|
|
||||||
error while adding collating element"));
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -2909,11 +2950,8 @@ error while adding collating element"));
|
|||||||
repertoire, symbol, symbol_len))
|
repertoire, symbol, symbol_len))
|
||||||
goto col_sym_free;
|
goto col_sym_free;
|
||||||
|
|
||||||
if (insert_entry (&collate->sym_table,
|
insert_entry (&collate->sym_table, symbol, symbol_len,
|
||||||
symbol, symbol_len,
|
new_symbol (collate));
|
||||||
new_symbol (collate)) < 0)
|
|
||||||
lr_error (ldfile, _("\
|
|
||||||
error while adding collating symbol"));
|
|
||||||
}
|
}
|
||||||
else if (symbol_len != endsymbol_len)
|
else if (symbol_len != endsymbol_len)
|
||||||
{
|
{
|
||||||
@ -2972,11 +3010,8 @@ error while adding collating symbol"));
|
|||||||
repertoire, symbuf, symbol_len))
|
repertoire, symbuf, symbol_len))
|
||||||
goto col_sym_free;
|
goto col_sym_free;
|
||||||
|
|
||||||
if (insert_entry (&collate->sym_table,
|
insert_entry (&collate->sym_table, symbuf,
|
||||||
symbuf, symbol_len,
|
symbol_len, new_symbol (collate));
|
||||||
new_symbol (collate)) < 0)
|
|
||||||
lr_error (ldfile, _("\
|
|
||||||
error while adding collating symbol"));
|
|
||||||
|
|
||||||
/* Increment the counter. */
|
/* Increment the counter. */
|
||||||
++from;
|
++from;
|
||||||
@ -3074,6 +3109,44 @@ error while adding equivalent collating symbol"));
|
|||||||
lr_ignore_rest (ldfile, 1);
|
lr_ignore_rest (ldfile, 1);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case tok_script:
|
||||||
|
/* We get told about the scripts we know. */
|
||||||
|
arg = lr_token (ldfile, charmap, repertoire);
|
||||||
|
if (arg->tok != tok_bsymbol)
|
||||||
|
goto err_label;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
struct section_list *runp = collate->known_sections;
|
||||||
|
char *name;
|
||||||
|
|
||||||
|
while (runp != NULL)
|
||||||
|
if (strncmp (runp->name, arg->val.str.startmb,
|
||||||
|
arg->val.str.lenmb) == 0
|
||||||
|
&& runp->name[arg->val.str.lenmb] == '\0')
|
||||||
|
break;
|
||||||
|
else
|
||||||
|
runp = runp->def_next;
|
||||||
|
|
||||||
|
if (runp != NULL)
|
||||||
|
{
|
||||||
|
lr_error (ldfile, _("duplicate definition of script `%s'"),
|
||||||
|
runp->name);
|
||||||
|
lr_ignore_rest (ldfile, 0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
runp = (struct section_list *) xcalloc (1, sizeof (*runp));
|
||||||
|
name = strncpy (xmalloc (arg->val.str.lenmb + 1),
|
||||||
|
arg->val.str.startmb, arg->val.str.lenmb);
|
||||||
|
name[arg->val.str.lenmb] = '\0';
|
||||||
|
runp->name = name;
|
||||||
|
|
||||||
|
runp->def_next = collate->known_sections;
|
||||||
|
collate->known_sections = runp;
|
||||||
|
}
|
||||||
|
lr_ignore_rest (ldfile, 1);
|
||||||
|
break;
|
||||||
|
|
||||||
case tok_order_start:
|
case tok_order_start:
|
||||||
/* Ignore the rest of the line if we don't need the input of
|
/* Ignore the rest of the line if we don't need the input of
|
||||||
this line. */
|
this line. */
|
||||||
@ -3094,10 +3167,13 @@ error while adding equivalent collating symbol"));
|
|||||||
if (arg->tok == tok_bsymbol)
|
if (arg->tok == tok_bsymbol)
|
||||||
{
|
{
|
||||||
/* This better should be a section name. */
|
/* This better should be a section name. */
|
||||||
struct section_list *sp = collate->sections;
|
struct section_list *sp = collate->known_sections;
|
||||||
while (sp != NULL
|
while (sp != NULL
|
||||||
&& strcmp (sp->name, arg->val.str.startmb) != 0)
|
&& (sp->name == NULL
|
||||||
sp = sp->next;
|
|| strncmp (sp->name, arg->val.str.startmb,
|
||||||
|
arg->val.str.lenmb) != 0
|
||||||
|
|| sp->name[arg->val.str.lenmb] != '\0'))
|
||||||
|
sp = sp->def_next;
|
||||||
|
|
||||||
if (sp == NULL)
|
if (sp == NULL)
|
||||||
{
|
{
|
||||||
@ -3109,15 +3185,21 @@ error while adding equivalent collating symbol"));
|
|||||||
|
|
||||||
if (collate->error_section.first == NULL)
|
if (collate->error_section.first == NULL)
|
||||||
{
|
{
|
||||||
collate->error_section.next = collate->sections;
|
if (collate->sections == NULL)
|
||||||
collate->sections = &collate->error_section;
|
collate->sections = &collate->error_section;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sp = collate->sections;
|
||||||
|
while (sp->next != NULL)
|
||||||
|
sp = sp->next;
|
||||||
|
|
||||||
|
collate->error_section.next = NULL;
|
||||||
|
sp->next = &collate->error_section;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* Remember this section. */
|
|
||||||
collate->current_section = sp;
|
|
||||||
|
|
||||||
/* One should not be allowed to open the same
|
/* One should not be allowed to open the same
|
||||||
section twice. */
|
section twice. */
|
||||||
if (sp->first != NULL)
|
if (sp->first != NULL)
|
||||||
@ -3126,8 +3208,13 @@ error while adding equivalent collating symbol"));
|
|||||||
"LC_COLLATE", sp->name);
|
"LC_COLLATE", sp->name);
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
sp->next = collate->sections;
|
if (collate->current_section == NULL)
|
||||||
collate->sections = sp;
|
collate->current_section = sp;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sp->next = collate->current_section->next;
|
||||||
|
collate->current_section->next = sp;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Next should come the end of the line or a semicolon. */
|
/* Next should come the end of the line or a semicolon. */
|
||||||
@ -3381,10 +3468,10 @@ error while adding equivalent collating symbol"));
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (state != 1 && state != 3 && state != 5)
|
if (state != 0 && state != 1 && state != 3 && state != 5)
|
||||||
goto err_label;
|
goto err_label;
|
||||||
|
|
||||||
if (state == 5 && nowtok == tok_ucs4)
|
if ((state == 0 || state == 5) && nowtok == tok_ucs4)
|
||||||
goto err_label;
|
goto err_label;
|
||||||
|
|
||||||
if (nowtok == tok_ucs4)
|
if (nowtok == tok_ucs4)
|
||||||
@ -3399,7 +3486,41 @@ error while adding equivalent collating symbol"));
|
|||||||
symlen = arg->val.str.lenmb;
|
symlen = arg->val.str.lenmb;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (state == 3)
|
if (state == 0)
|
||||||
|
{
|
||||||
|
/* We are outside an `order_start' region. This means
|
||||||
|
we must only accept definitions of values for
|
||||||
|
collation symbols since these are purely abstract
|
||||||
|
values and don't need directions associated. */
|
||||||
|
struct element_t *seqp;
|
||||||
|
|
||||||
|
if (find_entry (&collate->seq_table, symstr, symlen,
|
||||||
|
(void **) &seqp) == 0)
|
||||||
|
{
|
||||||
|
/* It's already defined. First check whether this
|
||||||
|
is really a collating symbol. */
|
||||||
|
if (seqp->is_character)
|
||||||
|
goto err_label;
|
||||||
|
|
||||||
|
goto move_entry;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
void *result;
|
||||||
|
|
||||||
|
if (find_entry (&collate->sym_table, symstr, symlen,
|
||||||
|
&result) != 0)
|
||||||
|
/* No collating symbol, it's an error. */
|
||||||
|
goto err_label;
|
||||||
|
|
||||||
|
/* Maybe this is the first time we define a symbol
|
||||||
|
value and it is before the first actual section. */
|
||||||
|
if (collate->sections == NULL)
|
||||||
|
collate->sections = collate->current_section =
|
||||||
|
&collate->symbol_section;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (state == 3)
|
||||||
{
|
{
|
||||||
/* It is possible that we already have this collation sequence.
|
/* It is possible that we already have this collation sequence.
|
||||||
In this case we move the entry. */
|
In this case we move the entry. */
|
||||||
@ -3416,6 +3537,7 @@ error while adding equivalent collating symbol"));
|
|||||||
if (find_entry (&collate->seq_table, symstr, symlen,
|
if (find_entry (&collate->seq_table, symstr, symlen,
|
||||||
(void **) &seqp) == 0)
|
(void **) &seqp) == 0)
|
||||||
{
|
{
|
||||||
|
move_entry:
|
||||||
/* Remove the entry from the old position. */
|
/* Remove the entry from the old position. */
|
||||||
if (seqp->last == NULL)
|
if (seqp->last == NULL)
|
||||||
collate->start = seqp->next;
|
collate->start = seqp->next;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user