Correct handling of different collation sections.

This commit is contained in:
Ulrich Drepper 2000-06-25 18:01:25 +00:00
parent eacc655548
commit ab80bec0cb

View File

@ -47,6 +47,7 @@ struct element_t;
/* Data type for list of strings. */ /* Data type for list of strings. */
struct section_list struct section_list
{ {
struct section_list *def_next;
struct section_list *next; struct section_list *next;
/* Name of the section. */ /* Name of the section. */
const char *name; const char *name;
@ -144,6 +145,8 @@ struct locale_collate_t
int cur_weight_max; int cur_weight_max;
/* List of known scripts. */ /* List of known scripts. */
struct section_list *known_sections;
/* List of used sections. */
struct section_list *sections; struct section_list *sections;
/* Current section using definition. */ /* Current section using definition. */
struct section_list *current_section; struct section_list *current_section;
@ -151,6 +154,9 @@ struct locale_collate_t
struct section_list unnamed_section; struct section_list unnamed_section;
/* To make handling of errors easier we have another section. */ /* To make handling of errors easier we have another section. */
struct section_list error_section; struct section_list error_section;
/* Sometimes we are defining the values for collating symbols before
the first actual section. */
struct section_list symbol_section;
/* Start of the order list. */ /* Start of the order list. */
struct element_t *start; struct element_t *start;
@ -562,7 +568,7 @@ read_directions (struct linereader *ldfile, struct token *arg,
static struct element_t * static struct element_t *
find_element (struct linereader *ldfile, struct locale_collate_t *collate, find_element (struct linereader *ldfile, struct locale_collate_t *collate,
const char *str, size_t len, uint32_t *wcstr) const char *str, size_t len)
{ {
struct element_t *result = NULL; struct element_t *result = NULL;
@ -668,13 +674,26 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
elem->weights[weight_cnt].w[0] = NULL; elem->weights[weight_cnt].w[0] = NULL;
elem->weights[weight_cnt].cnt = 1; elem->weights[weight_cnt].cnt = 1;
} }
else if (arg->tok == tok_bsymbol) else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
{ {
struct element_t *val = find_element (ldfile, collate, char ucs4str[10];
arg->val.str.startmb, struct element_t *val;
arg->val.str.lenmb, char *symstr;
arg->val.str.startwc); size_t symlen;
if (arg->tok == tok_bsymbol)
{
symstr = arg->val.str.startmb;
symlen = arg->val.str.lenmb;
}
else
{
snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
symstr = ucs4str;
symlen = 9;
}
val = find_element (ldfile, collate, symstr, symlen);
if (val == NULL) if (val == NULL)
break; break;
@ -720,7 +739,7 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
} }
charelem = find_element (ldfile, collate, startp, charelem = find_element (ldfile, collate, startp,
cp - startp, NULL); cp - startp);
++cp; ++cp;
} }
else else
@ -731,7 +750,7 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
string as if that would be bsymbols. Otherwise we string as if that would be bsymbols. Otherwise we
would have to match back to bsymbols somehow and this would have to match back to bsymbols somehow and this
is not what people normally expect. */ is not what people normally expect. */
charelem = find_element (ldfile, collate, cp++, 1, NULL); charelem = find_element (ldfile, collate, cp++, 1);
} }
if (charelem == NULL) if (charelem == NULL)
@ -1349,7 +1368,7 @@ static void
collate_startup (struct linereader *ldfile, struct localedef_t *locale, collate_startup (struct linereader *ldfile, struct localedef_t *locale,
struct localedef_t *copy_locale, int ignore_content) struct localedef_t *copy_locale, int ignore_content)
{ {
if (!ignore_content) if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
{ {
struct locale_collate_t *collate; struct locale_collate_t *collate;
@ -1432,8 +1451,9 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
or in none. */ or in none. */
for (i = 0; i < nrules; ++i) for (i = 0; i < nrules; ++i)
for (sect = collate->sections; sect != NULL; sect = sect->next) for (sect = collate->sections; sect != NULL; sect = sect->next)
if ((sect->rules[i] & sort_position) if (sect->rules != NULL
!= (collate->sections->rules[i] & sort_position)) && ((sect->rules[i] & sort_position)
!= (collate->sections->rules[i] & sort_position)))
{ {
error (0, 0, _("\ error (0, 0, _("\
%s: `position' must be used for a specific level in all sections or none"), %s: `position' must be used for a specific level in all sections or none"),
@ -1771,7 +1791,10 @@ Computing table size for collation table might take a while..."),
{ {
if (need_undefined) if (need_undefined)
{ {
error (0, 0, _("no definition of `UNDEFINED'")); /* This seems not to be enforced by recent standards. Don't
emit an error, simply append UNDEFINED at the end. */
if (0)
error (0, 0, _("no definition of `UNDEFINED'"));
/* Add UNDEFINED at the end. */ /* Add UNDEFINED at the end. */
collate->undefined.mborder = collate->undefined.mborder =
@ -1793,6 +1816,8 @@ Computing table size for collation table might take a while..."),
ruleset the same index. Since there are never many sections we can ruleset the same index. Since there are never many sections we can
use an O(n^2) algorithm here. */ use an O(n^2) algorithm here. */
sect = collate->sections; sect = collate->sections;
while (sect != NULL && sect->rules == NULL)
sect = sect->next;
assert (sect != NULL); assert (sect != NULL);
ruleidx = 0; ruleidx = 0;
do do
@ -1800,7 +1825,8 @@ Computing table size for collation table might take a while..."),
struct section_list *osect = collate->sections; struct section_list *osect = collate->sections;
while (osect != sect) while (osect != sect)
if (memcmp (osect->rules, sect->rules, nrules) == 0) if (osect->rules != NULL
&& memcmp (osect->rules, sect->rules, nrules) == 0)
break; break;
else else
osect = osect->next; osect = osect->next;
@ -1811,7 +1837,9 @@ Computing table size for collation table might take a while..."),
sect->ruleidx = osect->ruleidx; sect->ruleidx = osect->ruleidx;
/* Next section. */ /* Next section. */
sect = sect->next; do
sect = sect->next;
while (sect != NULL && sect->rules == NULL);
} }
while (sect != NULL); while (sect != NULL);
/* We are currently not prepared for more than 256 rulesets. But this /* We are currently not prepared for more than 256 rulesets. But this
@ -1993,7 +2021,7 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
/* Prepare the ruleset table. */ /* Prepare the ruleset table. */
for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next) for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
if (sect->ruleidx == i) if (sect->rules != NULL && sect->ruleidx == i)
{ {
int j; int j;
@ -2670,7 +2698,7 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
/* Get the locale definition. */ /* Get the locale definition. */
copy_locale = load_locale (LC_COLLATE, now->val.str.startmb, copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
repertoire_name, charmap); repertoire_name, charmap, NULL);
if ((copy_locale->avail & COLLATE_LOCALE) == 0) if ((copy_locale->avail & COLLATE_LOCALE) == 0)
{ {
/* Not yet loaded. So do it now. */ /* Not yet loaded. So do it now. */
@ -2708,6 +2736,19 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
switch (nowtok) switch (nowtok)
{ {
case tok_copy:
/* Allow copying other locales. */
now = lr_token (ldfile, charmap, NULL);
if (now->tok != tok_string)
goto err_label;
if (! ignore_content)
load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
charmap, result);
lr_ignore_rest (ldfile, 1);
break;
case tok_coll_weight_max: case tok_coll_weight_max:
/* Ignore the rest of the line if we don't need the input of /* Ignore the rest of the line if we don't need the input of
this line. */ this line. */
@ -2751,8 +2792,11 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
/* Check whether this section is already known. */ /* Check whether this section is already known. */
struct section_list *known = collate->sections; struct section_list *known = collate->sections;
while (known != NULL) while (known != NULL)
if (strcmp (known->name, arg->val.str.startmb) == 0) {
break; if (strcmp (known->name, arg->val.str.startmb) == 0)
break;
known = known->next;
}
if (known != NULL) if (known != NULL)
{ {
@ -2822,15 +2866,12 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
repertoire, symbol, symbol_len)) repertoire, symbol, symbol_len))
goto col_elem_free; goto col_elem_free;
if (insert_entry (&collate->elem_table, insert_entry (&collate->elem_table, symbol, symbol_len,
symbol, symbol_len, new_element (collate,
new_element (collate, arg->val.str.startmb,
arg->val.str.startmb, arg->val.str.lenmb - 1,
arg->val.str.lenmb - 1, arg->val.str.startwc,
arg->val.str.startwc, symbol, symbol_len, 0));
symbol, symbol_len, 0)) < 0)
lr_error (ldfile, _("\
error while adding collating element"));
} }
else else
{ {
@ -2909,11 +2950,8 @@ error while adding collating element"));
repertoire, symbol, symbol_len)) repertoire, symbol, symbol_len))
goto col_sym_free; goto col_sym_free;
if (insert_entry (&collate->sym_table, insert_entry (&collate->sym_table, symbol, symbol_len,
symbol, symbol_len, new_symbol (collate));
new_symbol (collate)) < 0)
lr_error (ldfile, _("\
error while adding collating symbol"));
} }
else if (symbol_len != endsymbol_len) else if (symbol_len != endsymbol_len)
{ {
@ -2972,11 +3010,8 @@ error while adding collating symbol"));
repertoire, symbuf, symbol_len)) repertoire, symbuf, symbol_len))
goto col_sym_free; goto col_sym_free;
if (insert_entry (&collate->sym_table, insert_entry (&collate->sym_table, symbuf,
symbuf, symbol_len, symbol_len, new_symbol (collate));
new_symbol (collate)) < 0)
lr_error (ldfile, _("\
error while adding collating symbol"));
/* Increment the counter. */ /* Increment the counter. */
++from; ++from;
@ -3074,6 +3109,44 @@ error while adding equivalent collating symbol"));
lr_ignore_rest (ldfile, 1); lr_ignore_rest (ldfile, 1);
break; break;
case tok_script:
/* We get told about the scripts we know. */
arg = lr_token (ldfile, charmap, repertoire);
if (arg->tok != tok_bsymbol)
goto err_label;
else
{
struct section_list *runp = collate->known_sections;
char *name;
while (runp != NULL)
if (strncmp (runp->name, arg->val.str.startmb,
arg->val.str.lenmb) == 0
&& runp->name[arg->val.str.lenmb] == '\0')
break;
else
runp = runp->def_next;
if (runp != NULL)
{
lr_error (ldfile, _("duplicate definition of script `%s'"),
runp->name);
lr_ignore_rest (ldfile, 0);
break;
}
runp = (struct section_list *) xcalloc (1, sizeof (*runp));
name = strncpy (xmalloc (arg->val.str.lenmb + 1),
arg->val.str.startmb, arg->val.str.lenmb);
name[arg->val.str.lenmb] = '\0';
runp->name = name;
runp->def_next = collate->known_sections;
collate->known_sections = runp;
}
lr_ignore_rest (ldfile, 1);
break;
case tok_order_start: case tok_order_start:
/* Ignore the rest of the line if we don't need the input of /* Ignore the rest of the line if we don't need the input of
this line. */ this line. */
@ -3094,10 +3167,13 @@ error while adding equivalent collating symbol"));
if (arg->tok == tok_bsymbol) if (arg->tok == tok_bsymbol)
{ {
/* This better should be a section name. */ /* This better should be a section name. */
struct section_list *sp = collate->sections; struct section_list *sp = collate->known_sections;
while (sp != NULL while (sp != NULL
&& strcmp (sp->name, arg->val.str.startmb) != 0) && (sp->name == NULL
sp = sp->next; || strncmp (sp->name, arg->val.str.startmb,
arg->val.str.lenmb) != 0
|| sp->name[arg->val.str.lenmb] != '\0'))
sp = sp->def_next;
if (sp == NULL) if (sp == NULL)
{ {
@ -3109,15 +3185,21 @@ error while adding equivalent collating symbol"));
if (collate->error_section.first == NULL) if (collate->error_section.first == NULL)
{ {
collate->error_section.next = collate->sections; if (collate->sections == NULL)
collate->sections = &collate->error_section; collate->sections = &collate->error_section;
else
{
sp = collate->sections;
while (sp->next != NULL)
sp = sp->next;
collate->error_section.next = NULL;
sp->next = &collate->error_section;
}
} }
} }
else else
{ {
/* Remember this section. */
collate->current_section = sp;
/* One should not be allowed to open the same /* One should not be allowed to open the same
section twice. */ section twice. */
if (sp->first != NULL) if (sp->first != NULL)
@ -3126,8 +3208,13 @@ error while adding equivalent collating symbol"));
"LC_COLLATE", sp->name); "LC_COLLATE", sp->name);
else else
{ {
sp->next = collate->sections; if (collate->current_section == NULL)
collate->sections = sp; collate->current_section = sp;
else
{
sp->next = collate->current_section->next;
collate->current_section->next = sp;
}
} }
/* Next should come the end of the line or a semicolon. */ /* Next should come the end of the line or a semicolon. */
@ -3381,10 +3468,10 @@ error while adding equivalent collating symbol"));
break; break;
} }
if (state != 1 && state != 3 && state != 5) if (state != 0 && state != 1 && state != 3 && state != 5)
goto err_label; goto err_label;
if (state == 5 && nowtok == tok_ucs4) if ((state == 0 || state == 5) && nowtok == tok_ucs4)
goto err_label; goto err_label;
if (nowtok == tok_ucs4) if (nowtok == tok_ucs4)
@ -3399,7 +3486,41 @@ error while adding equivalent collating symbol"));
symlen = arg->val.str.lenmb; symlen = arg->val.str.lenmb;
} }
if (state == 3) if (state == 0)
{
/* We are outside an `order_start' region. This means
we must only accept definitions of values for
collation symbols since these are purely abstract
values and don't need directions associated. */
struct element_t *seqp;
if (find_entry (&collate->seq_table, symstr, symlen,
(void **) &seqp) == 0)
{
/* It's already defined. First check whether this
is really a collating symbol. */
if (seqp->is_character)
goto err_label;
goto move_entry;
}
else
{
void *result;
if (find_entry (&collate->sym_table, symstr, symlen,
&result) != 0)
/* No collating symbol, it's an error. */
goto err_label;
/* Maybe this is the first time we define a symbol
value and it is before the first actual section. */
if (collate->sections == NULL)
collate->sections = collate->current_section =
&collate->symbol_section;
}
}
else if (state == 3)
{ {
/* It is possible that we already have this collation sequence. /* It is possible that we already have this collation sequence.
In this case we move the entry. */ In this case we move the entry. */
@ -3416,6 +3537,7 @@ error while adding equivalent collating symbol"));
if (find_entry (&collate->seq_table, symstr, symlen, if (find_entry (&collate->seq_table, symstr, symlen,
(void **) &seqp) == 0) (void **) &seqp) == 0)
{ {
move_entry:
/* Remove the entry from the old position. */ /* Remove the entry from the old position. */
if (seqp->last == NULL) if (seqp->last == NULL)
collate->start = seqp->next; collate->start = seqp->next;