Correct handling of different collation sections.

2025-04-24 14:41:06 +08:00 · 2000-06-25 18:01:25 +00:00 · 2000-06-25 18:01:25 +00:00 · ab80bec0cb
commit ab80bec0cb
parent eacc655548
1 changed files with 172 additions and 50 deletions
--- a/locale/programs/ld-collate.c
+++ b/locale/programs/ld-collate.c
@ -47,6 +47,7 @@ struct element_t;
 /* Data type for list of strings.  */
 struct section_list
 {
  struct section_list *def_next;
  struct section_list *next;
  /* Name of the section.  */
  const char *name;
@ -144,6 +145,8 @@ struct locale_collate_t
  int cur_weight_max;
  /* List of known scripts.  */
  struct section_list *known_sections;
  /* List of used sections.  */
  struct section_list *sections;
  /* Current section using definition.  */
  struct section_list *current_section;
@ -151,6 +154,9 @@ struct locale_collate_t
  struct section_list unnamed_section;
  /* To make handling of errors easier we have another section.  */
  struct section_list error_section;
  /* Sometimes we are defining the values for collating symbols before
     the first actual section.  */
  struct section_list symbol_section;
  /* Start of the order list.  */
  struct element_t *start;
@ -562,7 +568,7 @@ read_directions (struct linereader *ldfile, struct token *arg,
 static struct element_t *
 find_element (struct linereader *ldfile, struct locale_collate_t *collate,
-	      const char *str, size_t len, uint32_t *wcstr)
+	      const char *str, size_t len)
 {
  struct element_t *result = NULL;
@ -668,13 +674,26 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
 	  elem->weights[weight_cnt].w[0] = NULL;
 	  elem->weights[weight_cnt].cnt = 1;
 	}
-      else if (arg->tok == tok_bsymbol)
+      else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
 	{
-	  struct element_t *val = find_element (ldfile, collate,
+	  char ucs4str[10];
-						arg->val.str.startmb,
+	  struct element_t *val;
-						arg->val.str.lenmb,
+	  char *symstr;
-						arg->val.str.startwc);
+	  size_t symlen;
 	  if (arg->tok == tok_bsymbol)
 	    {
 	      symstr = arg->val.str.startmb;
 	      symlen = arg->val.str.lenmb;
 	    }
 	  else
 	    {
 	      snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
 	      symstr = ucs4str;
 	      symlen = 9;
 	    }
 	  val = find_element (ldfile, collate, symstr, symlen);
 	  if (val == NULL)
 	    break;
@ -720,7 +739,7 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
 		    }
 		    charelem = find_element (ldfile, collate, startp,
-					     cp - startp, NULL);
+					     cp - startp);
 		    ++cp;
 		}
 	      else
@ -731,7 +750,7 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
 		     string as if that would be bsymbols.  Otherwise we
 		     would have to match back to bsymbols somehow and this
 		     is not what people normally expect.  */
-		  charelem = find_element (ldfile, collate, cp++, 1, NULL);
+		  charelem = find_element (ldfile, collate, cp++, 1);
 		}
 	      if (charelem == NULL)
@ -1349,7 +1368,7 @@ static void
 collate_startup (struct linereader *ldfile, struct localedef_t *locale,
 		 struct localedef_t *copy_locale, int ignore_content)
 {
-  if (!ignore_content)
+  if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
    {
      struct locale_collate_t *collate;
@ -1432,8 +1451,9 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
     or in none.  */
  for (i = 0; i < nrules; ++i)
    for (sect = collate->sections; sect != NULL; sect = sect->next)
-      if ((sect->rules[i] & sort_position)
+      if (sect->rules != NULL
-	  != (collate->sections->rules[i] & sort_position))
+	  && ((sect->rules[i] & sort_position)
 	      != (collate->sections->rules[i] & sort_position)))
 	{
 	  error (0, 0, _("\
 %s: `position' must be used for a specific level in all sections or none"),
@ -1771,7 +1791,10 @@ Computing table size for collation table might take a while..."),
    {
      if (need_undefined)
 	{
-	  error (0, 0, _("no definition of `UNDEFINED'"));
+	  /* This seems not to be enforced by recent standards.  Don't
 	     emit an error, simply append UNDEFINED at the end.  */
 	  if (0)
 	    error (0, 0, _("no definition of `UNDEFINED'"));
 	  /* Add UNDEFINED at the end.  */
 	  collate->undefined.mborder =
@ -1793,6 +1816,8 @@ Computing table size for collation table might take a while..."),
     ruleset the same index.  Since there are never many sections we can
     use an O(n^2) algorithm here.  */
  sect = collate->sections;
  while (sect != NULL && sect->rules == NULL)
    sect = sect->next;
  assert (sect != NULL);
  ruleidx = 0;
  do
@ -1800,7 +1825,8 @@ Computing table size for collation table might take a while..."),
      struct section_list *osect = collate->sections;
      while (osect != sect)
-	if (memcmp (osect->rules, sect->rules, nrules) == 0)
+	if (osect->rules != NULL
 	    && memcmp (osect->rules, sect->rules, nrules) == 0)
 	  break;
 	else
 	  osect = osect->next;
@ -1811,7 +1837,9 @@ Computing table size for collation table might take a while..."),
 	sect->ruleidx = osect->ruleidx;
      /* Next section.  */
-      sect = sect->next;
+      do
 	sect = sect->next;
      while (sect != NULL && sect->rules == NULL);
    }
  while (sect != NULL);
  /* We are currently not prepared for more than 256 rulesets.  But this
@ -1993,7 +2021,7 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
  /* Prepare the ruleset table.  */
  for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
-    if (sect->ruleidx == i)
+    if (sect->rules != NULL && sect->ruleidx == i)
      {
 	int j;
@ -2670,7 +2698,7 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
      /* Get the locale definition.  */
      copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
-				 repertoire_name, charmap);
+				 repertoire_name, charmap, NULL);
      if ((copy_locale->avail & COLLATE_LOCALE) == 0)
 	{
 	  /* Not yet loaded.  So do it now.  */
@ -2708,6 +2736,19 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
      switch (nowtok)
 	{
 	case tok_copy:
 	  /* Allow copying other locales.  */
 	  now = lr_token (ldfile, charmap, NULL);
 	  if (now->tok != tok_string)
 	    goto err_label;
 	  if (! ignore_content)
 	    load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
 			 charmap, result);
 	  lr_ignore_rest (ldfile, 1);
 	  break;
 	case tok_coll_weight_max:
 	  /* Ignore the rest of the line if we don't need the input of
 	     this line.  */
@ -2751,8 +2792,11 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
 	      /* Check whether this section is already known.  */
 	      struct section_list *known = collate->sections;
 	      while (known != NULL)
-		if (strcmp (known->name, arg->val.str.startmb) == 0)
+		{
-		  break;
+		  if (strcmp (known->name, arg->val.str.startmb) == 0)
 		    break;
 		  known = known->next;
 		}
 	      if (known != NULL)
 		{
@ -2822,15 +2866,12 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
 				       repertoire, symbol, symbol_len))
 		    goto col_elem_free;
-		  if (insert_entry (&collate->elem_table,
+		  insert_entry (&collate->elem_table, symbol, symbol_len,
-				    symbol, symbol_len,
+				new_element (collate,
-				    new_element (collate,
+					     arg->val.str.startmb,
-						 arg->val.str.startmb,
+					     arg->val.str.lenmb - 1,
-						 arg->val.str.lenmb - 1,
+					     arg->val.str.startwc,
-						 arg->val.str.startwc,
+					     symbol, symbol_len, 0));
 						 symbol, symbol_len, 0)) < 0)
 		    lr_error (ldfile, _("\
 error while adding collating element"));
 		}
 	      else
 		{
@ -2909,11 +2950,8 @@ error while adding collating element"));
 					   repertoire, symbol, symbol_len))
 			goto col_sym_free;
-		      if (insert_entry (&collate->sym_table,
+		      insert_entry (&collate->sym_table, symbol, symbol_len,
-					symbol, symbol_len,
+				    new_symbol (collate));
 					new_symbol (collate)) < 0)
 			lr_error (ldfile, _("\
 error while adding collating symbol"));
 		    }
 		  else if (symbol_len != endsymbol_len)
 		    {
@ -2972,11 +3010,8 @@ error while adding collating symbol"));
 					       repertoire, symbuf, symbol_len))
 			    goto col_sym_free;
-			  if (insert_entry (&collate->sym_table,
+			  insert_entry (&collate->sym_table, symbuf,
-					    symbuf, symbol_len,
+					symbol_len, new_symbol (collate));
 					    new_symbol (collate)) < 0)
 			    lr_error (ldfile, _("\
 error while adding collating symbol"));
 			  /* Increment the counter.  */
 			  ++from;
@ -3074,6 +3109,44 @@ error while adding equivalent collating symbol"));
 	  lr_ignore_rest (ldfile, 1);
 	  break;
 	case tok_script:
 	  /* We get told about the scripts we know.  */
 	  arg = lr_token (ldfile, charmap, repertoire);
 	  if (arg->tok != tok_bsymbol)
 	    goto err_label;
 	  else
 	    {
 	      struct section_list *runp = collate->known_sections;
 	      char *name;
 	      while (runp != NULL)
 		if (strncmp (runp->name, arg->val.str.startmb,
 			     arg->val.str.lenmb) == 0
 		    && runp->name[arg->val.str.lenmb] == '\0')
 		  break;
 		else
 		  runp = runp->def_next;
 	      if (runp != NULL)
 		{
 		  lr_error (ldfile, _("duplicate definition of script `%s'"),
 			    runp->name);
 		  lr_ignore_rest (ldfile, 0);
 		  break;
 		}
 	      runp = (struct section_list *) xcalloc (1, sizeof (*runp));
 	      name = strncpy (xmalloc (arg->val.str.lenmb + 1),
 			      arg->val.str.startmb, arg->val.str.lenmb);
 	      name[arg->val.str.lenmb] = '\0';
 	      runp->name = name;
 	      runp->def_next = collate->known_sections;
 	      collate->known_sections = runp;
 	    }
 	  lr_ignore_rest (ldfile, 1);
 	  break;
 	case tok_order_start:
 	  /* Ignore the rest of the line if we don't need the input of
 	     this line.  */
@ -3094,10 +3167,13 @@ error while adding equivalent collating symbol"));
 	  if (arg->tok == tok_bsymbol)
 	    {
 	      /* This had better be a section name.  */
-	      struct section_list *sp = collate->sections;
+	      struct section_list *sp = collate->known_sections;
 	      while (sp != NULL
-		     && strcmp (sp->name, arg->val.str.startmb) != 0)
+		     && (sp->name == NULL
-		sp = sp->next;
+			 || strncmp (sp->name, arg->val.str.startmb,
 				     arg->val.str.lenmb) != 0
 			 || sp->name[arg->val.str.lenmb] != '\0'))
 		sp = sp->def_next;
 	      if (sp == NULL)
 		{
@ -3109,15 +3185,21 @@ error while adding equivalent collating symbol"));
 		  if (collate->error_section.first == NULL)
 		    {
-		      collate->error_section.next = collate->sections;
+		      if (collate->sections == NULL)
-		      collate->sections = &collate->error_section;
+			collate->sections = &collate->error_section;
 		      else
 			{
 			  sp = collate->sections;
 			  while (sp->next != NULL)
 			    sp = sp->next;
 			  collate->error_section.next = NULL;
 			  sp->next = &collate->error_section;
 			}
 		    }
 		}
 	      else
 		{
 		  /* Remember this section.  */
 		  collate->current_section = sp;
 		  /* One should not be allowed to open the same
                     section twice.  */
 		  if (sp->first != NULL)
@ -3126,8 +3208,13 @@ error while adding equivalent collating symbol"));
 			      "LC_COLLATE", sp->name);
 		  else
 		    {
-		      sp->next = collate->sections;
+		      if (collate->current_section == NULL)
-		      collate->sections = sp;
+			collate->current_section = sp;
 		      else
 			{
 			  sp->next = collate->current_section->next;
 			  collate->current_section->next = sp;
 			}
 		    }
 		  /* Next should come the end of the line or a semicolon.  */
@ -3381,10 +3468,10 @@ error while adding equivalent collating symbol"));
 	      break;
 	    }
-	  if (state != 1 && state != 3 && state != 5)
+	  if (state != 0 && state != 1 && state != 3 && state != 5)
 	    goto err_label;
-	  if (state == 5 && nowtok == tok_ucs4)
+	  if ((state == 0 || state == 5) && nowtok == tok_ucs4)
 	    goto err_label;
 	  if (nowtok == tok_ucs4)
@ -3399,7 +3486,41 @@ error while adding equivalent collating symbol"));
 	      symlen = arg->val.str.lenmb;
 	    }
-	  if (state == 3)
+	  if (state == 0)
 	    {
 	      /* We are outside an `order_start' region.  This means
                 we must only accept definitions of values for
                 collation symbols since these are purely abstract
                 values and don't need directions associated.  */
 	      struct element_t *seqp;
 	      if (find_entry (&collate->seq_table, symstr, symlen,
 			      (void **) &seqp) == 0)
 		{
 		  /* It's already defined.  First check whether this
 		     is really a collating symbol.  */
 		  if (seqp->is_character)
 		    goto err_label;
 		  goto move_entry;
 		}
 	      else
 		{
 		  void *result;
 		  if (find_entry (&collate->sym_table, symstr, symlen,
 				  &result) != 0)
 		    /* No collating symbol, it's an error.  */
 		    goto err_label;
 		  /* Maybe this is the first time we define a symbol
 		     value and it is before the first actual section.  */
 		  if (collate->sections == NULL)
 		    collate->sections = collate->current_section =
 		      &collate->symbol_section;
 		}
 	    }
 	  else if (state == 3)
 	    {
 	      /* It is possible that we already have this collation sequence.
 		 In this case we move the entry.  */
@ -3416,6 +3537,7 @@ error while adding equivalent collating symbol"));
 	      if (find_entry (&collate->seq_table, symstr, symlen,
 			      (void **) &seqp) == 0)
 		{
 		move_entry:
 		  /* Remove the entry from the old position.  */
 		  if (seqp->last == NULL)
 		    collate->start = seqp->next;