locale: Implement struct grouping_iterator

The iterator allows grouping while scanning forward through
the digits.  This enables emitting digits as they are processed.

Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
This commit is contained in:
Florian Weimer 2022-12-19 18:56:54 +01:00
parent edd1b2a0d9
commit ffde06c915
4 changed files with 461 additions and 1 deletions

View File

@ -39,6 +39,7 @@ routines := \
gentempfd \
getline \
getw \
grouping_iterator \
iovfscanf \
isoc99_fscanf \
isoc99_scanf \
@ -222,6 +223,10 @@ generated += \
siglist-aux.S \
# generated
# tst-grouping_iterator includes grouping_iterator.c directly so that it
# can call the functions that are internal to that file.
tests-internal = \
tst-grouping_iterator \
# tests-internal
test-srcs = tst-unbputc tst-printf tst-printfsz-islongdouble
ifeq ($(run-built-tests),yes)
@ -299,7 +304,8 @@ LOCALES := \
hi_IN.UTF-8 \
ja_JP.EUC-JP \
ps_AF.UTF-8 \
# LOCALES
tg_TJ.UTF-8 \
# LOCALES
include ../gen-locales.mk
$(objpfx)bug14.out: $(gen-locales)
@ -307,6 +313,7 @@ $(objpfx)scanf13.out: $(gen-locales)
$(objpfx)test-vfprintf.out: $(gen-locales)
$(objpfx)tst-grouping.out: $(gen-locales)
$(objpfx)tst-grouping2.out: $(gen-locales)
$(objpfx)tst-grouping_iterator.out: $(gen-locales)
$(objpfx)tst-sprintf.out: $(gen-locales)
$(objpfx)tst-sscanf.out: $(gen-locales)
$(objpfx)tst-swprintf.out: $(gen-locales)

View File

@ -0,0 +1,126 @@
/* Iterator for inserting thousands separators into numbers.
Copyright (C) 2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <grouping_iterator.h>
#include <assert.h>
#include <limits.h>
#include <locale/localeinfo.h>
#include <stdint.h>
#include <string.h>
/* Set up *IT for a number of DIGITS characters that is not grouped at
   all: every field is cleared, and the whole number is treated as one
   single group, so that no separator is ever requested while the
   DIGITS characters are consumed.  Always returns false ("grouping is
   not active").  */
bool
__grouping_iterator_init_none (struct grouping_iterator *it,
                               unsigned int digits)
{
  /* Clears the grouping pointer and both counters derived from it.  */
  memset (it, 0, sizeof *it);

  /* The only group spans the entire number.  */
  it->remaining = digits;
  it->remaining_in_current_group = digits;

  return false;
}
/* Compute the iterator state in *IT for a number of DIGITS characters
   grouped according to GROUPING.  Each char of GROUPING is a group
   width, starting with the rightmost group.  A zero char means that
   the preceding width repeats for the rest of the number; CHAR_MAX or
   a negative char means that nothing further is grouped.  Returns
   true if at least one separator will be inserted.  */
static bool
grouping_iterator_setup (struct grouping_iterator *it, unsigned int digits,
                         const char *grouping)
{
  /* Negative widths are handled like CHAR_MAX, and a description that
     starts with zero has no width to repeat: no grouping at all.  */
  if (*grouping <= 0 || *grouping == CHAR_MAX)
    return __grouping_iterator_init_none (it, digits);

  /* Characters not yet assigned to a group, counted from the right.
     What is left at the end forms the leftmost group.  */
  unsigned int ungrouped = digits;
  /* Sum of the widths of the explicitly listed groups visited.  */
  unsigned int listed_width = 0;
  unsigned int separator_count = 0;

  for (;;)
    {
      /* Known to be positive here: the first width was checked above,
         later ones are checked before the loop continues.  */
      unsigned int width = *grouping;
      listed_width += width;

      /* Everything that is left fits into this group, so no further
         separator is needed.  */
      if (ungrouped <= width)
        break;

      /* Split off one complete group and look at the next width.  */
      ++separator_count;
      ungrouped -= width;
      ++grouping;

#if CHAR_MIN < 0
      bool stop = *grouping == CHAR_MAX || *grouping < 0;
#else
      bool stop = *grouping == CHAR_MAX;
#endif
      if (stop)
        /* No more grouping should be done.  */
        break;

      if (*grouping == 0)
        {
          /* The width just processed repeats.  Step back to it; it
             belongs to the repeating part, so it must not be counted
             among the listed widths.  */
          --grouping;
          listed_width -= width;

          /* Number of additional complete groups, leaving at least
             one character for the leftmost group.  */
          unsigned int repeats = (ungrouped - 1) / width;
          separator_count += repeats;
          ungrouped -= repeats * width;
          break;
        }
    }

  it->remaining = digits;
  it->remaining_in_current_group = ungrouped;
  it->groupings = grouping;
  it->non_repeating_groups = listed_width;
  it->separators = separator_count;
  return separator_count > 0;
}
/* Look up the grouping description of LOC for CATEGORY, which must be
   LC_MONETARY or LC_NUMERIC: MON_GROUPING is used for LC_MONETARY,
   GROUPING for anything else.  */
static const char *
get_grouping (int category, locale_t loc)
{
  if (category == LC_MONETARY)
    return _nl_lookup (loc, category, MON_GROUPING);
  return _nl_lookup (loc, category, GROUPING);
}
/* Initialize *IT for a number of DIGITS characters using the grouping
   rules of LOC for CATEGORY (LC_MONETARY or LC_NUMERIC).  Returns
   true if at least one separator will be inserted.  */
bool
__grouping_iterator_init (struct grouping_iterator *it,
                          int category, locale_t loc, unsigned int digits)
{
  /* Only consult the locale when there are at least two characters;
     shorter numbers never receive a separator.  */
  if (digits > 1)
    return grouping_iterator_setup (it, digits,
                                    get_grouping (category, loc));

  return __grouping_iterator_init_none (it, digits);
}
/* Advance *IT by one character.  Returns true if a separator has to
   be emitted before that character, false otherwise.  Must not be
   called once all characters have been consumed.  */
bool
__grouping_iterator_next (struct grouping_iterator *it)
{
  assert (it->remaining > 0);
  it->remaining -= 1;

  /* A separator is due exactly when the previous group is used up.  */
  bool start_new_group = it->remaining_in_current_group == 0;

  if (start_new_group)
    {
      /* Inside the explicitly listed (non-repeating) widths, each new
         group uses the preceding entry of the description.  Otherwise
         the current width simply repeats.  */
      if (it->remaining < it->non_repeating_groups)
        it->groupings -= 1;

      /* The character being processed already belongs to the new
         group, hence the subtraction.  */
      it->remaining_in_current_group = *it->groupings - 1;
    }
  else
    it->remaining_in_current_group -= 1;

  return start_new_group;
}

View File

@ -0,0 +1,65 @@
/* Iterator for grouping a number while scanning it forward.
Copyright (C) 2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#ifndef GROUPING_ITERATOR_H
#define GROUPING_ITERATOR_H
#include <locale.h>
#include <stdbool.h>
/* State for walking a number from its first (leftmost) character to
its last while deciding where thousands separators go.  Set up by
__grouping_iterator_init or __grouping_iterator_init_none and advanced
by __grouping_iterator_next.  */
struct grouping_iterator
{
/* Number of characters still to be processed in the current group.
If this is zero when the next character is requested, a thousands
separator needs to be emitted before that character.  */
unsigned int remaining_in_current_group;
/* Number of characters remaining in the number. This is used to
detect the start of the non-repeating groups. */
unsigned int remaining;
/* Points to the current grouping descriptor, i.e. the char holding
the width used for the next group.  Zeroed by
__grouping_iterator_init_none, which never starts a new group.  */
const char *groupings;
/* Total number of characters in the non-repeating groups. */
unsigned int non_repeating_groups;
/* Number of separators that will be inserted if the whole number is
processed. (Does not change during iteration.) */
unsigned int separators;
};
/* NOTE(review): none of the declarations visible in this header use
struct __locale_data -- confirm this forward declaration is still
needed.  */
struct __locale_data;
/* Initializes *IT with the grouping data of locale LOC for CATEGORY
(which must be LC_MONETARY or LC_NUMERIC). DIGITS is the length of
the number. Returns true if grouping is active (at least one
separator will be inserted), false if not. */
bool __grouping_iterator_init (struct grouping_iterator *it,
int category, locale_t loc,
unsigned int digits) attribute_hidden;
/* Initializes *IT with no grouping information for a string of length
DIGITS, so that iterating over it never requests a separator.  Always
returns false to indicate no grouping. */
bool __grouping_iterator_init_none (struct grouping_iterator *it,
unsigned int digits)
attribute_hidden;
/* Advances to the next character and returns true if a thousands
separator should be inserted before emitting it.  Must only be called
while characters remain; the implementation asserts this.
NOTE(review): unlike the two initialization functions above, this
declaration is not marked attribute_hidden -- confirm whether that is
intentional.  */
bool __grouping_iterator_next (struct grouping_iterator *it);
#endif /* GROUPING_ITERATOR_H */

View File

@ -0,0 +1,262 @@
/* Test for struct grouping_iterator.
Copyright (C) 2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/* Rebuild the file to access internal-only functions. */
#include <grouping_iterator.c>
#include <stdio.h>
#include <support/check.h>
#include <support/support.h>
#include <support/test-driver.h>
/* Run the iterator over INPUT using the grouping description GROUPINGS
   and verify that inserting "'" wherever a separator is requested
   yields EXPECTED.  The initial iterator state (length of the leftmost
   group, number of separators, whether grouping is active) is derived
   from EXPECTED and verified as well.  LINENO identifies the caller in
   verbose output.  */
static void
check (int lineno, const char *groupings,
       const char *input, const char *expected)
{
  if (test_verbose)
    {
      printf ("info: %s:%d: \"%s\" via \"", __FILE__, lineno, input);
      /* Print the grouping description as octal escapes.  */
      const char *g = groupings;
      while (*g != 0)
        printf ("\\%o", *g++ & 0xff);
      printf ("\" to \"%s\"\n", expected);
    }

  /* The leftmost group extends to the first separator, or to the end
     of the string if there is none.  */
  size_t initial_group = strchrnul (expected, '\'') - expected;

  size_t separators = 0;
  for (size_t i = 0; expected[i] != '\0'; ++i)
    if (expected[i] == '\'')
      ++separators;

  size_t digits = strlen (input);
  /* Enough room for one separator per digit plus the terminator.  */
  char *out = xmalloc (2 * digits + 1);

  struct grouping_iterator it;
  TEST_COMPARE (grouping_iterator_setup (&it, digits, groupings),
                strchr (expected, '\'') != NULL);
  TEST_COMPARE (it.remaining, digits);
  TEST_COMPARE (it.remaining_in_current_group, initial_group);
  TEST_COMPARE (it.separators, separators);

  char *p = out;
  for (; *input != '\0'; ++input)
    {
      if (__grouping_iterator_next (&it))
        *p++ = '\'';
      /* The separator count must stay constant during iteration.  */
      TEST_COMPARE (it.separators, separators);
      *p++ = *input;
    }
  *p = '\0';

  /* The iterator must be fully drained at the end of the number.  */
  TEST_COMPARE (it.remaining, 0);
  TEST_COMPARE (it.remaining_in_current_group, 0);
  TEST_COMPARE_STRING (out, expected);

  free (out);
}
/* Test entry point.  The first part feeds hand-written grouping
descriptions to check; the second part initializes the iterator from
real locales and compares the resulting iterator state.  */
static int
do_test (void)
{
/* Empty description: no grouping at all. */
check (__LINE__, "", "1", "1");
check (__LINE__, "", "12", "12");
check (__LINE__, "", "123", "123");
check (__LINE__, "", "1234", "1234");
/* A single width of 3, repeating. */
check (__LINE__, "\3", "1", "1");
check (__LINE__, "\3", "12", "12");
check (__LINE__, "\3", "123", "123");
check (__LINE__, "\3", "1234", "1'234");
check (__LINE__, "\3", "12345", "12'345");
check (__LINE__, "\3", "123456", "123'456");
check (__LINE__, "\3", "1234567", "1'234'567");
check (__LINE__, "\3", "12345678", "12'345'678");
check (__LINE__, "\3", "123456789", "123'456'789");
check (__LINE__, "\3", "1234567890", "1'234'567'890");
/* Rightmost group of 2, then groups of 3 repeating. */
check (__LINE__, "\2\3", "1", "1");
check (__LINE__, "\2\3", "12", "12");
check (__LINE__, "\2\3", "123", "1'23");
check (__LINE__, "\2\3", "1234", "12'34");
check (__LINE__, "\2\3", "12345", "123'45");
check (__LINE__, "\2\3", "123456", "1'234'56");
check (__LINE__, "\2\3", "1234567", "12'345'67");
check (__LINE__, "\2\3", "12345678", "123'456'78");
check (__LINE__, "\2\3", "123456789", "1'234'567'89");
check (__LINE__, "\2\3", "1234567890", "12'345'678'90");
/* Rightmost group of 3, then groups of 2 repeating. */
check (__LINE__, "\3\2", "1", "1");
check (__LINE__, "\3\2", "12", "12");
check (__LINE__, "\3\2", "123", "123");
check (__LINE__, "\3\2", "1234", "1'234");
check (__LINE__, "\3\2", "12345", "12'345");
check (__LINE__, "\3\2", "123456", "1'23'456");
check (__LINE__, "\3\2", "1234567", "12'34'567");
check (__LINE__, "\3\2", "12345678", "1'23'45'678");
check (__LINE__, "\3\2", "123456789", "12'34'56'789");
check (__LINE__, "\3\2", "1234567890", "1'23'45'67'890");
/* Groups of 3 and 2, then single characters repeating. */
check (__LINE__, "\3\2\1", "1", "1");
check (__LINE__, "\3\2\1", "12", "12");
check (__LINE__, "\3\2\1", "123", "123");
check (__LINE__, "\3\2\1", "1234", "1'234");
check (__LINE__, "\3\2\1", "12345", "12'345");
check (__LINE__, "\3\2\1", "123456", "1'23'456");
check (__LINE__, "\3\2\1", "1234567", "1'2'34'567");
check (__LINE__, "\3\2\1", "12345678", "1'2'3'45'678");
check (__LINE__, "\3\2\1", "123456789", "1'2'3'4'56'789");
check (__LINE__, "\3\2\1", "1234567890", "1'2'3'4'5'67'890");
/* Groups of 2 and 3, then single characters repeating. */
check (__LINE__, "\2\3\1", "1", "1");
check (__LINE__, "\2\3\1", "12", "12");
check (__LINE__, "\2\3\1", "123", "1'23");
check (__LINE__, "\2\3\1", "1234", "12'34");
check (__LINE__, "\2\3\1", "12345", "123'45");
check (__LINE__, "\2\3\1", "123456", "1'234'56");
check (__LINE__, "\2\3\1", "1234567", "1'2'345'67");
check (__LINE__, "\2\3\1", "12345678", "1'2'3'456'78");
check (__LINE__, "\2\3\1", "123456789", "1'2'3'4'567'89");
check (__LINE__, "\2\3\1", "1234567890", "1'2'3'4'5'678'90");
/* No repeats. */
/* The \377 terminator stops grouping: everything to the left of the
listed groups stays in one piece. */
check (__LINE__, "\3\377", "1", "1");
check (__LINE__, "\3\377", "12", "12");
check (__LINE__, "\3\377", "123", "123");
check (__LINE__, "\3\377", "1234", "1'234");
check (__LINE__, "\3\377", "12345", "12'345");
check (__LINE__, "\3\377", "123456", "123'456");
check (__LINE__, "\3\377", "1234567", "1234'567");
check (__LINE__, "\3\377", "12345678", "12345'678");
check (__LINE__, "\2\3\377", "1", "1");
check (__LINE__, "\2\3\377", "12", "12");
check (__LINE__, "\2\3\377", "123", "1'23");
check (__LINE__, "\2\3\377", "1234", "12'34");
check (__LINE__, "\2\3\377", "12345", "123'45");
check (__LINE__, "\2\3\377", "123456", "1'234'56");
check (__LINE__, "\2\3\377", "1234567", "12'345'67");
check (__LINE__, "\2\3\377", "12345678", "123'456'78");
check (__LINE__, "\2\3\377", "123456789", "1234'567'89");
check (__LINE__, "\2\3\377", "1234567890", "12345'678'90");
check (__LINE__, "\3\2\377", "1", "1");
check (__LINE__, "\3\2\377", "12", "12");
check (__LINE__, "\3\2\377", "123", "123");
check (__LINE__, "\3\2\377", "1234", "1'234");
check (__LINE__, "\3\2\377", "12345", "12'345");
check (__LINE__, "\3\2\377", "123456", "1'23'456");
check (__LINE__, "\3\2\377", "1234567", "12'34'567");
check (__LINE__, "\3\2\377", "12345678", "123'45'678");
check (__LINE__, "\3\2\377", "123456789", "1234'56'789");
check (__LINE__, "\3\2\377", "1234567890", "12345'67'890");
/* Locale-based tests. */
/* Each section loads a locale, checks the outdigit properties recorded
in its LC_CTYPE data, and then checks the iterator state for an
8-character number under LC_NUMERIC and LC_MONETARY. */
locale_t loc;
struct lc_ctype_data *ctype;
struct grouping_iterator it;
/* de_DE.UTF-8: single-byte output digits, no translation needed;
both categories are expected to give 12'345'678-style grouping. */
loc = newlocale (LC_ALL_MASK, "de_DE.UTF-8", 0);
TEST_VERIFY_EXIT (loc != 0);
ctype = loc->__locales[LC_CTYPE]->private;
TEST_VERIFY (!ctype->outdigit_translation_needed);
for (int i = 0; i <= 9; ++i)
TEST_COMPARE (ctype->outdigit_bytes[i], 1);
TEST_COMPARE (ctype->outdigit_bytes_all_equal, 1);
TEST_COMPARE (__grouping_iterator_init (&it, LC_NUMERIC, loc, 8), true);
TEST_COMPARE (it.remaining_in_current_group, 2);
TEST_COMPARE (it.remaining, 8);
TEST_COMPARE (*it.groupings, 3);
TEST_COMPARE (it.non_repeating_groups, 3); /* Locale duplicates 3. */
TEST_COMPARE (it.separators, 2);
TEST_COMPARE (__grouping_iterator_init (&it, LC_MONETARY, loc, 8), true);
TEST_COMPARE (it.remaining_in_current_group, 2);
TEST_COMPARE (it.remaining, 8);
TEST_COMPARE (*it.groupings, 3);
TEST_COMPARE (it.non_repeating_groups, 3); /* Locale duplicates 3. */
TEST_COMPARE (it.separators, 2);
freelocale (loc);
/* tg_TJ.UTF-8: same expectations as for de_DE.UTF-8. */
loc = newlocale (LC_ALL_MASK, "tg_TJ.UTF-8", 0);
TEST_VERIFY_EXIT (loc != 0);
ctype = loc->__locales[LC_CTYPE]->private;
TEST_VERIFY (!ctype->outdigit_translation_needed);
for (int i = 0; i <= 9; ++i)
TEST_COMPARE (ctype->outdigit_bytes[i], 1);
TEST_COMPARE (ctype->outdigit_bytes_all_equal, 1);
TEST_COMPARE (__grouping_iterator_init (&it, LC_NUMERIC, loc, 8), true);
TEST_COMPARE (it.remaining_in_current_group, 2);
TEST_COMPARE (it.remaining, 8);
TEST_COMPARE (*it.groupings, 3);
TEST_COMPARE (it.non_repeating_groups, 3); /* Locale duplicates 3. */
TEST_COMPARE (it.separators, 2);
TEST_COMPARE (__grouping_iterator_init (&it, LC_MONETARY, loc, 8), true);
TEST_COMPARE (it.remaining_in_current_group, 2);
TEST_COMPARE (it.remaining, 8);
TEST_COMPARE (*it.groupings, 3);
TEST_COMPARE (it.non_repeating_groups, 3); /* Locale duplicates 3. */
TEST_COMPARE (it.separators, 2);
freelocale (loc);
/* hi_IN.UTF-8: output digits need translation (3 bytes each), and
the two categories are expected to group differently. */
loc = newlocale (LC_ALL_MASK, "hi_IN.UTF-8", 0);
TEST_VERIFY_EXIT (loc != 0);
ctype = loc->__locales[LC_CTYPE]->private;
TEST_VERIFY (ctype->outdigit_translation_needed);
for (int i = 0; i <= 9; ++i)
/* Locale uses Devanagari digits. */
TEST_COMPARE (ctype->outdigit_bytes[i], 3);
TEST_COMPARE (ctype->outdigit_bytes_all_equal, 3);
TEST_COMPARE (__grouping_iterator_init (&it, LC_NUMERIC, loc, 8), true);
TEST_COMPARE (it.remaining_in_current_group, 2);
TEST_COMPARE (it.remaining, 8);
TEST_COMPARE (*it.groupings, 3);
TEST_COMPARE (it.non_repeating_groups, 0);
TEST_COMPARE (it.separators, 2);
/* Monetary grouping: a leftmost group of one character, three
separators, and a current group width of 2. */
TEST_COMPARE (__grouping_iterator_init (&it, LC_MONETARY, loc, 8), true);
TEST_COMPARE (it.remaining_in_current_group, 1);
TEST_COMPARE (it.remaining, 8);
TEST_COMPARE (*it.groupings, 2);
TEST_COMPARE (it.non_repeating_groups, 3);
TEST_COMPARE (it.separators, 3);
freelocale (loc);
/* ps_AF.UTF-8: output digits need translation (2 bytes each); both
categories are expected to use plain repeating groups of 3. */
loc = newlocale (LC_ALL_MASK, "ps_AF.UTF-8", 0);
TEST_VERIFY_EXIT (loc != 0);
ctype = loc->__locales[LC_CTYPE]->private;
TEST_VERIFY (ctype->outdigit_translation_needed);
for (int i = 0; i <= 9; ++i)
/* Locale uses non-ASCII digits. */
TEST_COMPARE (ctype->outdigit_bytes[i], 2);
TEST_COMPARE (ctype->outdigit_bytes_all_equal, 2);
TEST_COMPARE (__grouping_iterator_init (&it, LC_NUMERIC, loc, 8), true);
TEST_COMPARE (it.remaining_in_current_group, 2);
TEST_COMPARE (it.remaining, 8);
TEST_COMPARE (*it.groupings, 3);
TEST_COMPARE (it.non_repeating_groups, 0);
TEST_COMPARE (it.separators, 2);
TEST_COMPARE (__grouping_iterator_init (&it, LC_MONETARY, loc, 8), true);
TEST_COMPARE (it.remaining_in_current_group, 2);
TEST_COMPARE (it.remaining, 8);
TEST_COMPARE (*it.groupings, 3);
TEST_COMPARE (it.non_repeating_groups, 0);
TEST_COMPARE (it.separators, 2);
freelocale (loc);
return 0;
}
#include <support/test-driver.c>