* iconv/gconv_trans.c: Correct a few bugs in the search loop.  Remove
	remainders of hash table.
	* locale/categories.def: Remove remainders of transliteration
	hash table.
	* locale/langinfo.h: Likewise.
	* locale/programs/ld-ctype.c: Likewise.  Fix code to write out
	transliteration tables.

	* locale/gen-translit.pl: New file.
	* locale/C-translit.h.in: New file.
	* locale/C-ctype.c: Include C-translit.h.  Initialize transliteration
	data pointers with data from this file.
	* locale/Makefile (distribute): Add C-translit.h.in, C-translit.h,
	and gen-translit.pl.
	Add rule to generate C-translit.h.
This commit is contained in:
Ulrich Drepper 2000-07-22 21:22:08 +00:00
parent fcc10ffab6
commit 04fbc779fe
10 changed files with 303 additions and 26 deletions

View File

@ -1,5 +1,21 @@
2000-07-22 Ulrich Drepper <drepper@redhat.com>
* iconv/gconv_trans.c: Correct a few bugs in the search loop. Remove
remainders of hash table.
* locale/categories.def: Remove remainders of transliteration
hash table.
* locale/langinfo.h: Likewise.
* locale/programs/ld-ctype.c: Likewise. Fix code to write out
transliteration tables.
* locale/gen-translit.pl: New file.
* locale/C-translit.h.in: New file.
* locale/C-ctype.c: Include C-translit.h. Initialize transliteration
data pointers with data from this file.
* locale/Makefile (distribute): Add C-translit.h.in, C-translit.h,
and gen-translit.pl.
Add rule to generate C-translit.h.
* stdio-common/vfscanf.c: Handle input -- with format %f correctly
(it's no input error).
* stdio-common/tstscanf.c: Add test case for format %f with input --.

View File

@ -41,7 +41,6 @@ __gconv_transliterate (struct __gconv_step *step,
{
/* Find out about the locale's transliteration. */
uint_fast32_t size;
uint_fast32_t layers;
uint32_t *from_idx;
uint32_t *from_tbl;
uint32_t *to_idx;
@ -57,12 +56,11 @@ __gconv_transliterate (struct __gconv_step *step,
/* If there is no transliteration information in the locale don't do
anything and return the error. */
size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_HASH_SIZE);
size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_TAB_SIZE);
if (size == 0)
goto no_rules;
/* Get the rest of the values. */
layers = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_HASH_LAYERS);
from_idx = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_IDX);
from_tbl = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_TBL);
to_idx = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_IDX);
@ -148,9 +146,9 @@ __gconv_transliterate (struct __gconv_step *step,
return __GCONV_INCOMPLETE_INPUT;
if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt])
low = idx;
low = med + 1;
else
high = idx;
high = med;
}
no_rules:

View File

@ -20,6 +20,8 @@
#include "localeinfo.h"
#include <endian.h>
#include "C-translit.h"
/* This table's entries are taken from POSIX.2 Table 2-6
``LC_CTYPE Category Definition in the POSIX Locale''.
@ -420,12 +422,11 @@ const struct locale_data _nl_C_LC_CTYPE =
{ word: L'7' },
{ word: L'8' },
{ word: L'9' },
{ word: 0 },
{ word: 0 },
{ string: NULL },
{ string: NULL },
{ string: NULL },
{ string: NULL },
{ word: NTRANSLIT },
{ wstr: translit_from_idx },
{ wstr: (uint32_t *) translit_from_tbl },
{ wstr: translit_to_idx },
{ wstr: (uint32_t *) translit_to_tbl },
{ word: 1 },
{ wstr: (uint32_t *) L"?" },
{ word: 0 },

21
locale/C-translit.h Normal file
View File

@ -0,0 +1,21 @@
/* Transliteration tables for the C locale.
   NOTE(review): this file appears to be produced by gen-translit.pl from
   C-translit.h.in; hand edits are presumably lost on regeneration.  */

/* Number of transliteration entries; each index array below has this
   many elements.  */
#define NTRANSLIT 20
/* translit_from_idx[i] is the offset within translit_from_tbl at which
   the source string of entry i starts.  */
static const uint32_t translit_from_idx[] =
{
0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22,
24, 26, 28, 30, 32, 34, 36, 38
};
/* Source strings, stored back to back.  Each one is followed by a single
   NUL; the last relies on the literal's implicit terminator.  */
static const wchar_t translit_from_tbl[] =
L"\xa9" L"\0" L"\xab" L"\0" L"\xae" L"\0" L"\xbb" L"\0" L"\xbc" L"\0"
L"\xbd" L"\0" L"\xbe" L"\0" L"\xc4" L"\0" L"\xc5" L"\0" L"\xc6" L"\0"
L"\xd6" L"\0" L"\xdc" L"\0" L"\xdf" L"\0" L"\xe4" L"\0" L"\xe5" L"\0"
L"\xe6" L"\0" L"\xf6" L"\0" L"\xfc" L"\0" L"\x201c" L"\0" L"\x201d";
/* translit_to_idx[i] is the offset within translit_to_tbl at which the
   replacement for entry i starts.  */
static const uint32_t translit_to_idx[] =
{
0, 5, 9, 14, 18, 23, 28, 33, 37, 41, 45, 49,
53, 57, 61, 65, 69, 73, 77, 80
};
/* Replacement strings, stored back to back.  Each one is followed by two
   NULs (the last again counting the implicit terminator).
   NOTE(review): the second NUL presumably ends a list of alternative
   replacements -- confirm against the consumer in iconv/gconv_trans.c.  */
static const wchar_t translit_to_tbl[] =
L"(C)\0" L"\0" L"<<\0" L"\0" L"(R)\0" L"\0" L">>\0" L"\0" L"1/4\0" L"\0"
L"1/2\0" L"\0" L"3/4\0" L"\0" L"AE\0" L"\0" L"AA\0" L"\0" L"AE\0" L"\0"
L"OE\0" L"\0" L"UE\0" L"\0" L"ss\0" L"\0" L"ae\0" L"\0" L"aa\0" L"\0"
L"ae\0" L"\0" L"oe\0" L"\0" L"ue\0" L"\0" L"\"\0" L"\0" L"\"\0";

97
locale/C-translit.h.in Normal file
View File

@ -0,0 +1,97 @@
/* Transliteration for the C locale.
Copyright (C) 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@redhat.com>, 2000.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
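/* The entries here must be sorted in ascending order of the input string
   (the first string of each pair); the lookup code searches the generated
   table with a binary search.  */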
/* <U00A9> COPYRIGHT SIGN. */
"\xa9" "(C)"
/* <U00AB> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK. */
"\xab" "<<"
/* <U00AE> REGISTERED SIGN. */
"\xae" "(R)"
/* <U00BB> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK. */
"\xbb" ">>"
/* <U00BC> VULGAR FRACTION ONE QUARTER. */
"\xbc" "1/4"
/* <U00BD> VULGAR FRACTION ONE HALF. */
"\xbd" "1/2"
/* <U00BE> VULGAR FRACTION THREE QUARTERS. */
"\xbe" "3/4"
/* <U00C4> LATIN CAPITAL LETTER A WITH DIAERESIS. */
/* XXX It is not clear whether this is the best transliteration for
all locales. If not, we probably have to take it out completely. */
"\xc4" "AE"
/* <U00C5> LATIN CAPITAL LETTER A WITH RING ABOVE. */
/* XXX It is not clear whether this is the best transliteration for
all locales. If not, we probably have to take it out completely. */
"\xc5" "AA"
/* <U00C6> LATIN CAPITAL LETTER AE. */
"\xc6" "AE"
/* <U00D6> LATIN CAPITAL LETTER O WITH DIAERESIS. */
/* XXX It is not clear whether this is the best transliteration for
all locales. If not, we probably have to take it out completely. */
"\xd6" "OE"
/* <U00DC> LATIN CAPITAL LETTER U WITH DIAERESIS. */
/* XXX It is not clear whether this is the best transliteration for
all locales. If not, we probably have to take it out completely. */
"\xdc" "UE"
/* <U00DF> LATIN SMALL LETTER SHARP S. */
"\xdf" "ss"
/* <U00E4> LATIN SMALL LETTER A WITH DIAERESIS. */
/* XXX It is not clear whether this is the best transliteration for
all locales. If not, we probably have to take it out completely. */
"\xe4" "ae"
/* <U00E5> LATIN SMALL LETTER A WITH RING ABOVE. */
/* XXX It is not clear whether this is the best transliteration for
all locales. If not, we probably have to take it out completely. */
"\xe5" "aa"
/* <U00E6> LATIN SMALL LETTER AE. */
"\xe6" "ae"
/* <U00F6> LATIN SMALL LETTER O WITH DIAERESIS. */
/* XXX It is not clear whether this is the best transliteration for
all locales. If not, we probably have to take it out completely. */
"\xf6" "oe"
/* <U00FC> LATIN SMALL LETTER U WITH DIAERESIS. */
/* XXX It is not clear whether this is the best transliteration for
all locales. If not, we probably have to take it out completely. */
"\xfc" "ue"
/* <U201C> LEFT DOUBLE QUOTATION MARK. */
"\x201c" "\""
/* <U201D> RIGHT DOUBLE QUOTATION MARK. */
"\x201d" "\""

View File

@ -25,6 +25,7 @@ headers = locale.h langinfo.h xlocale.h
distribute = localeinfo.h categories.def iso-639.def iso-3166.def \
iso-4217.def weight.h weightwc.h strlen-hash.h elem-hash.h \
indigits.h indigitswc.h outdigits.h outdigitswc.h \
C-translit.h.in C-translit.h gen-translit.pl \
$(addprefix programs/, \
locale.c localedef.c \
$(localedef-modules:=.c) $(locale-modules:=.c) \
@ -73,6 +74,13 @@ $(objpfx)localedef: $(localedef-modules:%=$(objpfx)%.o)
$(objpfx)locale: $(locale-modules:%=$(objpfx)%.o)
$(objpfx)localedef $(objpfx)locale: $(lib-modules:%=$(objpfx)%.o)
# Regenerate C-translit.h from C-translit.h.in with gen-translit.pl.
# The output goes to a temporary file first and is then installed via
# $(move-if-change).  When with-cvs is `yes' and a CVS directory exists,
# the regenerated file is committed as well.
C-translit.h: C-translit.h.in gen-translit.pl
$(PERL) gen-translit.pl < $< > $@.tmp
$(move-if-change) $@.tmp $@
ifeq ($(with-cvs),yes)
test ! -d CVS || cvs $(CVSOPTS) commit -mRegenerated $@
endif
localepath = "$(localedir):$(i18ndir)"
locale-CPPFLAGS := -DLOCALE_PATH='$(localepath)' \

View File

@ -126,8 +126,7 @@ DEFINE_CATEGORY
DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT7_WC, "ctype-outdigit7_wc", std, word)
DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT8_WC, "ctype-outdigit8_wc", std, word)
DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT9_WC, "ctype-outdigit9_wc", std, word)
DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_HASH_SIZE, "ctype-translit-hash-size", std, word)
DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_HASH_LAYERS, "ctype-translit-hash-layers", std, word)
DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_TAB_SIZE, "ctype-translit-tab-size", std, word)
DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_FROM_IDX, "ctype-translit-from-idx", std, string)
DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_FROM_TBL, "ctype-translit-from-tbl", std, string)
DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_TO_IDX, "ctype-translit-to-idx", std, string)

142
locale/gen-translit.pl Normal file
View File

@ -0,0 +1,142 @@
#! /usr/bin/perl -w
# Run the input file through the C preprocessor (this strips the comments)
# and read the result from the pipe F.  `or' is required here: with `||'
# the test would bind to the (always true) command string and the die
# could never trigger.
open(F, "cat C-translit.h.in | gcc -E - |")
  or die "Cannot preprocess input file";
# Return the number of characters denoted by STR, the text between the
# quotes of a C string literal.
#
# A `\x' escape together with the (at most eight) hexadecimal digits that
# follow counts as one character.  Any other backslash escape (backslash
# plus one character) counts as one character.  Every other character
# counts as itself.  Dies with "invalid input" if STR ends in a lone
# backslash.
sub cstrlen {
  my($str) = @_;
  my($len) = length($str);
  my($cnt);
  my($res) = 0;

  for ($cnt = 0; $cnt < $len; ++$cnt) {
    if (substr($str, $cnt, 1) eq '\\') {
      # Recognize the escape sequence.
      if (substr($str, $cnt + 1, 1) eq 'x') {
        my($inner);
        # Skip over the hexadecimal digits belonging to the escape.
        for ($inner = $cnt + 2; $inner < $len && $inner < $cnt + 10; ++$inner) {
          last unless (substr($str, $inner, 1) =~ /[0-9a-fA-F]/);
        }
        # $inner now indexes the first character after the escape.  The
        # enclosing loop increments $cnt once more, so step back by one;
        # otherwise the character following the escape would be skipped
        # and e.g. two adjacent `\x' escapes would be miscounted.
        $cnt = $inner - 1;
        ++$res;
      } else {
        die "invalid input" if ($cnt + 1 >= $len);
        ++$res;
        # Skip the escaped character.
        ++$cnt;
      }
    } else {
      ++$res;
    }
  }

  return $res;
}
# Read the preprocessed input and collect every `"FROM" "TO"' pair.
# Lines starting with `#' (preprocessor line markers) and blank lines are
# skipped.  For each pair the string bodies are appended to @froms and
# @tos and their lengths, as computed by cstrlen, to @fromlens and @tolens.
while (<F>) {
  next if (/^#/);
  next if (/^[ ]*$/);
  # Only strip a trailing newline; chop would also eat the closing quote
  # of a final line that lacks one.
  chomp;

  if (/"([^\"]*)"[ ]*"(.*)"/) {
    my($from) = $1;
    my($to) = $2;
    my($fromlen) = cstrlen($from);
    my($tolen) = cstrlen($to);

    push(@froms, $from);
    push(@fromlens, $fromlen);
    push(@tos, $to);
    push(@tolens, $tolen);
  }
}
# Closing a pipe waits for the child and fails if it exited with a
# non-zero status; this is where a failing preprocessor run shows up.
close(F) or die "Preprocessing of input file failed";
# Print the definition of the uint32_t array NAME holding the start offset
# of every entry in the matching string table.  LENS are the entry lengths
# in characters; EXTRA is the number of separator characters stored after
# each entry and therefore added to every length.
sub print_index_table {
  my($name, $extra, @lens) = @_;
  my($col) = 2;
  my($total) = 0;
  my($cnt);

  printf "static const uint32_t %s[] =\n{\n ", $name;
  for ($cnt = 0; $cnt <= $#lens; ++$cnt) {
    if ($cnt != 0) {
      # Start a new line when the next number would run past column 79.
      if ($col + 7 >= 79) {
        print ",\n ";
        $col = 2;
      } else {
        print ", ";
        $col += 2;
      }
    }
    printf("%4d", $total);
    $total += $lens[$cnt] + $extra;
    $col += 4;
  }
  print "\n};\n";
}

# Print the definition of the wchar_t string table NAME.  Every element of
# STRS becomes one wide string literal with SUFFIX appended inside the
# quotes; consecutive entries are separated by an L"\0" literal.  The
# strings are emitted with print, never as a printf format, so a `%' in an
# entry cannot be mistaken for a conversion specification.
sub print_string_table {
  my($name, $suffix, @strs) = @_;
  # Characters each literal needs besides the string: L, two quotes, SUFFIX.
  my($overhead) = length($suffix) + 3;
  my($col) = 1;
  my($cnt);

  printf "static const wchar_t %s[] =\n ", $name;
  for ($cnt = 0; $cnt <= $#strs; ++$cnt) {
    if ($cnt != 0) {
      if ($col + 6 >= 79) {
        print "\n ";
        $col = 1;
      }
      print " L\"\\0\"";
      $col += 6;
    }
    if ($col > 2 && $col + length($strs[$cnt]) + $overhead + 1 >= 79) {
      print "\n ";
      $col = 2;
    } else {
      print " ";
      ++$col;
    }
    print "L\"$strs[$cnt]$suffix\"";
    $col += length($strs[$cnt]) + $overhead;
  }
  print ";\n";
}

# Write the generated header to standard output: the entry count, then
# index and string tables for the source strings and for the replacements.
printf "#define NTRANSLIT %d\n", $#froms + 1;

# Source strings are separated by one NUL, hence one extra per entry.
print_index_table("translit_from_idx", 1, @fromlens);
print_string_table("translit_from_tbl", "", @froms);

# Replacements carry their own trailing \0 in addition to the separator,
# hence two extra characters per entry.
print_index_table("translit_to_idx", 2, @tolens);
print_string_table("translit_to_tbl", "\\0", @tos);

exit 0;

View File

@ -316,8 +316,7 @@ enum
_NL_CTYPE_OUTDIGIT7_WC,
_NL_CTYPE_OUTDIGIT8_WC,
_NL_CTYPE_OUTDIGIT9_WC,
_NL_CTYPE_TRANSLIT_HASH_SIZE,
_NL_CTYPE_TRANSLIT_HASH_LAYERS,
_NL_CTYPE_TRANSLIT_TAB_SIZE,
_NL_CTYPE_TRANSLIT_FROM_IDX,
_NL_CTYPE_TRANSLIT_FROM_TBL,
_NL_CTYPE_TRANSLIT_TO_IDX,

View File

@ -173,13 +173,11 @@ struct locale_ctype_t
unsigned char *width;
uint32_t mb_cur_max;
const char *codeset_name;
uint32_t translit_hash_size;
uint32_t translit_hash_layers;
uint32_t *translit_from_idx;
uint32_t *translit_from_tbl;
uint32_t *translit_to_idx;
uint32_t *translit_to_tbl;
size_t translit_idx_size;
uint32_t translit_idx_size;
size_t translit_from_tbl_size;
size_t translit_to_tbl_size;
@ -866,7 +864,7 @@ ctype_output (struct localedef_t *locale, struct charmap_t *charmap,
{
#define CTYPE_EMPTY(name) \
case name: \
iov[2 + elem + offset].iov_base = ""; \
iov[2 + elem + offset].iov_base = (void *) ""; \
iov[2 + elem + offset].iov_len = 0; \
idx[elem + 1] = idx[elem]; \
break
@ -911,14 +909,12 @@ ctype_output (struct localedef_t *locale, struct charmap_t *charmap,
ctype->names, (ctype->plane_size * ctype->plane_cnt
* sizeof (uint32_t)));
CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_SIZE,
&ctype->translit_hash_size, sizeof (uint32_t));
CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_LAYERS,
&ctype->translit_hash_layers, sizeof (uint32_t));
CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
&ctype->translit_idx_size, sizeof (uint32_t));
CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
ctype->translit_from_idx,
ctype->translit_idx_size);
ctype->translit_idx_size * sizeof (uint32_t));
CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
ctype->translit_from_tbl,
@ -926,7 +922,7 @@ ctype_output (struct localedef_t *locale, struct charmap_t *charmap,
CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
ctype->translit_to_idx,
ctype->translit_idx_size);
ctype->translit_idx_size * sizeof (uint32_t));
CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
ctype->translit_to_tbl, ctype->translit_to_tbl_size);
@ -3664,7 +3660,7 @@ Computing table size for character classes might take a while..."),
}
/* Store the information about the length. */
ctype->translit_idx_size = number * sizeof (uint32_t);
ctype->translit_idx_size = number;
ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
}