mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-11-21 03:13:05 +08:00
Use ICU by default at initdb time.
If the ICU locale is not specified, initialize the default collator and retrieve the locale name from that. Discussion: https://postgr.es/m/510d284759f6e943ce15096167760b2edcb2e700.camel@j-davis.com Reviewed-by: Peter Eisentraut
This commit is contained in:
parent
a7e584a7d6
commit
27b62377b4
@ -1,9 +1,16 @@
|
||||
/*
|
||||
* This test must be run in a database with UTF-8 encoding
|
||||
* and a Unicode-aware locale.
|
||||
*
|
||||
* Also disable this file for ICU, because the test for the
|
||||
* Turkish dotted I is not correct for many ICU locales. citext always
|
||||
* uses the default collation, so it's not easy to restrict the test
|
||||
* to the "tr-TR-x-icu" collation where it will succeed.
|
||||
*/
|
||||
SELECT getdatabaseencoding() <> 'UTF8' OR
|
||||
current_setting('lc_ctype') = 'C'
|
||||
current_setting('lc_ctype') = 'C' OR
|
||||
(SELECT datlocprovider='i' FROM pg_database
|
||||
WHERE datname=current_database())
|
||||
AS skip_test \gset
|
||||
\if :skip_test
|
||||
\quit
|
||||
|
@ -1,9 +1,16 @@
|
||||
/*
|
||||
* This test must be run in a database with UTF-8 encoding
|
||||
* and a Unicode-aware locale.
|
||||
*
|
||||
* Also disable this file for ICU, because the test for the
|
||||
* Turkish dotted I is not correct for many ICU locales. citext always
|
||||
* uses the default collation, so it's not easy to restrict the test
|
||||
* to the "tr-TR-x-icu" collation where it will succeed.
|
||||
*/
|
||||
SELECT getdatabaseencoding() <> 'UTF8' OR
|
||||
current_setting('lc_ctype') = 'C'
|
||||
current_setting('lc_ctype') = 'C' OR
|
||||
(SELECT datlocprovider='i' FROM pg_database
|
||||
WHERE datname=current_database())
|
||||
AS skip_test \gset
|
||||
\if :skip_test
|
||||
\quit
|
||||
|
@ -1,10 +1,17 @@
|
||||
/*
|
||||
* This test must be run in a database with UTF-8 encoding
|
||||
* and a Unicode-aware locale.
|
||||
*
|
||||
* Also disable this file for ICU, because the test for the
|
||||
* Turkish dotted I is not correct for many ICU locales. citext always
|
||||
* uses the default collation, so it's not easy to restrict the test
|
||||
* to the "tr-TR-x-icu" collation where it will succeed.
|
||||
*/
|
||||
|
||||
SELECT getdatabaseencoding() <> 'UTF8' OR
|
||||
current_setting('lc_ctype') = 'C'
|
||||
current_setting('lc_ctype') = 'C' OR
|
||||
(SELECT datlocprovider='i' FROM pg_database
|
||||
WHERE datname=current_database())
|
||||
AS skip_test \gset
|
||||
\if :skip_test
|
||||
\quit
|
||||
|
@ -1,3 +1,12 @@
|
||||
-- unaccent is broken if the default collation is provided by ICU and
|
||||
-- LC_CTYPE=C
|
||||
SELECT current_setting('lc_ctype') = 'C' AND
|
||||
(SELECT datlocprovider='i' FROM pg_database
|
||||
WHERE datname=current_database())
|
||||
AS skip_test \gset
|
||||
\if :skip_test
|
||||
\quit
|
||||
\endif
|
||||
CREATE EXTENSION unaccent;
|
||||
-- must have a UTF8 database
|
||||
SELECT getdatabaseencoding();
|
||||
|
8
contrib/unaccent/expected/unaccent_1.out
Normal file
8
contrib/unaccent/expected/unaccent_1.out
Normal file
@ -0,0 +1,8 @@
|
||||
-- unaccent is broken if the default collation is provided by ICU and
|
||||
-- LC_CTYPE=C
|
||||
SELECT current_setting('lc_ctype') = 'C' AND
|
||||
(SELECT datlocprovider='i' FROM pg_database
|
||||
WHERE datname=current_database())
|
||||
AS skip_test \gset
|
||||
\if :skip_test
|
||||
\quit
|
@ -1,3 +1,14 @@
|
||||
|
||||
-- unaccent is broken if the default collation is provided by ICU and
|
||||
-- LC_CTYPE=C
|
||||
SELECT current_setting('lc_ctype') = 'C' AND
|
||||
(SELECT datlocprovider='i' FROM pg_database
|
||||
WHERE datname=current_database())
|
||||
AS skip_test \gset
|
||||
\if :skip_test
|
||||
\quit
|
||||
\endif
|
||||
|
||||
CREATE EXTENSION unaccent;
|
||||
|
||||
-- must have a UTF8 database
|
||||
|
@ -89,10 +89,28 @@ PostgreSQL documentation
|
||||
and character set encoding. These can also be set separately for each
|
||||
database when it is created. <command>initdb</command> determines those
|
||||
settings for the template databases, which will serve as the default for
|
||||
all other databases. By default, <command>initdb</command> uses the
|
||||
locale provider <literal>libc</literal>, takes the locale settings from
|
||||
the environment, and determines the encoding from the locale settings.
|
||||
This is almost always sufficient, unless there are special requirements.
|
||||
all other databases.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
By default, <command>initdb</command> uses the ICU library to provide
|
||||
locale services if the server was built with ICU support; otherwise it uses
|
||||
the <literal>libc</literal> locale provider (see <xref
|
||||
linkend="locale-providers"/>). To choose the specific ICU locale ID to
|
||||
apply, use the option <option>--icu-locale</option>. Note that for
|
||||
implementation reasons and to support legacy code,
|
||||
<command>initdb</command> will still select and initialize libc locale
|
||||
settings when the ICU locale provider is used.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Alternatively, <command>initdb</command> can use the locale provider
|
||||
<literal>libc</literal>. To select this option, specify
|
||||
<literal>--locale-provider=libc</literal>, or build the server without ICU
|
||||
support. The <literal>libc</literal> locale provider takes the locale
|
||||
settings from the environment, and determines the encoding from the locale
|
||||
settings. This is almost always sufficient, unless there are special
|
||||
requirements.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
@ -103,17 +121,6 @@ PostgreSQL documentation
|
||||
categories can give nonsensical results, so this should be used with care.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Alternatively, the ICU library can be used to provide locale services.
|
||||
(Again, this only sets the default for subsequently created databases.) To
|
||||
select this option, specify <literal>--locale-provider=icu</literal>.
|
||||
To choose the specific ICU locale ID to apply, use the option
|
||||
<option>--icu-locale</option>. Note that
|
||||
for implementation reasons and to support legacy code,
|
||||
<command>initdb</command> will still select and initialize libc locale
|
||||
settings when the ICU locale provider is used.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
When <command>initdb</command> runs, it will print out the locale settings
|
||||
it has chosen. If you have complex requirements or specified multiple
|
||||
@ -234,7 +241,13 @@ PostgreSQL documentation
|
||||
<term><option>--icu-locale=<replaceable>locale</replaceable></option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Specifies the ICU locale ID, if the ICU locale provider is used.
|
||||
Specifies the ICU locale when the ICU provider is used. Locale support
|
||||
is described in <xref linkend="locale"/>.
|
||||
</para>
|
||||
<para>
|
||||
If this option is not specified, the locale is inherited from the
|
||||
environment in which <command>initdb</command> runs. The environment's
|
||||
locale is matched to a similar ICU locale name, if possible.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@ -307,10 +320,12 @@ PostgreSQL documentation
|
||||
<term><option>--locale-provider={<literal>libc</literal>|<literal>icu</literal>}</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
This option sets the locale provider for databases created in the
|
||||
new cluster. It can be overridden in the <command>CREATE
|
||||
This option sets the locale provider for databases created in the new
|
||||
cluster. It can be overridden in the <command>CREATE
|
||||
DATABASE</command> command when new databases are subsequently
|
||||
created. The default is <literal>libc</literal>.
|
||||
created. The default is <literal>icu</literal> if the server was
|
||||
built with ICU support; otherwise the default is
|
||||
<literal>libc</literal> (see <xref linkend="locale-providers"/>).
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
@ -16,7 +16,7 @@ subdir = src/bin/initdb
|
||||
top_builddir = ../../..
|
||||
include $(top_builddir)/src/Makefile.global
|
||||
|
||||
override CPPFLAGS := -I$(libpq_srcdir) -I$(top_srcdir)/src/timezone $(CPPFLAGS)
|
||||
override CPPFLAGS := -I$(libpq_srcdir) -I$(top_srcdir)/src/timezone $(ICU_CFLAGS) $(CPPFLAGS)
|
||||
|
||||
# Note: it's important that we link to encnames.o from libpgcommon, not
|
||||
# from libpq, else we have risks of version skew if we run with a libpq
|
||||
@ -24,7 +24,7 @@ override CPPFLAGS := -I$(libpq_srcdir) -I$(top_srcdir)/src/timezone $(CPPFLAGS)
|
||||
# should ensure that that happens.
|
||||
#
|
||||
# We need libpq only because fe_utils does.
|
||||
LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport)
|
||||
LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport) $(ICU_LIBS)
|
||||
|
||||
# use system timezone data?
|
||||
ifneq (,$(with_system_tzdata))
|
||||
|
@ -53,6 +53,9 @@
|
||||
#include <netdb.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/stat.h>
|
||||
#ifdef USE_ICU
|
||||
#include <unicode/ucol.h>
|
||||
#endif
|
||||
#include <unistd.h>
|
||||
#include <signal.h>
|
||||
#include <time.h>
|
||||
@ -133,7 +136,11 @@ static char *lc_monetary = NULL;
|
||||
static char *lc_numeric = NULL;
|
||||
static char *lc_time = NULL;
|
||||
static char *lc_messages = NULL;
|
||||
#ifdef USE_ICU
|
||||
static char locale_provider = COLLPROVIDER_ICU;
|
||||
#else
|
||||
static char locale_provider = COLLPROVIDER_LIBC;
|
||||
#endif
|
||||
static char *icu_locale = NULL;
|
||||
static char *icu_rules = NULL;
|
||||
static const char *default_text_search_config = NULL;
|
||||
@ -2028,6 +2035,50 @@ check_icu_locale_encoding(int user_enc)
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check that ICU accepts the locale name; or if not specified, retrieve the
|
||||
* default ICU locale.
|
||||
*
* Reads and possibly writes the file-level variable icu_locale:
*   - if icu_locale is set, a collator is opened for it only to validate it;
*   - if icu_locale is NULL, a collator is opened with a NULL locale and the
*     name it reports (ULOC_VALID_LOCALE) is copied into icu_locale.
*
* Any ICU failure is reported through pg_fatal().  When the build does not
* define USE_ICU the function body is empty and icu_locale is left untouched.
|
||||
*/
|
||||
static void
|
||||
check_icu_locale(void)
|
||||
{
|
||||
#ifdef USE_ICU
|
||||
UCollator *collator;
|
||||
UErrorCode status;
|
||||
|
||||
/* ICU reports errors through status, so reset it before the call */
status = U_ZERO_ERROR;
|
||||
/* icu_locale may be NULL here; that case is handled as "default locale" below */
collator = ucol_open(icu_locale, &status);
|
||||
if (U_FAILURE(status))
|
||||
{
|
||||
/* pick the message that matches whether a locale name was supplied */
if (icu_locale)
|
||||
pg_fatal("could not open collator for locale \"%s\": %s",
|
||||
icu_locale, u_errorName(status));
|
||||
else
|
||||
pg_fatal("could not open collator for default locale: %s",
|
||||
u_errorName(status));
|
||||
}
|
||||
|
||||
/* if not specified, get locale from default collator */
|
||||
if (icu_locale == NULL)
|
||||
{
|
||||
const char *default_locale;
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
default_locale = ucol_getLocaleByType(collator, ULOC_VALID_LOCALE,
|
||||
&status);
|
||||
if (U_FAILURE(status))
|
||||
{
|
||||
/* release the collator before bailing out */
ucol_close(collator);
|
||||
pg_fatal("could not determine default ICU locale");
|
||||
}
|
||||
|
||||
/*
 * Keep a private copy: the collator is closed just below.
 * NOTE(review): presumably the returned string is owned by the
 * collator and would not outlive ucol_close() -- confirm against the
 * ICU documentation.
 */
icu_locale = pg_strdup(default_locale);
|
||||
}
|
||||
|
||||
/* the collator was only needed for validation / name lookup */
ucol_close(collator);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* set up the locale variables
|
||||
*
|
||||
@ -2081,8 +2132,7 @@ setlocales(void)
|
||||
|
||||
if (locale_provider == COLLPROVIDER_ICU)
|
||||
{
|
||||
if (!icu_locale)
|
||||
pg_fatal("ICU locale must be specified");
|
||||
check_icu_locale();
|
||||
|
||||
/*
|
||||
* In supported builds, the ICU locale ID will be checked by the
|
||||
|
@ -97,11 +97,6 @@ SKIP:
|
||||
|
||||
if ($ENV{with_icu} eq 'yes')
|
||||
{
|
||||
command_fails_like(
|
||||
[ 'initdb', '--no-sync', '--locale-provider=icu', "$tempdir/data2" ],
|
||||
qr/initdb: error: ICU locale must be specified/,
|
||||
'locale provider ICU requires --icu-locale');
|
||||
|
||||
command_ok(
|
||||
[
|
||||
'initdb', '--no-sync',
|
||||
@ -116,7 +111,7 @@ if ($ENV{with_icu} eq 'yes')
|
||||
'--locale-provider=icu', '--icu-locale=@colNumeric=lower',
|
||||
"$tempdir/dataX"
|
||||
],
|
||||
qr/FATAL: could not open collator for locale/,
|
||||
qr/error: could not open collator for locale/,
|
||||
'fails for invalid ICU locale');
|
||||
|
||||
command_fails_like(
|
||||
|
@ -1758,7 +1758,7 @@ my %tests = (
|
||||
create_sql =>
|
||||
"CREATE DATABASE dump_test2 LOCALE = 'C' TEMPLATE = template0;",
|
||||
regexp => qr/^
|
||||
\QCREATE DATABASE dump_test2 \E.*\QLOCALE = 'C';\E
|
||||
\QCREATE DATABASE dump_test2 \E.*\QLOCALE = 'C'\E
|
||||
/xm,
|
||||
like => { pg_dumpall_dbprivs => 1, },
|
||||
},
|
||||
|
@ -13,7 +13,7 @@ program_version_ok('createdb');
|
||||
program_options_handling_ok('createdb');
|
||||
|
||||
my $node = PostgreSQL::Test::Cluster->new('main');
|
||||
$node->init;
|
||||
$node->init(extra => ['--locale-provider=libc']);
|
||||
$node->start;
|
||||
|
||||
$node->issues_sql_like(
|
||||
|
@ -14,9 +14,6 @@ override CPPFLAGS := \
|
||||
'-DSHELLPROG="$(SHELL)"' \
|
||||
$(CPPFLAGS)
|
||||
|
||||
# default encoding for regression tests
|
||||
ENCODING = SQL_ASCII
|
||||
|
||||
ifneq ($(build_os),mingw32)
|
||||
abs_builddir := $(shell pwd)
|
||||
else
|
||||
|
@ -55,7 +55,7 @@ exec sql end declare section;
|
||||
exec sql connect to 'unix:postgresql://localhost/ecpg2_regression' as main user :user USING "connectpw";
|
||||
exec sql disconnect main;
|
||||
|
||||
exec sql connect to unix:postgresql://localhost/ecpg2_regression?connect_timeout=180&client_encoding=latin1 as main user regress_ecpg_user1/connectpw;
|
||||
exec sql connect to unix:postgresql://localhost/ecpg2_regression?connect_timeout=180&client_encoding=sql_ascii as main user regress_ecpg_user1/connectpw;
|
||||
exec sql disconnect main;
|
||||
|
||||
exec sql connect to "unix:postgresql://200.46.204.71/ecpg2_regression" as main user regress_ecpg_user1/connectpw;
|
||||
|
@ -117,7 +117,7 @@ main(void)
|
||||
#line 56 "test5.pgc"
|
||||
|
||||
|
||||
{ ECPGconnect(__LINE__, 0, "unix:postgresql://localhost/ecpg2_regression?connect_timeout=180 & client_encoding=latin1" , "regress_ecpg_user1" , "connectpw" , "main", 0); }
|
||||
{ ECPGconnect(__LINE__, 0, "unix:postgresql://localhost/ecpg2_regression?connect_timeout=180 & client_encoding=sql_ascii" , "regress_ecpg_user1" , "connectpw" , "main", 0); }
|
||||
#line 58 "test5.pgc"
|
||||
|
||||
{ ECPGdisconnect(__LINE__, "main");}
|
||||
|
@ -50,7 +50,7 @@
|
||||
[NO_PID]: sqlca: code: 0, state: 00000
|
||||
[NO_PID]: ecpg_finish: connection main closed
|
||||
[NO_PID]: sqlca: code: 0, state: 00000
|
||||
[NO_PID]: ECPGconnect: opening database ecpg2_regression on <DEFAULT> port <DEFAULT> with options connect_timeout=180 & client_encoding=latin1 for user regress_ecpg_user1
|
||||
[NO_PID]: ECPGconnect: opening database ecpg2_regression on <DEFAULT> port <DEFAULT> with options connect_timeout=180 & client_encoding=sql_ascii for user regress_ecpg_user1
|
||||
[NO_PID]: sqlca: code: 0, state: 00000
|
||||
[NO_PID]: ecpg_finish: connection main closed
|
||||
[NO_PID]: sqlca: code: 0, state: 00000
|
||||
|
@ -69,7 +69,6 @@ ecpg_test_files = files(
|
||||
ecpg_regress_args = [
|
||||
'--dbname=ecpg1_regression,ecpg2_regression',
|
||||
'--create-role=regress_ecpg_user1,regress_ecpg_user2',
|
||||
'--encoding=SQL_ASCII',
|
||||
]
|
||||
|
||||
tests += {
|
||||
|
@ -12,7 +12,7 @@ if ($ENV{with_icu} ne 'yes')
|
||||
}
|
||||
|
||||
my $node1 = PostgreSQL::Test::Cluster->new('node1');
|
||||
$node1->init;
|
||||
$node1->init(extra => ['--locale-provider=libc']);
|
||||
$node1->start;
|
||||
|
||||
$node1->safe_psql('postgres',
|
||||
|
Loading…
Reference in New Issue
Block a user