diff --git a/contrib/citext/expected/citext_utf8.out b/contrib/citext/expected/citext_utf8.out
index 666b07ccec..77b4586d8f 100644
--- a/contrib/citext/expected/citext_utf8.out
+++ b/contrib/citext/expected/citext_utf8.out
@@ -1,9 +1,16 @@
/*
* This test must be run in a database with UTF-8 encoding
* and a Unicode-aware locale.
+ *
+ * Also disable this file for ICU, because the test for the the
+ * Turkish dotted I is not correct for many ICU locales. citext always
+ * uses the default collation, so it's not easy to restrict the test
+ * to the "tr-TR-x-icu" collation where it will succeed.
*/
SELECT getdatabaseencoding() <> 'UTF8' OR
- current_setting('lc_ctype') = 'C'
+ current_setting('lc_ctype') = 'C' OR
+ (SELECT datlocprovider='i' FROM pg_database
+ WHERE datname=current_database())
AS skip_test \gset
\if :skip_test
\quit
diff --git a/contrib/citext/expected/citext_utf8_1.out b/contrib/citext/expected/citext_utf8_1.out
index 433e985349..d1e1fe1a9d 100644
--- a/contrib/citext/expected/citext_utf8_1.out
+++ b/contrib/citext/expected/citext_utf8_1.out
@@ -1,9 +1,16 @@
/*
* This test must be run in a database with UTF-8 encoding
* and a Unicode-aware locale.
+ *
+ * Also disable this file for ICU, because the test for the the
+ * Turkish dotted I is not correct for many ICU locales. citext always
+ * uses the default collation, so it's not easy to restrict the test
+ * to the "tr-TR-x-icu" collation where it will succeed.
*/
SELECT getdatabaseencoding() <> 'UTF8' OR
- current_setting('lc_ctype') = 'C'
+ current_setting('lc_ctype') = 'C' OR
+ (SELECT datlocprovider='i' FROM pg_database
+ WHERE datname=current_database())
AS skip_test \gset
\if :skip_test
\quit
diff --git a/contrib/citext/sql/citext_utf8.sql b/contrib/citext/sql/citext_utf8.sql
index d068000b42..8530c68dd7 100644
--- a/contrib/citext/sql/citext_utf8.sql
+++ b/contrib/citext/sql/citext_utf8.sql
@@ -1,10 +1,17 @@
/*
* This test must be run in a database with UTF-8 encoding
* and a Unicode-aware locale.
+ *
+ * Also disable this file for ICU, because the test for the the
+ * Turkish dotted I is not correct for many ICU locales. citext always
+ * uses the default collation, so it's not easy to restrict the test
+ * to the "tr-TR-x-icu" collation where it will succeed.
*/
SELECT getdatabaseencoding() <> 'UTF8' OR
- current_setting('lc_ctype') = 'C'
+ current_setting('lc_ctype') = 'C' OR
+ (SELECT datlocprovider='i' FROM pg_database
+ WHERE datname=current_database())
AS skip_test \gset
\if :skip_test
\quit
diff --git a/contrib/unaccent/expected/unaccent.out b/contrib/unaccent/expected/unaccent.out
index ee0ac71a1c..cef98ee60c 100644
--- a/contrib/unaccent/expected/unaccent.out
+++ b/contrib/unaccent/expected/unaccent.out
@@ -1,3 +1,12 @@
+-- unaccent is broken if the default collation is provided by ICU and
+-- LC_CTYPE=C
+SELECT current_setting('lc_ctype') = 'C' AND
+ (SELECT datlocprovider='i' FROM pg_database
+ WHERE datname=current_database())
+ AS skip_test \gset
+\if :skip_test
+\quit
+\endif
CREATE EXTENSION unaccent;
-- must have a UTF8 database
SELECT getdatabaseencoding();
diff --git a/contrib/unaccent/expected/unaccent_1.out b/contrib/unaccent/expected/unaccent_1.out
new file mode 100644
index 0000000000..0a4a3838ab
--- /dev/null
+++ b/contrib/unaccent/expected/unaccent_1.out
@@ -0,0 +1,8 @@
+-- unaccent is broken if the default collation is provided by ICU and
+-- LC_CTYPE=C
+SELECT current_setting('lc_ctype') = 'C' AND
+ (SELECT datlocprovider='i' FROM pg_database
+ WHERE datname=current_database())
+ AS skip_test \gset
+\if :skip_test
+\quit
diff --git a/contrib/unaccent/sql/unaccent.sql b/contrib/unaccent/sql/unaccent.sql
index 3fc0c706be..027dfb964a 100644
--- a/contrib/unaccent/sql/unaccent.sql
+++ b/contrib/unaccent/sql/unaccent.sql
@@ -1,3 +1,14 @@
+
+-- unaccent is broken if the default collation is provided by ICU and
+-- LC_CTYPE=C
+SELECT current_setting('lc_ctype') = 'C' AND
+ (SELECT datlocprovider='i' FROM pg_database
+ WHERE datname=current_database())
+ AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+
CREATE EXTENSION unaccent;
-- must have a UTF8 database
diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index c96164195d..05a9c2cf58 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -89,10 +89,28 @@ PostgreSQL documentation
and character set encoding. These can also be set separately for each
database when it is created. initdb determines those
settings for the template databases, which will serve as the default for
- all other databases. By default, initdb uses the
- locale provider libc, takes the locale settings from
- the environment, and determines the encoding from the locale settings.
- This is almost always sufficient, unless there are special requirements.
+ all other databases.
+
+
+
+ By default, initdb uses the ICU library to provide
+ locale services if the server was built with ICU support; otherwise it uses
+ the libc locale provider (see ). To choose the specific ICU locale ID to
+ apply, use the option . Note that for
+ implementation reasons and to support legacy code,
+ initdb will still select and initialize libc locale
+ settings when the ICU locale provider is used.
+
+
+
+ Alternatively, initdb can use the locale provider
+ libc. To select this option, specify
+ --locale-provider=libc, or build the server without ICU
+ support. The libc locale provider takes the locale
+ settings from the environment, and determines the encoding from the locale
+ settings. This is almost always sufficient, unless there are special
+ requirements.
@@ -103,17 +121,6 @@ PostgreSQL documentation
categories can give nonsensical results, so this should be used with care.
-
- Alternatively, the ICU library can be used to provide locale services.
- (Again, this only sets the default for subsequently created databases.) To
- select this option, specify --locale-provider=icu.
- To choose the specific ICU locale ID to apply, use the option
- . Note that
- for implementation reasons and to support legacy code,
- initdb will still select and initialize libc locale
- settings when the ICU locale provider is used.
-
-
When initdb runs, it will print out the locale settings
it has chosen. If you have complex requirements or specified multiple
@@ -234,7 +241,13 @@ PostgreSQL documentation
- Specifies the ICU locale ID, if the ICU locale provider is used.
+ Specifies the ICU locale when the ICU provider is used. Locale support
+ is described in .
+
+
+ If this option is not specified, the locale is inherited from the
+ environment in which initdb runs. The environment's
+ locale is matched to a similar ICU locale name, if possible.
@@ -307,10 +320,12 @@ PostgreSQL documentation
- This option sets the locale provider for databases created in the
- new cluster. It can be overridden in the CREATE
+ This option sets the locale provider for databases created in the new
+ cluster. It can be overridden in the CREATE
DATABASE command when new databases are subsequently
- created. The default is libc.
+ created. The default is icu if the server was
+ built with ICU support; otherwise the default is
+ libc (see ).
diff --git a/src/bin/initdb/Makefile b/src/bin/initdb/Makefile
index eab89c5501..d69bd89572 100644
--- a/src/bin/initdb/Makefile
+++ b/src/bin/initdb/Makefile
@@ -16,7 +16,7 @@ subdir = src/bin/initdb
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
-override CPPFLAGS := -I$(libpq_srcdir) -I$(top_srcdir)/src/timezone $(CPPFLAGS)
+override CPPFLAGS := -I$(libpq_srcdir) -I$(top_srcdir)/src/timezone $(ICU_CFLAGS) $(CPPFLAGS)
# Note: it's important that we link to encnames.o from libpgcommon, not
# from libpq, else we have risks of version skew if we run with a libpq
@@ -24,7 +24,7 @@ override CPPFLAGS := -I$(libpq_srcdir) -I$(top_srcdir)/src/timezone $(CPPFLAGS)
# should ensure that that happens.
#
# We need libpq only because fe_utils does.
-LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport)
+LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport) $(ICU_LIBS)
# use system timezone data?
ifneq (,$(with_system_tzdata))
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 5e3c6a27c4..bf88cd2439 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -53,6 +53,9 @@
#include
#include
#include
+#ifdef USE_ICU
+#include
+#endif
#include
#include
#include
@@ -133,7 +136,11 @@ static char *lc_monetary = NULL;
static char *lc_numeric = NULL;
static char *lc_time = NULL;
static char *lc_messages = NULL;
+#ifdef USE_ICU
+static char locale_provider = COLLPROVIDER_ICU;
+#else
static char locale_provider = COLLPROVIDER_LIBC;
+#endif
static char *icu_locale = NULL;
static char *icu_rules = NULL;
static const char *default_text_search_config = NULL;
@@ -2028,6 +2035,50 @@ check_icu_locale_encoding(int user_enc)
return true;
}
+/*
+ * Check that ICU accepts the locale name; or if not specified, retrieve the
+ * default ICU locale.
+ */
+static void
+check_icu_locale(void)
+{
+#ifdef USE_ICU
+ UCollator *collator;
+ UErrorCode status;
+
+ status = U_ZERO_ERROR;
+ collator = ucol_open(icu_locale, &status);
+ if (U_FAILURE(status))
+ {
+ if (icu_locale)
+ pg_fatal("could not open collator for locale \"%s\": %s",
+ icu_locale, u_errorName(status));
+ else
+ pg_fatal("could not open collator for default locale: %s",
+ u_errorName(status));
+ }
+
+ /* if not specified, get locale from default collator */
+ if (icu_locale == NULL)
+ {
+ const char *default_locale;
+
+ status = U_ZERO_ERROR;
+ default_locale = ucol_getLocaleByType(collator, ULOC_VALID_LOCALE,
+ &status);
+ if (U_FAILURE(status))
+ {
+ ucol_close(collator);
+ pg_fatal("could not determine default ICU locale");
+ }
+
+ icu_locale = pg_strdup(default_locale);
+ }
+
+ ucol_close(collator);
+#endif
+}
+
/*
* set up the locale variables
*
@@ -2081,8 +2132,7 @@ setlocales(void)
if (locale_provider == COLLPROVIDER_ICU)
{
- if (!icu_locale)
- pg_fatal("ICU locale must be specified");
+ check_icu_locale();
/*
* In supported builds, the ICU locale ID will be checked by the
diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl
index 772769acab..e5d214e09c 100644
--- a/src/bin/initdb/t/001_initdb.pl
+++ b/src/bin/initdb/t/001_initdb.pl
@@ -97,11 +97,6 @@ SKIP:
if ($ENV{with_icu} eq 'yes')
{
- command_fails_like(
- [ 'initdb', '--no-sync', '--locale-provider=icu', "$tempdir/data2" ],
- qr/initdb: error: ICU locale must be specified/,
- 'locale provider ICU requires --icu-locale');
-
command_ok(
[
'initdb', '--no-sync',
@@ -116,7 +111,7 @@ if ($ENV{with_icu} eq 'yes')
'--locale-provider=icu', '--icu-locale=@colNumeric=lower',
"$tempdir/dataX"
],
- qr/FATAL: could not open collator for locale/,
+ qr/error: could not open collator for locale/,
'fails for invalid ICU locale');
command_fails_like(
diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl
index 187e4b8d07..9c354213ce 100644
--- a/src/bin/pg_dump/t/002_pg_dump.pl
+++ b/src/bin/pg_dump/t/002_pg_dump.pl
@@ -1758,7 +1758,7 @@ my %tests = (
create_sql =>
"CREATE DATABASE dump_test2 LOCALE = 'C' TEMPLATE = template0;",
regexp => qr/^
- \QCREATE DATABASE dump_test2 \E.*\QLOCALE = 'C';\E
+ \QCREATE DATABASE dump_test2 \E.*\QLOCALE = 'C'\E
/xm,
like => { pg_dumpall_dbprivs => 1, },
},
diff --git a/src/bin/scripts/t/020_createdb.pl b/src/bin/scripts/t/020_createdb.pl
index 3ad4fbb00c..8ec58cdd64 100644
--- a/src/bin/scripts/t/020_createdb.pl
+++ b/src/bin/scripts/t/020_createdb.pl
@@ -13,7 +13,7 @@ program_version_ok('createdb');
program_options_handling_ok('createdb');
my $node = PostgreSQL::Test::Cluster->new('main');
-$node->init;
+$node->init(extra => ['--locale-provider=libc']);
$node->start;
$node->issues_sql_like(
diff --git a/src/interfaces/ecpg/test/Makefile b/src/interfaces/ecpg/test/Makefile
index d7a7d1d1ca..cf841a3a5b 100644
--- a/src/interfaces/ecpg/test/Makefile
+++ b/src/interfaces/ecpg/test/Makefile
@@ -14,9 +14,6 @@ override CPPFLAGS := \
'-DSHELLPROG="$(SHELL)"' \
$(CPPFLAGS)
-# default encoding for regression tests
-ENCODING = SQL_ASCII
-
ifneq ($(build_os),mingw32)
abs_builddir := $(shell pwd)
else
diff --git a/src/interfaces/ecpg/test/connect/test5.pgc b/src/interfaces/ecpg/test/connect/test5.pgc
index de29160089..d512553677 100644
--- a/src/interfaces/ecpg/test/connect/test5.pgc
+++ b/src/interfaces/ecpg/test/connect/test5.pgc
@@ -55,7 +55,7 @@ exec sql end declare section;
exec sql connect to 'unix:postgresql://localhost/ecpg2_regression' as main user :user USING "connectpw";
exec sql disconnect main;
- exec sql connect to unix:postgresql://localhost/ecpg2_regression?connect_timeout=180&client_encoding=latin1 as main user regress_ecpg_user1/connectpw;
+ exec sql connect to unix:postgresql://localhost/ecpg2_regression?connect_timeout=180&client_encoding=sql_ascii as main user regress_ecpg_user1/connectpw;
exec sql disconnect main;
exec sql connect to "unix:postgresql://200.46.204.71/ecpg2_regression" as main user regress_ecpg_user1/connectpw;
diff --git a/src/interfaces/ecpg/test/expected/connect-test5.c b/src/interfaces/ecpg/test/expected/connect-test5.c
index c1124c627f..ec1514ed9a 100644
--- a/src/interfaces/ecpg/test/expected/connect-test5.c
+++ b/src/interfaces/ecpg/test/expected/connect-test5.c
@@ -117,7 +117,7 @@ main(void)
#line 56 "test5.pgc"
- { ECPGconnect(__LINE__, 0, "unix:postgresql://localhost/ecpg2_regression?connect_timeout=180 & client_encoding=latin1" , "regress_ecpg_user1" , "connectpw" , "main", 0); }
+ { ECPGconnect(__LINE__, 0, "unix:postgresql://localhost/ecpg2_regression?connect_timeout=180 & client_encoding=sql_ascii" , "regress_ecpg_user1" , "connectpw" , "main", 0); }
#line 58 "test5.pgc"
{ ECPGdisconnect(__LINE__, "main");}
diff --git a/src/interfaces/ecpg/test/expected/connect-test5.stderr b/src/interfaces/ecpg/test/expected/connect-test5.stderr
index 01a6a0a13b..51cc18916a 100644
--- a/src/interfaces/ecpg/test/expected/connect-test5.stderr
+++ b/src/interfaces/ecpg/test/expected/connect-test5.stderr
@@ -50,7 +50,7 @@
[NO_PID]: sqlca: code: 0, state: 00000
[NO_PID]: ecpg_finish: connection main closed
[NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ECPGconnect: opening database ecpg2_regression on port with options connect_timeout=180 & client_encoding=latin1 for user regress_ecpg_user1
+[NO_PID]: ECPGconnect: opening database ecpg2_regression on port with options connect_timeout=180 & client_encoding=sql_ascii for user regress_ecpg_user1
[NO_PID]: sqlca: code: 0, state: 00000
[NO_PID]: ecpg_finish: connection main closed
[NO_PID]: sqlca: code: 0, state: 00000
diff --git a/src/interfaces/ecpg/test/meson.build b/src/interfaces/ecpg/test/meson.build
index d0be73ccf9..04c6819a79 100644
--- a/src/interfaces/ecpg/test/meson.build
+++ b/src/interfaces/ecpg/test/meson.build
@@ -69,7 +69,6 @@ ecpg_test_files = files(
ecpg_regress_args = [
'--dbname=ecpg1_regression,ecpg2_regression',
'--create-role=regress_ecpg_user1,regress_ecpg_user2',
- '--encoding=SQL_ASCII',
]
tests += {
diff --git a/src/test/icu/t/010_database.pl b/src/test/icu/t/010_database.pl
index 80ab1c7789..45d77c319a 100644
--- a/src/test/icu/t/010_database.pl
+++ b/src/test/icu/t/010_database.pl
@@ -12,7 +12,7 @@ if ($ENV{with_icu} ne 'yes')
}
my $node1 = PostgreSQL::Test::Cluster->new('node1');
-$node1->init;
+$node1->init(extra => ['--locale-provider=libc']);
$node1->start;
$node1->safe_psql('postgres',