mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-27 08:39:28 +08:00
Make initdb behave sanely when the selected locale has codeset "US-ASCII".
Per discussion, this should result in defaulting to SQL_ASCII encoding. The original coding could not support that because it conflated selection of SQL_ASCII encoding with not being able to determine the encoding. Adjust pg_get_encoding_from_locale()'s API to distinguish these cases, and fix callers appropriately. Only initdb actually changes behavior, since the other callers were perfectly content to consider these cases equivalent.

Per bug #5178 from Boh Yap.

Not going to bother back-patching, since no one has complained before and there's an easy workaround (namely, specify the encoding you want).
This commit is contained in:
parent
19d802767d
commit
8f8a5df694
@ -13,7 +13,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.227 2009/10/07 22:14:18 alvherre Exp $
|
* $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.228 2009/11/12 02:46:16 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -334,20 +334,22 @@ createdb(const CreatedbStmt *stmt)
|
|||||||
* Check whether chosen encoding matches chosen locale settings. This
|
* Check whether chosen encoding matches chosen locale settings. This
|
||||||
* restriction is necessary because libc's locale-specific code usually
|
* restriction is necessary because libc's locale-specific code usually
|
||||||
* fails when presented with data in an encoding it's not expecting. We
|
* fails when presented with data in an encoding it's not expecting. We
|
||||||
* allow mismatch in three cases:
|
* allow mismatch in four cases:
|
||||||
*
|
*
|
||||||
* 1. locale encoding = SQL_ASCII, which means either that the locale is
|
* 1. locale encoding = SQL_ASCII, which means that the locale is
|
||||||
* C/POSIX which works with any encoding, or that we couldn't determine
|
* C/POSIX which works with any encoding.
|
||||||
|
*
|
||||||
|
* 2. locale encoding = -1, which means that we couldn't determine
|
||||||
* the locale's encoding and have to trust the user to get it right.
|
* the locale's encoding and have to trust the user to get it right.
|
||||||
*
|
*
|
||||||
* 2. selected encoding is SQL_ASCII, but only if you're a superuser. This
|
|
||||||
* is risky but we have historically allowed it --- notably, the
|
|
||||||
* regression tests require it.
|
|
||||||
*
|
|
||||||
* 3. selected encoding is UTF8 and platform is win32. This is because
|
* 3. selected encoding is UTF8 and platform is win32. This is because
|
||||||
* UTF8 is a pseudo codepage that is supported in all locales since it's
|
* UTF8 is a pseudo codepage that is supported in all locales since it's
|
||||||
* converted to UTF16 before being used.
|
* converted to UTF16 before being used.
|
||||||
*
|
*
|
||||||
|
* 4. selected encoding is SQL_ASCII, but only if you're a superuser. This
|
||||||
|
* is risky but we have historically allowed it --- notably, the
|
||||||
|
* regression tests require it.
|
||||||
|
*
|
||||||
* Note: if you change this policy, fix initdb to match.
|
* Note: if you change this policy, fix initdb to match.
|
||||||
*/
|
*/
|
||||||
ctype_encoding = pg_get_encoding_from_locale(dbctype);
|
ctype_encoding = pg_get_encoding_from_locale(dbctype);
|
||||||
@ -355,6 +357,7 @@ createdb(const CreatedbStmt *stmt)
|
|||||||
|
|
||||||
if (!(ctype_encoding == encoding ||
|
if (!(ctype_encoding == encoding ||
|
||||||
ctype_encoding == PG_SQL_ASCII ||
|
ctype_encoding == PG_SQL_ASCII ||
|
||||||
|
ctype_encoding == -1 ||
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
encoding == PG_UTF8 ||
|
encoding == PG_UTF8 ||
|
||||||
#endif
|
#endif
|
||||||
@ -369,6 +372,7 @@ createdb(const CreatedbStmt *stmt)
|
|||||||
|
|
||||||
if (!(collate_encoding == encoding ||
|
if (!(collate_encoding == encoding ||
|
||||||
collate_encoding == PG_SQL_ASCII ||
|
collate_encoding == PG_SQL_ASCII ||
|
||||||
|
collate_encoding == -1 ||
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
encoding == PG_UTF8 ||
|
encoding == PG_UTF8 ||
|
||||||
#endif
|
#endif
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
*
|
*
|
||||||
* Tatsuo Ishii
|
* Tatsuo Ishii
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.91 2009/10/17 05:14:52 mha Exp $
|
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.92 2009/11/12 02:46:16 tgl Exp $
|
||||||
*/
|
*/
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
@ -984,7 +984,14 @@ int
|
|||||||
GetPlatformEncoding(void)
|
GetPlatformEncoding(void)
|
||||||
{
|
{
|
||||||
if (PlatformEncoding == NULL)
|
if (PlatformEncoding == NULL)
|
||||||
PlatformEncoding = &pg_enc2name_tbl[pg_get_encoding_from_locale("")];
|
{
|
||||||
|
/* try to determine encoding of server's environment locale */
|
||||||
|
int encoding = pg_get_encoding_from_locale("");
|
||||||
|
|
||||||
|
if (encoding < 0)
|
||||||
|
encoding = PG_SQL_ASCII;
|
||||||
|
PlatformEncoding = &pg_enc2name_tbl[encoding];
|
||||||
|
}
|
||||||
return PlatformEncoding->encoding;
|
return PlatformEncoding->encoding;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -42,7 +42,7 @@
|
|||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
* Portions taken from FreeBSD.
|
* Portions taken from FreeBSD.
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.175 2009/09/03 01:40:11 tgl Exp $
|
* $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.176 2009/11/12 02:46:16 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -2193,21 +2193,14 @@ check_locale_encoding(const char *locale, int user_enc)
|
|||||||
|
|
||||||
locale_enc = pg_get_encoding_from_locale(locale);
|
locale_enc = pg_get_encoding_from_locale(locale);
|
||||||
|
|
||||||
/* We allow selection of SQL_ASCII --- see notes in createdb() */
|
/* See notes in createdb() to understand these tests */
|
||||||
if (!(locale_enc == user_enc ||
|
if (!(locale_enc == user_enc ||
|
||||||
locale_enc == PG_SQL_ASCII ||
|
locale_enc == PG_SQL_ASCII ||
|
||||||
user_enc == PG_SQL_ASCII
|
locale_enc == -1 ||
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
|
user_enc == PG_UTF8 ||
|
||||||
/*
|
|
||||||
* On win32, if the encoding chosen is UTF8, all locales are OK (assuming
|
|
||||||
* the actual locale name passed the checks above). This is because UTF8
|
|
||||||
* is a pseudo-codepage, that we convert to UTF16 before doing any
|
|
||||||
* operations on, and UTF16 supports all locales.
|
|
||||||
*/
|
|
||||||
|| user_enc == PG_UTF8
|
|
||||||
#endif
|
#endif
|
||||||
))
|
user_enc == PG_SQL_ASCII))
|
||||||
{
|
{
|
||||||
fprintf(stderr, _("%s: encoding mismatch\n"), progname);
|
fprintf(stderr, _("%s: encoding mismatch\n"), progname);
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
@ -2851,11 +2844,9 @@ main(int argc, char *argv[])
|
|||||||
|
|
||||||
ctype_enc = pg_get_encoding_from_locale(lc_ctype);
|
ctype_enc = pg_get_encoding_from_locale(lc_ctype);
|
||||||
|
|
||||||
if (ctype_enc == PG_SQL_ASCII &&
|
if (ctype_enc == -1)
|
||||||
!(pg_strcasecmp(lc_ctype, "C") == 0 ||
|
|
||||||
pg_strcasecmp(lc_ctype, "POSIX") == 0))
|
|
||||||
{
|
{
|
||||||
/* Hmm, couldn't recognize the locale's codeset */
|
/* Couldn't recognize the locale's codeset */
|
||||||
fprintf(stderr, _("%s: could not find suitable encoding for locale %s\n"),
|
fprintf(stderr, _("%s: could not find suitable encoding for locale %s\n"),
|
||||||
progname, lc_ctype);
|
progname, lc_ctype);
|
||||||
fprintf(stderr, _("Rerun %s with the -E option.\n"), progname);
|
fprintf(stderr, _("Rerun %s with the -E option.\n"), progname);
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/port/chklocale.c,v 1.11 2009/02/10 19:29:39 petere Exp $
|
* $PostgreSQL: pgsql/src/port/chklocale.c,v 1.12 2009/11/12 02:46:16 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -181,6 +181,8 @@ static const struct encoding_match encoding_match_list[] = {
|
|||||||
|
|
||||||
{PG_SHIFT_JIS_2004, "SJIS_2004"},
|
{PG_SHIFT_JIS_2004, "SJIS_2004"},
|
||||||
|
|
||||||
|
{PG_SQL_ASCII, "US-ASCII"},
|
||||||
|
|
||||||
{PG_SQL_ASCII, NULL} /* end marker */
|
{PG_SQL_ASCII, NULL} /* end marker */
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -215,13 +217,13 @@ win32_langinfo(const char *ctype)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Given a setting for LC_CTYPE, return the Postgres ID of the associated
|
* Given a setting for LC_CTYPE, return the Postgres ID of the associated
|
||||||
* encoding, if we can determine it.
|
* encoding, if we can determine it. Return -1 if we can't determine it.
|
||||||
*
|
*
|
||||||
* Pass in NULL to get the encoding for the current locale setting.
|
* Pass in NULL to get the encoding for the current locale setting.
|
||||||
|
* Pass "" to get the encoding selected by the server's environment.
|
||||||
*
|
*
|
||||||
* If the result is PG_SQL_ASCII, callers should treat it as being compatible
|
* If the result is PG_SQL_ASCII, callers should treat it as being compatible
|
||||||
* with any desired encoding. We return this if the locale is C/POSIX or we
|
* with any desired encoding.
|
||||||
* can't determine the encoding.
|
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
pg_get_encoding_from_locale(const char *ctype)
|
pg_get_encoding_from_locale(const char *ctype)
|
||||||
@ -237,17 +239,17 @@ pg_get_encoding_from_locale(const char *ctype)
|
|||||||
|
|
||||||
save = setlocale(LC_CTYPE, NULL);
|
save = setlocale(LC_CTYPE, NULL);
|
||||||
if (!save)
|
if (!save)
|
||||||
return PG_SQL_ASCII; /* setlocale() broken? */
|
return -1; /* setlocale() broken? */
|
||||||
/* must copy result, or it might change after setlocale */
|
/* must copy result, or it might change after setlocale */
|
||||||
save = strdup(save);
|
save = strdup(save);
|
||||||
if (!save)
|
if (!save)
|
||||||
return PG_SQL_ASCII; /* out of memory; unlikely */
|
return -1; /* out of memory; unlikely */
|
||||||
|
|
||||||
name = setlocale(LC_CTYPE, ctype);
|
name = setlocale(LC_CTYPE, ctype);
|
||||||
if (!name)
|
if (!name)
|
||||||
{
|
{
|
||||||
free(save);
|
free(save);
|
||||||
return PG_SQL_ASCII; /* bogus ctype passed in? */
|
return -1; /* bogus ctype passed in? */
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef WIN32
|
#ifndef WIN32
|
||||||
@ -266,7 +268,7 @@ pg_get_encoding_from_locale(const char *ctype)
|
|||||||
/* much easier... */
|
/* much easier... */
|
||||||
ctype = setlocale(LC_CTYPE, NULL);
|
ctype = setlocale(LC_CTYPE, NULL);
|
||||||
if (!ctype)
|
if (!ctype)
|
||||||
return PG_SQL_ASCII; /* setlocale() broken? */
|
return -1; /* setlocale() broken? */
|
||||||
#ifndef WIN32
|
#ifndef WIN32
|
||||||
sys = nl_langinfo(CODESET);
|
sys = nl_langinfo(CODESET);
|
||||||
if (sys)
|
if (sys)
|
||||||
@ -277,7 +279,7 @@ pg_get_encoding_from_locale(const char *ctype)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!sys)
|
if (!sys)
|
||||||
return PG_SQL_ASCII; /* out of memory; unlikely */
|
return -1; /* out of memory; unlikely */
|
||||||
|
|
||||||
/* If locale is C or POSIX, we can allow all encodings */
|
/* If locale is C or POSIX, we can allow all encodings */
|
||||||
if (pg_strcasecmp(ctype, "C") == 0 || pg_strcasecmp(ctype, "POSIX") == 0)
|
if (pg_strcasecmp(ctype, "C") == 0 || pg_strcasecmp(ctype, "POSIX") == 0)
|
||||||
@ -328,12 +330,16 @@ pg_get_encoding_from_locale(const char *ctype)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
free(sys);
|
free(sys);
|
||||||
return PG_SQL_ASCII;
|
return -1;
|
||||||
}
|
}
|
||||||
#else /* (HAVE_LANGINFO_H && CODESET) || WIN32 */
|
#else /* (HAVE_LANGINFO_H && CODESET) || WIN32 */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* stub if no platform support
|
* stub if no platform support
|
||||||
|
*
|
||||||
|
* Note: we could return -1 here, but that would have the effect of
|
||||||
|
* forcing users to specify an encoding to initdb on such platforms.
|
||||||
|
* It seems better to silently default to SQL_ASCII.
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
pg_get_encoding_from_locale(const char *ctype)
|
pg_get_encoding_from_locale(const char *ctype)
|
||||||
|
Loading…
Reference in New Issue
Block a user