From eb335a034b571516cfbb2309a46177b435c9f913 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii Date: Thu, 18 Jul 2002 02:02:30 +0000 Subject: [PATCH] I have committed many support files for CREATE CONVERSION. Default conversion procs and conversions are added in initdb. Currently supported conversions are: UTF-8(UNICODE) <--> SQL_ASCII, ISO-8859-1 to 16, EUC_JP, EUC_KR, EUC_CN, EUC_TW, SJIS, BIG5, GBK, GB18030, UHC, JOHAB, TCVN EUC_JP <--> SJIS EUC_TW <--> BIG5 MULE_INTERNAL <--> EUC_JP, SJIS, EUC_TW, BIG5 Note that the initial contents of the pg_conversion system catalog are created in the initdb process. So running initdb is the ideal way to get them; however, it is also possible to add them to your existing databases by hand. To accomplish this: psql -f your_postgresql_install_path/share/conversion_create.sql your_database For that reason I did not bump up the version in catversion.h. TODO: Add more conversion procs Add [CASCADE|RESTRICT] to DROP CONVERSION Add tuples to pg_depend Add regression tests Write docs Add SQL99 CONVERT command? 
-- Tatsuo Ishii --- src/backend/Makefile | 3 +- src/backend/commands/variable.c | 37 +- src/backend/parser/gram.y | 31 +- src/backend/utils/mb/Makefile | 16 +- src/backend/utils/mb/big5.c | 378 --------- src/backend/utils/mb/conv.c | 1246 +---------------------------- src/backend/utils/mb/mbutils.c | 254 ++---- src/backend/utils/mb/sjis.map | 396 --------- src/backend/utils/misc/guc.c | 5 +- src/bin/initdb/initdb.sh | 7 +- src/bin/psql/command.c | 15 +- src/include/mb/pg_wchar.h | 49 +- src/interfaces/libpq/fe-connect.c | 12 +- 13 files changed, 173 insertions(+), 2276 deletions(-) delete mode 100644 src/backend/utils/mb/big5.c delete mode 100644 src/backend/utils/mb/sjis.map diff --git a/src/backend/Makefile b/src/backend/Makefile index cfd065d6c2f..925340c3b19 100644 --- a/src/backend/Makefile +++ b/src/backend/Makefile @@ -4,7 +4,7 @@ # # Copyright (c) 1994, Regents of the University of California # -# $Header: /cvsroot/pgsql/src/backend/Makefile,v 1.80 2002/07/16 05:46:35 momjian Exp $ +# $Header: /cvsroot/pgsql/src/backend/Makefile,v 1.81 2002/07/18 02:02:29 ishii Exp $ # #------------------------------------------------------------------------- @@ -131,6 +131,7 @@ endif ifeq ($(enable_nls), yes) $(MAKE) -C po $@ endif + $(MAKE) -C utils/mb $@ install-bin: postgres $(POSTGRES_IMP) installdirs $(INSTALL_PROGRAM) postgres$(X) $(DESTDIR)$(bindir)/postgres$(X) diff --git a/src/backend/commands/variable.c b/src/backend/commands/variable.c index 70e7e88d60c..55a11a766cd 100644 --- a/src/backend/commands/variable.c +++ b/src/backend/commands/variable.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/variable.c,v 1.69 2002/06/20 20:29:27 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/variable.c,v 1.70 2002/07/18 02:02:29 ishii Exp $ * *------------------------------------------------------------------------- */ @@ -27,15 +27,7 @@ #include "utils/guc.h" #include "utils/syscache.h" #include "utils/tqual.h" - -#ifdef 
MULTIBYTE #include "mb/pg_wchar.h" -#else -/* Grand unified hard-coded badness */ -#define pg_get_client_encoding_name() "SQL_ASCII" -#define GetDatabaseEncodingName() "SQL_ASCII" -#endif - /* * DATESTYLE @@ -472,43 +464,30 @@ show_random_seed(void) /* - * MULTIBYTE-related functions - * - * If MULTIBYTE support was not compiled, we still allow these variables - * to exist, but you can't set them to anything but "SQL_ASCII". This - * minimizes interoperability problems between non-MB servers and MB-enabled - * clients. + * encoding handling functions */ const char * assign_client_encoding(const char *value, bool doit, bool interactive) { -#ifdef MULTIBYTE int encoding; - int old_encoding = 0; encoding = pg_valid_client_encoding(value); if (encoding < 0) return NULL; - /* - * Ugly API here ... can't test validity without setting new encoding... + + /* XXX SetClientEncoding depends on namespace functions which are + * not available at startup time. So we accept requested client + * encoding anyway which might not be valid (e.g. no conversion + * procs available). 
*/ - if (!doit) - old_encoding = pg_get_client_encoding(); - if (pg_set_client_encoding(encoding) < 0) + if (SetClientEncoding(encoding, doit) < 0) { if (interactive) elog(ERROR, "Conversion between %s and %s is not supported", value, GetDatabaseEncodingName()); return NULL; } - if (!doit) - pg_set_client_encoding(old_encoding); -#else - if (strcasecmp(value, pg_get_client_encoding_name()) != 0) - return NULL; -#endif - return value; } diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 9b8e52620a6..33b534f8eff 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.341 2002/07/16 22:12:20 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.342 2002/07/18 02:02:30 ishii Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -62,13 +62,7 @@ #include "utils/numeric.h" #include "utils/datetime.h" #include "utils/date.h" - -#ifdef MULTIBYTE #include "mb/pg_wchar.h" -#else -#define GetStandardEncoding() 0 /* PG_SQL_ASCII */ -#define GetStandardEncodingName() "SQL_ASCII" -#endif extern List *parsetree; /* final parse result is delivered here */ @@ -3570,28 +3564,23 @@ createdb_opt_item: | ENCODING opt_equal Sconst { int encoding; -#ifdef MULTIBYTE - encoding = pg_char_to_encoding($3); - if (encoding == -1) + + if (pg_valid_server_encoding($3) < 0) elog(ERROR, "%s is not a valid encoding name", $3); -#else - if (strcasecmp($3, GetStandardEncodingName()) != 0) - elog(ERROR, "Multi-byte support is not enabled"); - encoding = GetStandardEncoding(); -#endif + encoding = pg_char_to_encoding($3); + $$ = makeNode(DefElem); $$->defname = "encoding"; $$->arg = (Node *)makeInteger(encoding); } | ENCODING opt_equal Iconst { -#ifdef MULTIBYTE - if (!pg_get_enconv_by_encoding($3)) + const char *encoding_name; + + encoding_name = pg_encoding_to_char($3); + if (!strcmp(encoding_name,"") || + pg_valid_server_encoding(encoding_name) < 0) elog(ERROR, 
"%d is not a valid encoding code", $3); -#else - if ($3 != GetStandardEncoding()) - elog(ERROR, "Multi-byte support is not enabled"); -#endif $$ = makeNode(DefElem); $$->defname = "encoding"; $$->arg = (Node *)makeInteger($3); diff --git a/src/backend/utils/mb/Makefile b/src/backend/utils/mb/Makefile index bf8fcdad63c..64b60a510a4 100644 --- a/src/backend/utils/mb/Makefile +++ b/src/backend/utils/mb/Makefile @@ -4,7 +4,7 @@ # Makefile for utils/mb # # IDENTIFICATION -# $Header: /cvsroot/pgsql/src/backend/utils/mb/Makefile,v 1.17 2001/09/22 08:44:47 ishii Exp $ +# $Header: /cvsroot/pgsql/src/backend/utils/mb/Makefile,v 1.18 2002/07/18 02:02:30 ishii Exp $ # #------------------------------------------------------------------------- @@ -12,19 +12,23 @@ subdir = src/backend/utils/mb top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = encnames.o conv.o mbutils.o wchar.o wstrcmp.o wstrncmp.o big5.o +OBJS = encnames.o conv.o mbutils.o wchar.o wstrcmp.o wstrncmp.o +DIRS = conversion_procs -all: SUBSYS.o +all install installdirs uninstal distprep: SUBSYS.o + @for dir in $(DIRS); do $(MAKE) -C $$dir $@ || exit; done + +clean distclean maintainer-clean: + rm -f SUBSYS.o $(OBJS) + @for dir in $(DIRS); do $(MAKE) -C $$dir $@; done SUBSYS.o: $(OBJS) + @for dir in $(DIRS); do $(MAKE) -C $$dir all || exit; done $(LD) $(LDREL) $(LDOUT) SUBSYS.o $(OBJS) depend dep: $(CC) -MM $(CFLAGS) *.c >depend -clean: - rm -f SUBSYS.o $(OBJS) - ifeq (depend,$(wildcard depend)) include depend endif diff --git a/src/backend/utils/mb/big5.c b/src/backend/utils/mb/big5.c deleted file mode 100644 index 7ebaafb0790..00000000000 --- a/src/backend/utils/mb/big5.c +++ /dev/null @@ -1,378 +0,0 @@ -/* - * conversion between BIG5 and Mule Internal Code(CNS 116643-1992 - * plane 1 and plane 2). - * This program is partially copied from lv(Multilingual file viewer) - * and slightly modified. lv is written and copyrighted by NARITA Tomio - * (nrt@web.ad.jp). 
- * - * 1999/1/15 Tatsuo Ishii - * - * $Id: big5.c,v 1.8 2001/10/25 05:49:51 momjian Exp $ - */ - -/* can be used in either frontend or backend */ -#include "postgres_fe.h" - -#include "mb/pg_wchar.h" - -typedef struct -{ - unsigned short code, - peer; -} codes_t; - -/* map Big5 Level 1 to CNS 11643-1992 Plane 1 */ -static codes_t big5Level1ToCnsPlane1[25] = { /* range */ - {0xA140, 0x2121}, - {0xA1F6, 0x2258}, - {0xA1F7, 0x2257}, - {0xA1F8, 0x2259}, - {0xA2AF, 0x2421}, - {0xA3C0, 0x4221}, - {0xa3e1, 0x0000}, - {0xA440, 0x4421}, - {0xACFE, 0x5753}, - {0xacff, 0x0000}, - {0xAD40, 0x5323}, - {0xAFD0, 0x5754}, - {0xBBC8, 0x6B51}, - {0xBE52, 0x6B50}, - {0xBE53, 0x6F5C}, - {0xC1AB, 0x7536}, - {0xC2CB, 0x7535}, - {0xC2CC, 0x7737}, - {0xC361, 0x782E}, - {0xC3B9, 0x7865}, - {0xC3BA, 0x7864}, - {0xC3BB, 0x7866}, - {0xC456, 0x782D}, - {0xC457, 0x7962}, - {0xc67f, 0x0000} -}; - -/* map CNS 11643-1992 Plane 1 to Big5 Level 1 */ -static codes_t cnsPlane1ToBig5Level1[26] = { /* range */ - {0x2121, 0xA140}, - {0x2257, 0xA1F7}, - {0x2258, 0xA1F6}, - {0x2259, 0xA1F8}, - {0x234f, 0x0000}, - {0x2421, 0xA2AF}, - {0x2571, 0x0000}, - {0x4221, 0xA3C0}, - {0x4242, 0x0000}, - {0x4421, 0xA440}, - {0x5323, 0xAD40}, - {0x5753, 0xACFE}, - {0x5754, 0xAFD0}, - {0x6B50, 0xBE52}, - {0x6B51, 0xBBC8}, - {0x6F5C, 0xBE53}, - {0x7535, 0xC2CB}, - {0x7536, 0xC1AB}, - {0x7737, 0xC2CC}, - {0x782D, 0xC456}, - {0x782E, 0xC361}, - {0x7864, 0xC3BA}, - {0x7865, 0xC3B9}, - {0x7866, 0xC3BB}, - {0x7962, 0xC457}, - {0x7d4c, 0x0000} -}; - -/* map Big5 Level 2 to CNS 11643-1992 Plane 2 */ -static codes_t big5Level2ToCnsPlane2[48] = { /* range */ - {0xC940, 0x2121}, - {0xc94a, 0x0000}, - {0xC94B, 0x212B}, - {0xC96C, 0x214D}, - {0xC9BE, 0x214C}, - {0xC9BF, 0x217D}, - {0xC9ED, 0x224E}, - {0xCAF7, 0x224D}, - {0xCAF8, 0x2439}, - {0xD77A, 0x3F6A}, - {0xD77B, 0x387E}, - {0xDBA7, 0x3F6B}, - {0xDDFC, 0x4176}, - {0xDDFD, 0x4424}, - {0xE8A3, 0x554C}, - {0xE976, 0x5723}, - {0xEB5B, 0x5A29}, - {0xEBF1, 0x554B}, - {0xEBF2, 
0x5B3F}, - {0xECDE, 0x5722}, - {0xECDF, 0x5C6A}, - {0xEDAA, 0x5D75}, - {0xEEEB, 0x642F}, - {0xEEEC, 0x6039}, - {0xF056, 0x5D74}, - {0xF057, 0x6243}, - {0xF0CB, 0x5A28}, - {0xF0CC, 0x6337}, - {0xF163, 0x6430}, - {0xF16B, 0x6761}, - {0xF16C, 0x6438}, - {0xF268, 0x6934}, - {0xF269, 0x6573}, - {0xF2C3, 0x664E}, - {0xF375, 0x6762}, - {0xF466, 0x6935}, - {0xF4B5, 0x664D}, - {0xF4B6, 0x6962}, - {0xF4FD, 0x6A4C}, - {0xF663, 0x6A4B}, - {0xF664, 0x6C52}, - {0xF977, 0x7167}, - {0xF9C4, 0x7166}, - {0xF9C5, 0x7234}, - {0xF9C6, 0x7240}, - {0xF9C7, 0x7235}, - {0xF9D2, 0x7241}, - {0xf9d6, 0x0000} -}; - -/* map CNS 11643-1992 Plane 2 to Big5 Level 2 */ -static codes_t cnsPlane2ToBig5Level2[49] = { /* range */ - {0x2121, 0xC940}, - {0x212B, 0xC94B}, - {0x214C, 0xC9BE}, - {0x214D, 0xC96C}, - {0x217D, 0xC9BF}, - {0x224D, 0xCAF7}, - {0x224E, 0xC9ED}, - {0x2439, 0xCAF8}, - {0x387E, 0xD77B}, - {0x3F6A, 0xD77A}, - {0x3F6B, 0xDBA7}, - {0x4424, 0x0000}, - {0x4176, 0xDDFC}, - {0x4177, 0x0000}, - {0x4424, 0xDDFD}, - {0x554B, 0xEBF1}, - {0x554C, 0xE8A3}, - {0x5722, 0xECDE}, - {0x5723, 0xE976}, - {0x5A28, 0xF0CB}, - {0x5A29, 0xEB5B}, - {0x5B3F, 0xEBF2}, - {0x5C6A, 0xECDF}, - {0x5D74, 0xF056}, - {0x5D75, 0xEDAA}, - {0x6039, 0xEEEC}, - {0x6243, 0xF057}, - {0x6337, 0xF0CC}, - {0x642F, 0xEEEB}, - {0x6430, 0xF163}, - {0x6438, 0xF16C}, - {0x6573, 0xF269}, - {0x664D, 0xF4B5}, - {0x664E, 0xF2C3}, - {0x6761, 0xF16B}, - {0x6762, 0xF375}, - {0x6934, 0xF268}, - {0x6935, 0xF466}, - {0x6962, 0xF4B6}, - {0x6A4B, 0xF663}, - {0x6A4C, 0xF4FD}, - {0x6C52, 0xF664}, - {0x7166, 0xF9C4}, - {0x7167, 0xF977}, - {0x7234, 0xF9C5}, - {0x7235, 0xF9C7}, - {0x7240, 0xF9C6}, - {0x7241, 0xF9D2}, - {0x7245, 0x0000} -}; - -/* Big Five Level 1 Correspondence to CNS 11643-1992 Plane 4 */ -static unsigned short b1c4[][2] = { - {0xC879, 0x2123}, - {0xC87B, 0x2124}, - {0xC87D, 0x212A}, - {0xC8A2, 0x2152} -}; - -/* Big Five Level 2 Correspondence to CNS 11643-1992 Plane 3 */ -static unsigned short b2c3[][2] = { - {0xF9D6, 0x4337}, - 
{0xF9D7, 0x4F50}, - {0xF9D8, 0x444E}, - {0xF9D9, 0x504A}, - {0xF9DA, 0x2C5D}, - {0xF9DB, 0x3D7E}, - {0xF9DC, 0x4B5C} -}; - -static unsigned short BinarySearchRange - (codes_t *array, int high, unsigned short code) -{ - int low, - mid, - distance, - tmp; - - low = 0; - mid = high >> 1; - - for (; low <= high; mid = (low + high) >> 1) - { - if ((array[mid].code <= code) && (array[mid + 1].code > code)) - { - if (0 == array[mid].peer) - return 0; - if (code >= 0xa140U) - { - /* big5 to cns */ - tmp = ((code & 0xff00) - (array[mid].code & 0xff00)) >> 8; - high = code & 0x00ff; - low = array[mid].code & 0x00ff; - - /* - * NOTE: big5 high_byte: 0xa1-0xfe, low_byte: 0x40-0x7e, - * 0xa1-0xfe (radicals: 0x00-0x3e, 0x3f-0x9c) big5 radix - * is 0x9d. [region_low, region_high] - * We should remember big5 has two different regions - * (above). There is a bias for the distance between these - * regions. 0xa1 - 0x7e + bias = 1 (Distance between 0xa1 - * and 0x7e is 1.) bias = - 0x22. - */ - distance = tmp * 0x9d + high - low + - (high >= 0xa1 ? (low >= 0xa1 ? 0 : -0x22) - : (low >= 0xa1 ? +0x22 : 0)); - - /* - * NOTE: we have to convert the distance into a code - * point. The code point's low_byte is 0x21 plus mod_0x5e. - * In the first, we extract the mod_0x5e of the starting - * code point, subtracting 0x21, and add distance to it. - * Then we calculate again mod_0x5e of them, and restore - * the final codepoint, adding 0x21. - */ - tmp = (array[mid].peer & 0x00ff) + distance - 0x21; - tmp = (array[mid].peer & 0xff00) + ((tmp / 0x5e) << 8) - + 0x21 + tmp % 0x5e; - return tmp; - } - else - { - /* cns to big5 */ - tmp = ((code & 0xff00) - (array[mid].code & 0xff00)) >> 8; - - /* - * NOTE: ISO charsets ranges between 0x21-0xfe - * (94charset). Its radix is 0x5e. But there is no - * distance bias like big5. 
- */ - distance = tmp * 0x5e - + ((int) (code & 0x00ff) - (int) (array[mid].code & 0x00ff)); - - /* - * NOTE: Similar to big5 to cns conversion, we extract - * mod_0x9d and restore mod_0x9d into a code point. - */ - low = array[mid].peer & 0x00ff; - tmp = low + distance - (low >= 0xa1 ? 0x62 : 0x40); - low = tmp % 0x9d; - tmp = (array[mid].peer & 0xff00) + ((tmp / 0x9d) << 8) - + (low > 0x3e ? 0x62 : 0x40) + low; - return tmp; - } - } - else if (array[mid].code > code) - high = mid - 1; - else - low = mid + 1; - } - - return 0; -} - - -unsigned short -BIG5toCNS(unsigned short big5, unsigned char *lc) -{ - unsigned short cns = 0; - int i; - - if (big5 < 0xc940U) - { - /* level 1 */ - - for (i = 0; i < sizeof(b1c4) / sizeof(unsigned short); i++) - { - if (b1c4[i][0] == big5) - { - *lc = LC_CNS11643_4; - return (b1c4[i][1] | 0x8080U); - } - } - - if (0 < (cns = BinarySearchRange(big5Level1ToCnsPlane1, 23, big5))) - *lc = LC_CNS11643_1; - } - else if (big5 == 0xc94aU) - { - /* level 2 */ - *lc = LC_CNS11643_1; - cns = 0x4442; - } - else - { - /* level 2 */ - for (i = 0; i < sizeof(b2c3) / sizeof(unsigned short); i++) - { - if (b2c3[i][0] == big5) - { - *lc = LC_CNS11643_3; - return (b2c3[i][1] | 0x8080U); - } - } - - if (0 < (cns = BinarySearchRange(big5Level2ToCnsPlane2, 46, big5))) - *lc = LC_CNS11643_2; - } - - if (0 == cns) - { /* no mapping Big5 to CNS 11643-1992 */ - *lc = 0; - return (unsigned short) '?'; - } - - return cns | 0x8080; -} - -unsigned short -CNStoBIG5(unsigned short cns, unsigned char lc) -{ - int i; - unsigned int big5 = 0; - - cns &= 0x7f7f; - - switch (lc) - { - case LC_CNS11643_1: - big5 = BinarySearchRange(cnsPlane1ToBig5Level1, 24, cns); - break; - case LC_CNS11643_2: - big5 = BinarySearchRange(cnsPlane2ToBig5Level2, 47, cns); - break; - case LC_CNS11643_3: - for (i = 0; i < sizeof(b2c3) / sizeof(unsigned short); i++) - { - if (b2c3[i][1] == cns) - return (b2c3[i][0]); - } - break; - case LC_CNS11643_4: - for (i = 0; i < sizeof(b1c4) / 
sizeof(unsigned short); i++) - { - if (b1c4[i][1] == cns) - return (b1c4[i][0]); - } - default: - break; - } - return big5; -} diff --git a/src/backend/utils/mb/conv.c b/src/backend/utils/mb/conv.c index cdfa54ac65c..095ca562cc0 100644 --- a/src/backend/utils/mb/conv.c +++ b/src/backend/utils/mb/conv.c @@ -1,104 +1,24 @@ -/* - * conversion between client encoding and server internal encoding - * (currently mule internal code (mic) is used) - * Tatsuo Ishii +/*------------------------------------------------------------------------- * - * WIN1250 client encoding support contributed by Pavel Behal - * SJIS UDC (NEC selection IBM kanji) support contributed by Eiji Tokuya + * Utility functions for conversion procs. * - * $Id: conv.c,v 1.38 2002/06/13 08:28:54 ishii Exp $ + * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/utils/mb/conv.c,v 1.39 2002/07/18 02:02:30 ishii Exp $ * + *------------------------------------------------------------------------- */ #include "postgres.h" #include "mb/pg_wchar.h" -#ifdef UNICODE_CONVERSION - -/* - * for Unicode (UTF-8) support - */ -#include "Unicode/utf8_to_iso8859_2.map" -#include "Unicode/utf8_to_iso8859_3.map" -#include "Unicode/utf8_to_iso8859_4.map" -#include "Unicode/utf8_to_iso8859_5.map" -#include "Unicode/utf8_to_iso8859_6.map" -#include "Unicode/utf8_to_iso8859_7.map" -#include "Unicode/utf8_to_iso8859_8.map" -#include "Unicode/utf8_to_iso8859_9.map" -#include "Unicode/utf8_to_iso8859_10.map" -#include "Unicode/utf8_to_iso8859_13.map" -#include "Unicode/utf8_to_iso8859_14.map" -#include "Unicode/utf8_to_iso8859_15.map" -#include "Unicode/utf8_to_iso8859_16.map" -#include "Unicode/iso8859_2_to_utf8.map" -#include "Unicode/iso8859_3_to_utf8.map" -#include "Unicode/iso8859_4_to_utf8.map" -#include "Unicode/iso8859_5_to_utf8.map" -#include 
"Unicode/iso8859_6_to_utf8.map" -#include "Unicode/iso8859_7_to_utf8.map" -#include "Unicode/iso8859_8_to_utf8.map" -#include "Unicode/iso8859_9_to_utf8.map" -#include "Unicode/iso8859_10_to_utf8.map" -#include "Unicode/iso8859_13_to_utf8.map" -#include "Unicode/iso8859_14_to_utf8.map" -#include "Unicode/iso8859_15_to_utf8.map" -#include "Unicode/iso8859_16_to_utf8.map" -#include "Unicode/utf8_to_euc_jp.map" -#include "Unicode/euc_jp_to_utf8.map" -#include "Unicode/utf8_to_euc_cn.map" -#include "Unicode/euc_cn_to_utf8.map" -#include "Unicode/utf8_to_gb18030.map" -#include "Unicode/gb18030_to_utf8.map" -#include "Unicode/utf8_to_euc_kr.map" -#include "Unicode/euc_kr_to_utf8.map" -#include "Unicode/utf8_to_euc_tw.map" -#include "Unicode/euc_tw_to_utf8.map" -#include "Unicode/utf8_to_sjis.map" -#include "Unicode/sjis_to_utf8.map" -#include "Unicode/utf8_to_big5.map" -#include "Unicode/big5_to_utf8.map" -#include "Unicode/utf8_to_gbk.map" -#include "Unicode/gbk_to_utf8.map" -#include "Unicode/utf8_to_uhc.map" -#include "Unicode/uhc_to_utf8.map" -#include "Unicode/utf8_to_johab.map" -#include "Unicode/johab_to_utf8.map" -#include "Unicode/utf8_to_tcvn.map" -#include "Unicode/tcvn_to_utf8.map" -#include "Unicode/utf8_to_win1250.map" -#include "Unicode/win1250_to_utf8.map" -#include "Unicode/utf8_to_win1256.map" -#include "Unicode/win1256_to_utf8.map" -#include "Unicode/utf8_to_win874.map" -#include "Unicode/win874_to_utf8.map" -/* Cyrillic charset conversion */ -#include "Unicode/alt_to_utf8.map" -#include "Unicode/koi8r_to_utf8.map" -#include "Unicode/win1251_to_utf8.map" -#include "Unicode/utf8_to_alt.map" -#include "Unicode/utf8_to_koi8r.map" -#include "Unicode/utf8_to_win1251.map" -#endif /* UNICODE_CONVERSION */ - -/* - * SJIS alternative code. - * this code is used if a mapping EUC -> SJIS is not defined. 
- */ -#define PGSJISALTCODE 0x81ac -#define PGEUCALTCODE 0xa2ae - -/* - * conversion table between SJIS UDC (IBM kanji) and EUC_JP - */ -#include "sjis.map" - /* * convert bogus chars that cannot be represented in the current * encoding system. */ -static void -printBogusChar(unsigned char **mic, unsigned char **p) +void +pg_print_bogus_char(unsigned char **mic, unsigned char **p) { char strbuf[16]; int l = pg_mic_mblen(*mic); @@ -113,298 +33,6 @@ printBogusChar(unsigned char **mic, unsigned char **p) *(*p)++ = ')'; } -/* - * SJIS ---> MIC - */ -static void -sjis2mic(unsigned char *sjis, unsigned char *p, int len) -{ - int c1, - c2, -/* Eiji Tokuya patched begin */ - i, - k, - k2; - -/* Eiji Tokuya patched end */ - while (len > 0 && (c1 = *sjis++)) - { - if (c1 >= 0xa1 && c1 <= 0xdf) - { - /* JIS X0201 (1 byte kana) */ - len--; - *p++ = LC_JISX0201K; - *p++ = c1; - } - else if (c1 > 0x7f) - { - /* - * JIS X0208, X0212, user defined extended characters - */ - c2 = *sjis++; - k = (c1 << 8) + c2; -/* Eiji Tokuya patched begin */ - if (k >= 0xed40 && k < 0xf040) - { - /* NEC selection IBM kanji */ - for (i = 0;; i++) - { - k2 = ibmkanji[i].nec; - if (k2 == 0xffff) - break; - if (k2 == k) - { - k = ibmkanji[i].sjis; - c1 = (k >> 8) & 0xff; - c2 = k & 0xff; - } - } - } - - if (k < 0xeb3f) -/* Eiji Tokuya patched end */ - { - /* JIS X0208 */ - len -= 2; - *p++ = LC_JISX0208; - *p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e); - *p++ = c2 + ((c2 > 0x9e) ? 
2 : 0x60) + (c2 < 0x80); - } -/* Eiji Tokuya patched begin */ - else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc)) - { - /* NEC selection IBM kanji - Other undecided justice */ -/* Eiji Tokuya patched end */ - *p++ = LC_JISX0208; - *p++ = PGEUCALTCODE >> 8; - *p++ = PGEUCALTCODE & 0xff; - } - else if (k >= 0xf040 && k < 0xf540) - { - /* - * UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 - - * 0x7e7e EUC 0xf5a1 - 0xfefe - */ - len -= 2; - *p++ = LC_JISX0208; - c1 -= 0x6f; - *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e); - *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80); - } - else if (k >= 0xf540 && k < 0xfa40) - { - /* - * UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 - - * 0x7e7e EUC 0x8ff5a1 - 0x8ffefe - */ - len -= 2; - *p++ = LC_JISX0212; - c1 -= 0x74; - *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e); - *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80); - } - else if (k >= 0xfa40) - { - /* - * mapping IBM kanji to X0208 and X0212 - * - */ - len -= 2; - for (i = 0;; i++) - { - k2 = ibmkanji[i].sjis; - if (k2 == 0xffff) - break; - if (k2 == k) - { - k = ibmkanji[i].euc; - if (k >= 0x8f0000) - { - *p++ = LC_JISX0212; - *p++ = 0x80 | ((k & 0xff00) >> 8); - *p++ = 0x80 | (k & 0xff); - } - else - { - *p++ = LC_JISX0208; - *p++ = 0x80 | (k >> 8); - *p++ = 0x80 | (k & 0xff); - } - } - } - } - } - else - { /* should be ASCII */ - len--; - *p++ = c1; - } - } - *p = '\0'; -} - -/* - * MIC ---> SJIS - */ -static void -mic2sjis(unsigned char *mic, unsigned char *p, int len) -{ - int c1, - c2, - k; - - while (len > 0 && (c1 = *mic)) - { - len -= pg_mic_mblen(mic++); - - if (c1 == LC_JISX0201K) - *p++ = *mic++; - else if (c1 == LC_JISX0208) - { - c1 = *mic++; - c2 = *mic++; - k = (c1 << 8) | (c2 & 0xff); - if (k >= 0xf5a1) - { - /* UDC1 */ - c1 -= 0x54; - *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f; - } - else - *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1); - *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 
0x61 : 0x60) : 2); - } - else if (c1 == LC_JISX0212) - { - int i, - k2; - - c1 = *mic++; - c2 = *mic++; - k = c1 << 8 | c2; - if (k >= 0xf5a1) - { - /* UDC2 */ - c1 -= 0x54; - *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74; - *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2); - } - else - { - /* IBM kanji */ - for (i = 0;; i++) - { - k2 = ibmkanji[i].euc & 0xffff; - if (k2 == 0xffff) - { - *p++ = PGSJISALTCODE >> 8; - *p++ = PGSJISALTCODE & 0xff; - break; - } - if (k2 == k) - { - k = ibmkanji[i].sjis; - *p++ = k >> 8; - *p++ = k & 0xff; - break; - } - } - } - } - else if (c1 > 0x7f) - { - /* cannot convert to SJIS! */ - *p++ = PGSJISALTCODE >> 8; - *p++ = PGSJISALTCODE & 0xff; - } - else - { /* should be ASCII */ - *p++ = c1; - } - } - *p = '\0'; -} - -/* - * EUC_JP ---> MIC - */ -static void -euc_jp2mic(unsigned char *euc, unsigned char *p, int len) -{ - int c1; - - while (len > 0 && (c1 = *euc++)) - { - if (c1 == SS2) - { /* 1 byte kana? */ - len -= 2; - *p++ = LC_JISX0201K; - *p++ = *euc++; - } - else if (c1 == SS3) - { /* JIS X0212 kanji? */ - len -= 3; - *p++ = LC_JISX0212; - *p++ = *euc++; - *p++ = *euc++; - } - else if (c1 & 0x80) - { /* kanji? */ - len -= 2; - *p++ = LC_JISX0208; - *p++ = c1; - *p++ = *euc++; - } - else - { /* should be ASCII */ - len--; - *p++ = c1; - } - } - *p = '\0'; -} - -/* - * MIC ---> EUC_JP - */ -static void -mic2euc_jp(unsigned char *mic, unsigned char *p, int len) -{ - int c1; - - while (len > 0 && (c1 = *mic)) - { - len -= pg_mic_mblen(mic++); - - if (c1 == LC_JISX0201K) - { - *p++ = SS2; - *p++ = *mic++; - } - else if (c1 == LC_JISX0212) - { - *p++ = SS3; - *p++ = *mic++; - *p++ = *mic++; - } - else if (c1 == LC_JISX0208) - { - *p++ = *mic++; - *p++ = *mic++; - } - else if (c1 > 0x7f) - { /* cannot convert to EUC_JP! 
*/ - mic--; - printBogusChar(&mic, &p); - } - else - { /* should be ASCII */ - *p++ = c1; - } - } - *p = '\0'; -} - /* * EUC_KR ---> MIC */ @@ -451,7 +79,7 @@ mic2euc_kr(unsigned char *mic, unsigned char *p, int len) else if (c1 > 0x7f) { /* cannot convert to EUC_KR! */ mic--; - printBogusChar(&mic, &p); + pg_print_bogus_char(&mic, &p); } else { /* should be ASCII */ @@ -507,7 +135,7 @@ mic2euc_cn(unsigned char *mic, unsigned char *p, int len) else if (c1 > 0x7f) { /* cannot convert to EUC_CN! */ mic--; - printBogusChar(&mic, &p); + pg_print_bogus_char(&mic, &p); } else { /* should be ASCII */ @@ -517,6 +145,7 @@ mic2euc_cn(unsigned char *mic, unsigned char *p, int len) *p = '\0'; } +#ifdef NOT_USED /* * GB18030 ---> MIC * Added by Bill Huang , @@ -594,206 +223,19 @@ mic2gb18030(unsigned char *mic, unsigned char *p, int len) } else{ mic--; - printBogusChar(&mic, &p); + pg_print_bogus_char(&mic, &p); mic--; - printBogusChar(&mic, &p); + pg_print_bogus_char(&mic, &p); } } else{ mic--; - printBogusChar(&mic, &p); - } - } - *p = '\0'; -} - -/* - * EUC_TW ---> MIC - */ -static void -euc_tw2mic(unsigned char *euc, unsigned char *p, int len) -{ - int c1; - - while (len > 0 && (c1 = *euc++)) - { - if (c1 == SS2) - { - len -= 4; - c1 = *euc++; /* plane No. 
*/ - if (c1 == 0xa1) - *p++ = LC_CNS11643_1; - else if (c1 == 0xa2) - *p++ = LC_CNS11643_2; - else - { - *p++ = 0x9d; /* LCPRV2 */ - *p++ = 0xa3 - c1 + LC_CNS11643_3; - } - *p++ = *euc++; - *p++ = *euc++; - } - else if (c1 & 0x80) - { /* CNS11643-1 */ - len -= 2; - *p++ = LC_CNS11643_1; - *p++ = c1; - *p++ = *euc++; - } - else - { /* should be ASCII */ - len--; - *p++ = c1; - } - } - *p = '\0'; -} - -/* - * MIC ---> EUC_TW - */ -static void -mic2euc_tw(unsigned char *mic, unsigned char *p, int len) -{ - int c1; - - while (len > 0 && (c1 = *mic)) - { - len -= pg_mic_mblen(mic++); - - if (c1 == LC_CNS11643_1) - { - *p++ = *mic++; - *p++ = *mic++; - } - else if (c1 == LC_CNS11643_2) - { - *p++ = SS2; - *p++ = 0xa2; - *p++ = *mic++; - *p++ = *mic++; - } - else if (c1 == 0x9d) - { /* LCPRV2? */ - *p++ = SS2; - *p++ = *mic++ - LC_CNS11643_3 + 0xa3; - *p++ = *mic++; - *p++ = *mic++; - } - else if (c1 > 0x7f) - { /* cannot convert to EUC_TW! */ - mic--; - printBogusChar(&mic, &p); - } - else - { /* should be ASCII */ - *p++ = c1; - } - } - *p = '\0'; -} - -/* - * Big5 ---> MIC - */ -static void -big52mic(unsigned char *big5, unsigned char *p, int len) -{ - unsigned short c1; - unsigned short big5buf, - cnsBuf; - unsigned char lc; - char bogusBuf[3]; - int i; - - while (len > 0 && (c1 = *big5++)) - { - if (c1 <= 0x7fU) - { /* ASCII */ - len--; - *p++ = c1; - } - else - { - len -= 2; - big5buf = c1 << 8; - c1 = *big5++; - big5buf |= c1; - cnsBuf = BIG5toCNS(big5buf, &lc); - if (lc != 0) - { - if (lc == LC_CNS11643_3 || lc == LC_CNS11643_4) - { - *p++ = 0x9d; /* LCPRV2 */ - } - *p++ = lc; /* Plane No. 
*/ - *p++ = (cnsBuf >> 8) & 0x00ff; - *p++ = cnsBuf & 0x00ff; - } - else - { /* cannot convert */ - big5 -= 2; - *p++ = '('; - for (i = 0; i < 2; i++) - { - sprintf(bogusBuf, "%02x", *big5++); - *p++ = bogusBuf[0]; - *p++ = bogusBuf[1]; - } - *p++ = ')'; - } - } - } - *p = '\0'; -} - -/* - * MIC ---> Big5 - */ -static void -mic2big5(unsigned char *mic, unsigned char *p, int len) -{ - int l; - unsigned short c1; - unsigned short big5buf, - cnsBuf; - - while (len > 0 && (c1 = *mic)) - { - l = pg_mic_mblen(mic++); - len -= l; - - /* 0x9d means LCPRV2 */ - if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == 0x9d) - { - if (c1 == 0x9d) - { - c1 = *mic++; /* get plane no. */ - } - cnsBuf = (*mic++) << 8; - cnsBuf |= (*mic++) & 0x00ff; - big5buf = CNStoBIG5(cnsBuf, c1); - if (big5buf == 0) - { /* cannot convert to Big5! */ - mic -= l; - printBogusChar(&mic, &p); - } - else - { - *p++ = (big5buf >> 8) & 0x00ff; - *p++ = big5buf & 0x00ff; - } - } - else if (c1 <= 0x7f) /* ASCII */ - *p++ = c1; - else - { /* cannot convert to Big5! */ - mic--; - printBogusChar(&mic, &p); + pg_print_bogus_char(&mic, &p); } } *p = '\0'; } +#endif /* * LATINn ---> MIC @@ -806,7 +248,7 @@ latin2mic(unsigned char *l, unsigned char *p, int len, int lc) while (len-- > 0 && (c1 = *l++)) { if (c1 > 0x7f) - { /* Latin1? */ + { /* Latin? 
*/ *p++ = lc; } *p++ = c1; @@ -831,7 +273,7 @@ mic2latin(unsigned char *mic, unsigned char *p, int len, int lc) else if (c1 > 0x7f) { mic--; - printBogusChar(&mic, &p); + pg_print_bogus_char(&mic, &p); } else { /* should be ASCII */ @@ -882,24 +324,11 @@ mic2latin4(unsigned char *mic, unsigned char *p, int len) mic2latin(mic, p, len, LC_ISO8859_4); } -#ifdef NOT_USED -static void -latin52mic(unsigned char *l, unsigned char *p, int len) -{ - latin2mic(l, p, len, LC_ISO8859_5); -} -static void -mic2latin5(unsigned char *mic, unsigned char *p, int len) -{ - mic2latin(mic, p, len, LC_ISO8859_5); -} -#endif - /* * ASCII ---> MIC */ -static void -ascii2mic(unsigned char *l, unsigned char *p, int len) +void +pg_ascii2mic(unsigned char *l, unsigned char *p, int len) { int c1; @@ -911,15 +340,15 @@ ascii2mic(unsigned char *l, unsigned char *p, int len) /* * MIC ---> ASCII */ -static void -mic2ascii(unsigned char *mic, unsigned char *p, int len) +void +pg_mic2ascii(unsigned char *mic, unsigned char *p, int len) { int c1; while (len-- > 0 && (c1 = *mic)) { if (c1 > 0x7f) - printBogusChar(&mic, &p); + pg_print_bogus_char(&mic, &p); else { /* should be ASCII */ *p++ = c1; @@ -1257,82 +686,6 @@ mic2win1250(unsigned char *mic, unsigned char *p, int len) mic2latin_with_table(mic, p, len, LC_ISO8859_2, iso88592_2_win1250); } -#ifdef UNICODE_CONVERSION - -/* - * UNICODE(UTF-8) support - */ - -/* - * ASCII ---> UTF-8 - */ -static void -ascii2utf(unsigned char *ascii, unsigned char *utf, int len) -{ - ascii2mic(ascii, utf, len); -} - -/* - * UTF-8 ---> ASCII - */ -static void -utf2ascii(unsigned char *utf, unsigned char *ascii, int len) -{ - mic2ascii(utf, ascii, len); -} - -/* - * ISO8859-1 ---> UTF-8 - */ -static void -iso8859_1_to_utf(unsigned char *iso, unsigned char *utf, int len) -{ - unsigned short c; - - while (len-- > 0 && (c = *iso++)) - { - if (c < 0x80) - *utf++ = c; - else - { - *utf++ = (c >> 6) | 0xc0; - *utf++ = (c & 0x003f) | 0x80; - } - } - *utf = '\0'; -} - -/* - * 
UTF-8 ---> ISO8859-1 - */ -static void -utf_to_iso8859_1(unsigned char *utf, unsigned char *iso, int len) -{ - unsigned short c, - c1, - c2; - - while (len > 0 && (c = *utf++)) - { - if ((c & 0xe0) == 0xc0) - { - c1 = c & 0x1f; - c2 = *utf++ & 0x3f; - *iso = c1 << 6; - *iso++ |= c2; - len -= 2; - } - else if ((c & 0xe0) == 0xe0) - elog(ERROR, "Could not convert UTF-8 to ISO8859-1"); - else - { - *iso++ = c; - len--; - } - } - *iso = '\0'; -} - /* * comparison routine for bsearch() * this routine is intended for UTF-8 -> local code @@ -1372,9 +725,9 @@ compare2(const void *p1, const void *p2) * map: the conversion map. * size: the size of the conversion map. */ -static void -utf_to_local(unsigned char *utf, unsigned char *iso, - pg_utf_to_local *map, int size, int len) +void +UtfToLocal(unsigned char *utf, unsigned char *iso, + pg_utf_to_local *map, int size, int len) { unsigned int iutf; int l; @@ -1419,43 +772,6 @@ utf_to_local(unsigned char *utf, unsigned char *iso, } #ifdef NOT_USED -/* - * UTF-8 ---> ISO8859-2 - */ -static void -utf_to_latin2(unsigned char *utf, unsigned char *iso, int len) -{ - utf_to_local(utf, iso, ULmapISO8859_2, sizeof(ULmapISO8859_2) / sizeof(pg_utf_to_local), len); -} - -/* - * UTF-8 ---> ISO8859-3 - */ -static void -utf_to_latin3(unsigned char *utf, unsigned char *iso, int len) -{ - utf_to_local(utf, iso, ULmapISO8859_3, sizeof(ULmapISO8859_3) / sizeof(pg_utf_to_local), len); -} - -/* - * UTF-8 ---> ISO8859-4 - */ -static void -utf_to_latin4(unsigned char *utf, unsigned char *iso, int len) -{ - utf_to_local(utf, iso, ULmapISO8859_4, sizeof(ULmapISO8859_4) / sizeof(pg_utf_to_local), len); -} - -/* - * UTF-8 ---> ISO8859-5 - */ -static void -utf_to_latin5(unsigned char *utf, unsigned char *iso, int len) -{ - utf_to_local(utf, iso, ULmapISO8859_5, sizeof(ULmapISO8859_5) / sizeof(pg_utf_to_local), len); -} -#endif /* NOT_USED */ - /* * Cyrillic charsets */ @@ -1490,11 +806,13 @@ utf_to_ALT(unsigned char *utf, unsigned char *iso, int len) 
utf_to_local(utf, iso, ULmap_ALT, sizeof(ULmap_ALT) / sizeof(pg_utf_to_local), len); } +#endif + /* * local code ---> UTF-8 */ -static void -local_to_utf(unsigned char *iso, unsigned char *utf, +void +LocalToUtf(unsigned char *iso, unsigned char *utf, pg_local_to_utf *map, int size, int encoding, int len) { unsigned int iiso; @@ -1556,69 +874,6 @@ local_to_utf(unsigned char *iso, unsigned char *utf, } #ifdef NOT_USED -/* - * ISO-8859-2 ---> UTF-8 - */ -static void -latin2_to_utf(unsigned char *iso, unsigned char *utf, int len) -{ - local_to_utf(iso, utf, LUmapISO8859_2, sizeof(LUmapISO8859_2) / sizeof(pg_local_to_utf), PG_LATIN2, len); -} - -/* - * ISO-8859-3 ---> UTF-8 - */ -static void -latin3_to_utf(unsigned char *iso, unsigned char *utf, int len) -{ - local_to_utf(iso, utf, LUmapISO8859_3, sizeof(LUmapISO8859_3) / sizeof(pg_local_to_utf), PG_LATIN3, len); -} - -/* - * ISO-8859-4 ---> UTF-8 - */ -static void -latin4_to_utf(unsigned char *iso, unsigned char *utf, int len) -{ - local_to_utf(iso, utf, LUmapISO8859_4, sizeof(LUmapISO8859_4) / sizeof(pg_local_to_utf), PG_LATIN4, len); -} - -/* - * ISO-8859-5 ---> UTF-8 - */ -static void -latin5_to_utf(unsigned char *iso, unsigned char *utf, int len) -{ - local_to_utf(iso, utf, LUmapISO8859_5, sizeof(LUmapISO8859_5) / sizeof(pg_local_to_utf), PG_LATIN5, len); -} -#endif /* NOT_USED */ - -#define UTF_ISO8859(_id_) \ -static void \ -utf_to_iso8859_##_id_(unsigned char *utf, unsigned char *iso, int len) \ -{ \ - utf_to_local(utf, iso, ULmapISO8859_##_id_, sizeof(ULmapISO8859_##_id_) / sizeof(pg_utf_to_local), len); \ -} \ -static void \ -iso8859_##_id_##_to_utf(unsigned char *iso, unsigned char *utf, int len) \ -{ \ - local_to_utf(iso, utf, LUmapISO8859_##_id_, sizeof(LUmapISO8859_##_id_) / sizeof(pg_local_to_utf), PG_LATIN1, len); \ -} - -UTF_ISO8859(2); -UTF_ISO8859(3); -UTF_ISO8859(4); -UTF_ISO8859(5); -UTF_ISO8859(6); -UTF_ISO8859(7); -UTF_ISO8859(8); -UTF_ISO8859(9); -UTF_ISO8859(10); -UTF_ISO8859(13); 
-UTF_ISO8859(14); -UTF_ISO8859(15); -UTF_ISO8859(16); - /* * KOI8-R ---> UTF-8 */ @@ -1646,215 +901,6 @@ ALT_to_utf(unsigned char *iso, unsigned char *utf, int len) local_to_utf(iso, utf, LUmapALT, sizeof(LUmapALT) / sizeof(pg_local_to_utf), PG_ALT, len); } -/* - * UTF-8 ---> EUC_JP - */ -static void -utf_to_euc_jp(unsigned char *utf, unsigned char *euc, int len) - -{ - utf_to_local(utf, euc, ULmapEUC_JP, - sizeof(ULmapEUC_JP) / sizeof(pg_utf_to_local), len); -} - -/* - * EUC_JP ---> UTF-8 - */ -static void -euc_jp_to_utf(unsigned char *euc, unsigned char *utf, int len) -{ - local_to_utf(euc, utf, LUmapEUC_JP, - sizeof(LUmapEUC_JP) / sizeof(pg_local_to_utf), PG_EUC_JP, len); -} - -/* - * UTF-8 ---> EUC_CN - */ -static void -utf_to_euc_cn(unsigned char *utf, unsigned char *euc, int len) - -{ - utf_to_local(utf, euc, ULmapEUC_CN, - sizeof(ULmapEUC_CN) / sizeof(pg_utf_to_local), len); -} - -/* - * EUC_CN ---> UTF-8 - */ -static void -euc_cn_to_utf(unsigned char *euc, unsigned char *utf, int len) -{ - local_to_utf(euc, utf, LUmapEUC_CN, - sizeof(LUmapEUC_CN) / sizeof(pg_local_to_utf), PG_EUC_CN, len); -} - -/* - * UTF-8 ---> GB18030 - */ -static void -utf_to_gb18030(unsigned char *utf, unsigned char *euc, int len) - -{ - utf_to_local(utf, euc, ULmapGB18030, - sizeof(ULmapGB18030) / sizeof(pg_utf_to_local), len); -} - -/* - * GB18030 ---> UTF-8 - */ -static void -gb18030_to_utf(unsigned char *euc, unsigned char *utf, int len) -{ - local_to_utf(euc, utf, LUmapGB18030, - sizeof(LUmapGB18030) / sizeof(pg_local_to_utf), PG_GB18030, len); -} -/* - * UTF-8 ---> EUC_KR - */ -static void -utf_to_euc_kr(unsigned char *utf, unsigned char *euc, int len) - -{ - utf_to_local(utf, euc, ULmapEUC_KR, - sizeof(ULmapEUC_KR) / sizeof(pg_utf_to_local), len); -} - -/* - * EUC_KR ---> UTF-8 - */ -static void -euc_kr_to_utf(unsigned char *euc, unsigned char *utf, int len) -{ - local_to_utf(euc, utf, LUmapEUC_KR, - sizeof(LUmapEUC_KR) / sizeof(pg_local_to_utf), PG_EUC_KR, len); -} - -/* - * 
UTF-8 ---> EUC_TW - */ -static void -utf_to_euc_tw(unsigned char *utf, unsigned char *euc, int len) - -{ - utf_to_local(utf, euc, ULmapEUC_TW, - sizeof(ULmapEUC_TW) / sizeof(pg_utf_to_local), len); -} - -/* - * EUC_TW ---> UTF-8 - */ -static void -euc_tw_to_utf(unsigned char *euc, unsigned char *utf, int len) -{ - local_to_utf(euc, utf, LUmapEUC_TW, - sizeof(LUmapEUC_TW) / sizeof(pg_local_to_utf), PG_EUC_TW, len); -} - -/* - * UTF-8 ---> SJIS - */ -static void -utf_to_sjis(unsigned char *utf, unsigned char *euc, int len) - -{ - utf_to_local(utf, euc, ULmapSJIS, - sizeof(ULmapSJIS) / sizeof(pg_utf_to_local), len); -} - -/* - * SJIS ---> UTF-8 - */ -static void -sjis_to_utf(unsigned char *euc, unsigned char *utf, int len) -{ - local_to_utf(euc, utf, LUmapSJIS, - sizeof(LUmapSJIS) / sizeof(pg_local_to_utf), PG_SJIS, len); -} - -/* - * UTF-8 ---> BIG5 - */ -static void -utf_to_big5(unsigned char *utf, unsigned char *euc, int len) - -{ - utf_to_local(utf, euc, ULmapBIG5, - sizeof(ULmapBIG5) / sizeof(pg_utf_to_local), len); -} - -/* - * BIG5 ---> UTF-8 - */ -static void -big5_to_utf(unsigned char *euc, unsigned char *utf, int len) -{ - local_to_utf(euc, utf, LUmapBIG5, - sizeof(LUmapBIG5) / sizeof(pg_local_to_utf), PG_BIG5, len); -} - -/* - * UTF-8 ---> GBK - */ -static void -utf_to_gbk(unsigned char *utf, unsigned char *euc, int len) - -{ - utf_to_local(utf, euc, ULmapGBK, - sizeof(ULmapGBK) / sizeof(pg_utf_to_local), len); -} - -/* - * GBK ---> UTF-8 - */ -static void -gbk_to_utf(unsigned char *euc, unsigned char *utf, int len) -{ - local_to_utf(euc, utf, LUmapGBK, - sizeof(LUmapGBK) / sizeof(pg_local_to_utf), PG_GBK, len); -} - -/* - * UTF-8 ---> UHC - */ -static void -utf_to_uhc(unsigned char *utf, unsigned char *euc, int len) - -{ - utf_to_local(utf, euc, ULmapUHC, - sizeof(ULmapUHC) / sizeof(pg_utf_to_local), len); -} - -/* - * UHC ---> UTF-8 - */ -static void -uhc_to_utf(unsigned char *euc, unsigned char *utf, int len) -{ - local_to_utf(euc, utf, LUmapUHC, - 
sizeof(LUmapUHC) / sizeof(pg_local_to_utf), PG_UHC, len); -} - -/* - * UTF-8 ---> JOHAB - */ -static void -utf_to_johab(unsigned char *utf, unsigned char *euc, int len) - -{ - utf_to_local(utf, euc, ULmapJOHAB, - sizeof(ULmapJOHAB) / sizeof(pg_utf_to_local), len); -} - -/* - * JOHAB ---> UTF-8 - */ -static void -johab_to_utf(unsigned char *euc, unsigned char *utf, int len) -{ - local_to_utf(euc, utf, LUmapJOHAB, - sizeof(LUmapJOHAB) / sizeof(pg_local_to_utf), PG_JOHAB, len); -} - /* * UTF-8 ---> WIN1250 */ @@ -1897,27 +943,6 @@ win1256_to_utf(unsigned char *euc, unsigned char *utf, int len) sizeof(LUmapWIN1256) / sizeof(pg_local_to_utf), PG_WIN1256, len); } -/* - * UTF-8 ---> TCVN - */ -static void -utf_to_tcvn(unsigned char *utf, unsigned char *euc, int len) - -{ - utf_to_local(utf, euc, ULmapTCVN, - sizeof(ULmapTCVN) / sizeof(pg_utf_to_local), len); -} - -/* - * TCVN ---> UTF-8 - */ -static void -tcvn_to_utf(unsigned char *euc, unsigned char *utf, int len) -{ - local_to_utf(euc, utf, LUmapTCVN, - sizeof(LUmapTCVN) / sizeof(pg_local_to_utf), PG_TCVN, len); -} - /* * UTF-8 ---> WIN874 */ @@ -1939,213 +964,4 @@ win874_to_utf(unsigned char *euc, unsigned char *utf, int len) sizeof(LUmapWIN874) / sizeof(pg_local_to_utf), PG_WIN874, len); } -/* ---------- - * Encoding conversion routines - * - * WARINIG: must by same order as pg_enc in include/mb/pg_wchar.h! 
- * ---------- - */ -pg_enconv pg_enconv_tbl[] = -{ - { - PG_SQL_ASCII, ascii2mic, mic2ascii, ascii2utf, utf2ascii - }, - { - PG_EUC_JP, euc_jp2mic, mic2euc_jp, euc_jp_to_utf, utf_to_euc_jp - }, - { - PG_EUC_CN, euc_cn2mic, mic2euc_cn, euc_cn_to_utf, utf_to_euc_cn - }, - { - PG_EUC_KR, euc_kr2mic, mic2euc_kr, euc_kr_to_utf, utf_to_euc_kr - }, - { - PG_EUC_TW, euc_tw2mic, mic2euc_tw, euc_tw_to_utf, utf_to_euc_tw - }, - { - PG_JOHAB, 0, 0, johab_to_utf, utf_to_johab - }, - { - PG_UTF8, 0, 0, 0, 0 - }, - { - PG_MULE_INTERNAL, 0, 0, 0, 0 - }, - { - PG_LATIN1, latin12mic, mic2latin1, iso8859_1_to_utf, utf_to_iso8859_1 - }, - { - PG_LATIN2, latin22mic, mic2latin2, iso8859_2_to_utf, utf_to_iso8859_2 - }, - { - PG_LATIN3, latin32mic, mic2latin3, iso8859_3_to_utf, utf_to_iso8859_3 - }, - { - PG_LATIN4, latin42mic, mic2latin4, iso8859_4_to_utf, utf_to_iso8859_4 - }, - { - PG_LATIN5, iso2mic, mic2iso, iso8859_9_to_utf, utf_to_iso8859_9 - }, - { - PG_LATIN6, 0, 0, iso8859_10_to_utf, utf_to_iso8859_10 - }, - { - PG_LATIN7, 0, 0, iso8859_13_to_utf, utf_to_iso8859_13 - }, - { - PG_LATIN8, 0, 0, iso8859_14_to_utf, utf_to_iso8859_14 - }, - { - PG_LATIN9, 0, 0, iso8859_15_to_utf, utf_to_iso8859_15 - }, - { - PG_LATIN10, 0, 0, iso8859_16_to_utf, utf_to_iso8859_16 - }, - { - PG_WIN1256, 0, 0, win1256_to_utf, utf_to_win1256 - }, - { - PG_TCVN, 0, 0, tcvn_to_utf, utf_to_tcvn - }, - { - PG_WIN874, 0, 0, win874_to_utf, utf_to_win874 - }, - { - PG_KOI8R, koi8r2mic, mic2koi8r, KOI8R_to_utf, utf_to_KOI8R - }, - { - PG_WIN1251, win12512mic, mic2win1251, WIN1251_to_utf, utf_to_WIN1251 - }, - { - PG_ALT, alt2mic, mic2alt, ALT_to_utf, utf_to_ALT - }, - { - PG_ISO_8859_5, 0, 0, iso8859_5_to_utf, utf_to_iso8859_5 - }, - { - PG_ISO_8859_6, 0, 0, iso8859_6_to_utf, utf_to_iso8859_6 - }, - { - PG_ISO_8859_7, 0, 0, iso8859_7_to_utf, utf_to_iso8859_7 - }, - { - PG_ISO_8859_8, 0, 0, iso8859_8_to_utf, utf_to_iso8859_8 - }, - - { - PG_SJIS, sjis2mic, mic2sjis, sjis_to_utf, utf_to_sjis - }, - { - PG_BIG5, 
big52mic, mic2big5, big5_to_utf, utf_to_big5 - }, - { - PG_GBK, 0, 0, gbk_to_utf, utf_to_gbk - }, - { - PG_UHC, 0, 0, uhc_to_utf, utf_to_uhc - }, - { - PG_WIN1250, win12502mic, mic2win1250, win1250_to_utf, utf_to_win1250 - }, - { - PG_GB18030, gb180302mic, mic2gb18030, gb18030_to_utf, utf_to_gb18030 - }, -}; - -#else - -pg_enconv pg_enconv_tbl[] = -{ - { - PG_SQL_ASCII, ascii2mic, mic2ascii, 0, 0 - }, - { - PG_EUC_JP, euc_jp2mic, mic2euc_jp, 0, 0 - }, - { - PG_EUC_CN, euc_cn2mic, mic2euc_cn, 0, 0 - }, - { - PG_EUC_KR, euc_kr2mic, mic2euc_kr, 0, 0 - }, - { - PG_EUC_TW, euc_tw2mic, mic2euc_tw, 0, 0 - }, - { - PG_UTF8, 0, 0, 0, 0 - }, - { - PG_MULE_INTERNAL, 0, 0, 0, 0 - }, - { - PG_LATIN1, latin12mic, mic2latin1, 0, 0 - }, - { - PG_LATIN2, latin22mic, mic2latin2, 0, 0 - }, - { - PG_LATIN3, latin32mic, mic2latin3, 0, 0 - }, - { - PG_LATIN4, latin42mic, mic2latin4, 0, 0 - }, - { - PG_LATIN5, iso2mic, mic2iso, 0, 0 - }, - { - PG_LATIN6, 0, 0, 0, 0 - }, - { - PG_LATIN7, 0, 0, 0, 0 - }, - { - PG_LATIN8, 0, 0, 0, 0 - }, - { - PG_LATIN9, 0, 0, 0, 0 - }, - { - PG_LATIN10, 0, 0, 0, 0 - }, - { - PG_KOI8R, koi8r2mic, mic2koi8r, 0, 0 - }, - { - PG_WIN1251, win12512mic, mic2win1251, 0, 0 - }, - { - PG_ALT, alt2mic, mic2alt, 0, 0 - }, - { - PG_ISO_8859_5, 0, 0, 0, 0 - }, - { - PG_ISO_8859_6, 0, 0, 0, 0 - }, - { - PG_ISO_8859_7, 0, 0, 0, 0 - }, - { - PG_ISO_8859_8, 0, 0, 0, 0 - }, - { - PG_SJIS, sjis2mic, mic2sjis, 0, 0 - }, - { - PG_BIG5, big52mic, mic2big5, 0, 0 - }, - { - PG_GBK, 0, 0, 0, 0 - }, - { - PG_UHC, 0, 0, 0, 0 - }, - { - PG_WIN1250, win12502mic, mic2win1250, 0, 0 - }, - { - PG_GB18030, gb180302mic, mic2gb18030, 0, 0 - }, -}; - -#endif /* UNICODE_CONVERSION */ +#endif diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index 43fd05f5985..63d7398d774 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -3,136 +3,65 @@ * client encoding and server internal encoding. 
* (currently mule internal code (mic) is used) * Tatsuo Ishii - * $Id: mbutils.c,v 1.27 2001/11/20 01:32:29 ishii Exp $ + * $Id: mbutils.c,v 1.28 2002/07/18 02:02:30 ishii Exp $ */ #include "postgres.h" #include "miscadmin.h" #include "mb/pg_wchar.h" #include "utils/builtins.h" +#include "utils/syscache.h" +#include "catalog/namespace.h" /* * We handle for actual FE and BE encoding setting encoding-identificator * and encoding-name too. It prevent searching and conversion from encoding * to encoding name in getdatabaseencoding() and other routines. - * - * Default is PG_SQL_ASCII encoding (but this is never used, because - * backend during startup init it by SetDatabaseEncoding()). - * - * Karel Zak (Aug 2001) */ static pg_enc2name *ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]; static pg_enc2name *DatabaseEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]; -static to_mic_converter client_to_mic; /* something to MIC */ -static from_mic_converter client_from_mic; /* MIC to something */ -static to_mic_converter server_to_mic; /* something to MIC */ -static from_mic_converter server_from_mic; /* MIC to something */ - -/* - * find encoding table entry by encoding - */ -pg_enconv * -pg_get_enconv_by_encoding(int encoding) -{ - if (PG_VALID_ENCODING(encoding)) - { - Assert((&pg_enconv_tbl[encoding])->encoding == encoding); - return &pg_enconv_tbl[encoding]; - } - return 0; -} - -/* - * Find appropriate encoding conversion functions. If no such - * functions found, returns -1. - * - * Arguments: - * - * src, dest (in): source and destination encoding ids - * - * src_to_mic (out): pointer to a function which converts src to - * mic/unicode according to dest. if src == mic/unicode or no - * appropriate function found, set to 0. - * - * dest_from_mic (out): pointer to a function which converts - * mic/unicode to dest according to src. if dest == mic/unicode or no - * appropriate function found, set to 0. 
- */ -int -pg_find_encoding_converters(int src, int dest, - to_mic_converter *src_to_mic, - from_mic_converter *dest_from_mic) -{ - if (src == dest) - { /* src == dest? */ - *src_to_mic = *dest_from_mic = 0; - } - else if (src == PG_MULE_INTERNAL) - { /* src == MULE_INETRNAL? */ - *dest_from_mic = pg_get_enconv_by_encoding(dest)->from_mic; - if (*dest_from_mic == 0) - return (-1); - *src_to_mic = 0; - } - else if (dest == PG_MULE_INTERNAL) - { /* dest == MULE_INETRNAL? */ - *src_to_mic = pg_get_enconv_by_encoding(src)->to_mic; - if (*src_to_mic == 0) - return (-1); - *dest_from_mic = 0; - } - else if (src == PG_UTF8) - { /* src == UNICODE? */ - *dest_from_mic = pg_get_enconv_by_encoding(dest)->from_unicode; - if (*dest_from_mic == 0) - return (-1); - *src_to_mic = 0; - } - else if (dest == PG_UTF8) - { /* dest == UNICODE? */ - *src_to_mic = pg_get_enconv_by_encoding(src)->to_unicode; - if (*src_to_mic == 0) - return (-1); - *dest_from_mic = 0; - } - else - { - *src_to_mic = pg_get_enconv_by_encoding(src)->to_mic; - *dest_from_mic = pg_get_enconv_by_encoding(dest)->from_mic; - if (*src_to_mic == 0 || *dest_from_mic == 0) - return (-1); - } - return (0); -} - /* * set the client encoding. if encoding conversion between * client/server encoding is not supported, returns -1 */ int -pg_set_client_encoding(int encoding) +SetClientEncoding(int encoding, bool doit) { - int current_server_encoding = DatabaseEncoding->encoding; + int current_server_encoding; + + current_server_encoding = GetDatabaseEncoding(); if (!PG_VALID_FE_ENCODING(encoding)) return (-1); - if (pg_find_encoding_converters(encoding, current_server_encoding, &client_to_mic, &server_from_mic) < 0) - return (-1); + if (current_server_encoding == encoding) + { + ClientEncoding = &pg_enc2name_tbl[encoding]; + return 0; + } + + /* XXX We cannot use FindDefaultConversionProc() while in + * bootstrap or initprocessing mode since namespace functions will + * not work. 
+ */ + if (IsNormalProcessingMode()) + { + if (!OidIsValid(FindDefaultConversionProc(encoding, current_server_encoding)) || + !OidIsValid(FindDefaultConversionProc(current_server_encoding, encoding))) + return (-1); + } + + if (!doit) + return 0; ClientEncoding = &pg_enc2name_tbl[encoding]; - Assert(ClientEncoding->encoding == encoding); - - if (pg_find_encoding_converters(current_server_encoding, encoding, &server_to_mic, &client_from_mic) < 0) - return (-1); return 0; } /* - * returns the current client encoding - */ + * returns the current client encoding */ int pg_get_client_encoding(void) { @@ -151,55 +80,61 @@ pg_get_client_encoding_name(void) } /* - * Convert src encoding and returns it. Actual conversion is done by - * src_to_mic and dest_from_mic, which can be obtained by - * pg_find_encoding_converters(). The reason we require two conversion - * functions is that we have an intermediate encoding: MULE_INTERNAL - * Using intermediate encodings will reduce the number of functions - * doing encoding conversions. Special case is either src or dest is - * the intermediate encoding itself. In this case, you don't need src - * or dest (setting 0 will indicate there's no conversion - * function). Another case is you have direct-conversion function from - * src to dest. In this case either src_to_mic or dest_from_mic could - * be set to 0 also. + * Apply encoding conversion on src and return it. The encoding + * conversion function is chosen from the pg_conversion system catalog + * marked as "default". If it is not found in the schema search path, + * it's taken from the pg_catalog schema. If it is not found there either, + * warn and return src. We cannot raise an error, since it will cause + * an infinite loop in error message sending. * - * Note that If src or dest is UNICODE, we have to do - * direct-conversion, since we don't support conversion bwteen UNICODE - * and MULE_INTERNAL, we cannot go through MULE_INTERNAL.
+ * In the case of no conversion, src is returned. * - * CASE 1: if no conversion is required, then the given pointer s is returned. - * - * CASE 2: if conversion is required, a palloc'd string is returned. - * - * Callers must check whether return value differs from passed value - * to determine whether to pfree the result or not! - * - * Note: we assume that conversion cannot cause more than a 4-to-1 growth - * in the length of the string --- is this enough? */ + * XXX We assume that storage for converted result is 4-to-1 growth in + * the worst case. The rate for currently supported encoding pairs is within 3 + * (SJIS JIS X0201 half width kana -> UTF-8 is the worst case). + * So "4" should be enough for the moment. + */ unsigned char * pg_do_encoding_conversion(unsigned char *src, int len, - to_mic_converter src_to_mic, - from_mic_converter dest_from_mic) + int src_encoding, int dest_encoding) { - unsigned char *result = src; - unsigned char *buf; + unsigned char *result; + Oid proc; - if (src_to_mic) + if (src_encoding == dest_encoding) + return src; + + proc = FindDefaultConversionProc(src_encoding, dest_encoding); + if (!OidIsValid(proc)) { - buf = (unsigned char *) palloc(len * 4 + 1); - (*src_to_mic) (result, buf, len); - result = buf; - len = strlen(result); + elog(LOG, "default conversion proc for %s to %s not found", + pg_encoding_to_char(src_encoding), pg_encoding_to_char(dest_encoding)); + return src; } - if (dest_from_mic) + + /* XXX we should avoid throwing errors in OidFunctionCall. Otherwise + * we are going into an infinite loop! So we have to make sure that + * the function exists before calling OidFunctionCall.
+ */ + if (!SearchSysCacheExists(PROCOID, + ObjectIdGetDatum(proc), + 0, 0, 0)) { - buf = (unsigned char *) palloc(len * 4 + 1); - (*dest_from_mic) (result, buf, len); - if (result != src) - pfree(result); /* release first buffer */ - result = buf; + elog(LOG, "default conversion proc %u for %s to %s not found in pg_proc", + proc, + pg_encoding_to_char(src_encoding), pg_encoding_to_char(dest_encoding)); + return src; } + + result = palloc(len * 4 + 1); + + OidFunctionCall5(proc, + Int32GetDatum(src_encoding), + Int32GetDatum(dest_encoding), + CStringGetDatum(src), + CStringGetDatum(result), + Int32GetDatum(len)); return result; } @@ -207,8 +142,7 @@ pg_do_encoding_conversion(unsigned char *src, int len, * Convert string using encoding_nanme. We assume that string's * encoding is same as DB encoding. * - * TEXT convert(TEXT string, NAME encoding_name) - */ + * TEXT convert(TEXT string, NAME encoding_name) */ Datum pg_convert(PG_FUNCTION_ARGS) { @@ -230,7 +164,7 @@ pg_convert(PG_FUNCTION_ARGS) /* * Convert string using encoding_nanme. 
* - * TEXT convert(TEXT string, NAME src_encoding_name, NAME dest_encoding_name) + * TEXT convert2(TEXT string, NAME src_encoding_name, NAME dest_encoding_name) */ Datum pg_convert2(PG_FUNCTION_ARGS) @@ -240,8 +174,6 @@ pg_convert2(PG_FUNCTION_ARGS) int src_encoding = pg_char_to_encoding(src_encoding_name); char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2)); int dest_encoding = pg_char_to_encoding(dest_encoding_name); - to_mic_converter src; - from_mic_converter dest; unsigned char *result; text *retval; unsigned char *str; @@ -252,19 +184,13 @@ pg_convert2(PG_FUNCTION_ARGS) if (dest_encoding < 0) elog(ERROR, "Invalid destination encoding name %s", dest_encoding_name); - if (pg_find_encoding_converters(src_encoding, dest_encoding, &src, &dest) < 0) - { - elog(ERROR, "Conversion from %s to %s is not possible", - src_encoding_name, dest_encoding_name); - } - /* make sure that source string is null terminated */ len = VARSIZE(string) - VARHDRSZ; str = palloc(len + 1); memcpy(str, VARDATA(string), len); *(str + len) = '\0'; - result = pg_do_encoding_conversion(str, len, src, dest); + result = pg_do_encoding_conversion(str, len, src_encoding, dest_encoding); if (result == NULL) elog(ERROR, "Encoding conversion failed"); @@ -288,16 +214,6 @@ pg_convert2(PG_FUNCTION_ARGS) /* * convert client encoding to server encoding. - * - * CASE 1: if no conversion is required, then the given pointer s is returned. - * - * CASE 2: if conversion is required, a palloc'd string is returned. - * - * Callers must check whether return value differs from passed value - * to determine whether to pfree the result or not! - * - * Note: we assume that conversion cannot cause more than a 4-to-1 growth - * in the length of the string --- is this enough? 
*/ unsigned char * pg_client_to_server(unsigned char *s, int len) @@ -308,21 +224,12 @@ pg_client_to_server(unsigned char *s, int len) if (ClientEncoding->encoding == DatabaseEncoding->encoding) return s; - return pg_do_encoding_conversion(s, len, client_to_mic, server_from_mic); + return pg_do_encoding_conversion(s, len, ClientEncoding->encoding, + DatabaseEncoding->encoding); } /* * convert server encoding to client encoding. - * - * CASE 1: if no conversion is required, then the given pointer s is returned. - * - * CASE 2: if conversion is required, a palloc'd string is returned. - * - * Callers must check whether return value differs from passed value - * to determine whether to pfree the result or not! - * - * Note: we assume that conversion cannot cause more than a 4-to-1 growth - * in the length of the string --- is this enough? */ unsigned char * pg_server_to_client(unsigned char *s, int len) @@ -333,7 +240,8 @@ pg_server_to_client(unsigned char *s, int len) if (ClientEncoding->encoding == DatabaseEncoding->encoding) return s; - return pg_do_encoding_conversion(s, len, server_to_mic, client_from_mic); + return pg_do_encoding_conversion(s, len, DatabaseEncoding->encoding, + ClientEncoding->encoding); } /* convert a multi-byte string to a wchar */ @@ -448,6 +356,12 @@ SetDatabaseEncoding(int encoding) Assert(DatabaseEncoding->encoding == encoding); } +void +SetDefaultClientEncoding() +{ + ClientEncoding = &pg_enc2name_tbl[GetDatabaseEncoding()]; +} + int GetDatabaseEncoding(void) { diff --git a/src/backend/utils/mb/sjis.map b/src/backend/utils/mb/sjis.map deleted file mode 100644 index cfcfaefb06d..00000000000 --- a/src/backend/utils/mb/sjis.map +++ /dev/null @@ -1,396 +0,0 @@ -static struct -{ - unsigned short int nec; /* SJIS UDC (NEC selection IBM kanji) */ - unsigned short int sjis; /* SJIS UDC (IBM kanji) */ - int euc; /* EUC_JP */ -} ibmkanji[] = { -{ 0xEEEF , 0xfa40 , 0x8ff3f3 }, -{ 0xEEF0 , 0xfa41 , 0x8ff3f4 }, -{ 0xEEF1 , 0xfa42 , 0x8ff3f5 }, -{ 
0xEEF2 , 0xfa43 , 0x8ff3f6 }, -{ 0xEEF3 , 0xfa44 , 0x8ff3f7 }, -{ 0xEEF4 , 0xfa45 , 0x8ff3f8 }, -{ 0xEEF5 , 0xfa46 , 0x8ff3f9 }, -{ 0xEEF6 , 0xfa47 , 0x8ff3fa }, -{ 0xEEF7 , 0xfa48 , 0x8ff3fb }, -{ 0xEEF8 , 0xfa49 , 0x8ff3fc }, -{ 0x8754 , 0xfa4a , 0x8ff3fd }, -{ 0x8755 , 0xfa4b , 0x8ff3fe }, -{ 0x8756 , 0xfa4c , 0x8ff4a1 }, -{ 0x8757 , 0xfa4d , 0x8ff4a2 }, -{ 0x8758 , 0xfa4e , 0x8ff4a3 }, -{ 0x8759 , 0xfa4f , 0x8ff4a4 }, -{ 0x875A , 0xfa50 , 0x8ff4a5 }, -{ 0x875B , 0xfa51 , 0x8ff4a6 }, -{ 0x875C , 0xfa52 , 0x8ff4a7 }, -{ 0x875D , 0xfa53 , 0x8ff4a8 }, -{ 0xEEF9 , 0xfa54 , 0xa2cc }, -{ 0xEEFA , 0xfa55 , 0x8fa2c3 }, -{ 0xEEFB , 0xfa56 , 0x8ff4a9 }, -{ 0xEEFC , 0xfa57 , 0x8ff4aa }, -{ 0x878A , 0xfa58 , 0x8ff4ab }, -{ 0x8782 , 0xfa59 , 0x8ff4ac }, -{ 0x8784 , 0xfa5a , 0x8ff4ad }, -{ 0x879A , 0xfa5b , 0xa2e8 }, -{ 0xED40 , 0xfa5c , 0x8fd4e3 }, -{ 0xED41 , 0xfa5d , 0x8fdcdf }, -{ 0xED42 , 0xfa5e , 0x8fe4e9 }, -{ 0xED43 , 0xfa5f , 0x8fe3f8 }, -{ 0xED44 , 0xfa60 , 0x8fd9a1 }, -{ 0xED45 , 0xfa61 , 0x8fb1bb }, -{ 0xED46 , 0xfa62 , 0x8ff4ae }, -{ 0xED47 , 0xfa63 , 0x8fc2ad }, -{ 0xED48 , 0xfa64 , 0x8fc3fc }, -{ 0xED49 , 0xfa65 , 0x8fe4d0 }, -{ 0xED4A , 0xfa66 , 0x8fc2bf }, -{ 0xED4B , 0xfa67 , 0x8fbcf4 }, -{ 0xED4C , 0xfa68 , 0x8fb0a9 }, -{ 0xED4D , 0xfa69 , 0x8fb0c8 }, -{ 0xED4E , 0xfa6a , 0x8ff4af }, -{ 0xED4F , 0xfa6b , 0x8fb0d2 }, -{ 0xED50 , 0xfa6c , 0x8fb0d4 }, -{ 0xED51 , 0xfa6d , 0x8fb0e3 }, -{ 0xED52 , 0xfa6e , 0x8fb0ee }, -{ 0xED53 , 0xfa6f , 0x8fb1a7 }, -{ 0xED54 , 0xfa70 , 0x8fb1a3 }, -{ 0xED55 , 0xfa71 , 0x8fb1ac }, -{ 0xED56 , 0xfa72 , 0x8fb1a9 }, -{ 0xED57 , 0xfa73 , 0x8fb1be }, -{ 0xED58 , 0xfa74 , 0x8fb1df }, -{ 0xED59 , 0xfa75 , 0x8fb1d8 }, -{ 0xED5A , 0xfa76 , 0x8fb1c8 }, -{ 0xED5B , 0xfa77 , 0x8fb1d7 }, -{ 0xED5C , 0xfa78 , 0x8fb1e3 }, -{ 0xED5D , 0xfa79 , 0x8fb1f4 }, -{ 0xED5E , 0xfa7a , 0x8fb1e1 }, -{ 0xED5F , 0xfa7b , 0x8fb2a3 }, -{ 0xED60 , 0xfa7c , 0x8ff4b0 }, -{ 0xED61 , 0xfa7d , 0x8fb2bb }, -{ 0xED62 , 0xfa7e , 0x8fb2e6 }, -{ 0xED63 , 0xfa80 , 
0x8fb2ed }, -{ 0xED64 , 0xfa81 , 0x8fb2f5 }, -{ 0xED65 , 0xfa82 , 0x8fb2fc }, -{ 0xED66 , 0xfa83 , 0x8ff4b1 }, -{ 0xED67 , 0xfa84 , 0x8fb3b5 }, -{ 0xED68 , 0xfa85 , 0x8fb3d8 }, -{ 0xED69 , 0xfa86 , 0x8fb3db }, -{ 0xED6A , 0xfa87 , 0x8fb3e5 }, -{ 0xED6B , 0xfa88 , 0x8fb3ee }, -{ 0xED6C , 0xfa89 , 0x8fb3fb }, -{ 0xED6D , 0xfa8a , 0x8ff4b2 }, -{ 0xED6E , 0xfa8b , 0x8ff4b3 }, -{ 0xED6F , 0xfa8c , 0x8fb4c0 }, -{ 0xED70 , 0xfa8d , 0x8fb4c7 }, -{ 0xED71 , 0xfa8e , 0x8fb4d0 }, -{ 0xED72 , 0xfa8f , 0x8fb4de }, -{ 0xED73 , 0xfa90 , 0x8ff4b4 }, -{ 0xED74 , 0xfa91 , 0x8fb5aa }, -{ 0xED75 , 0xfa92 , 0x8ff4b5 }, -{ 0xED76 , 0xfa93 , 0x8fb5af }, -{ 0xED77 , 0xfa94 , 0x8fb5c4 }, -{ 0xED78 , 0xfa95 , 0x8fb5e8 }, -{ 0xED79 , 0xfa96 , 0x8ff4b6 }, -{ 0xED7A , 0xfa97 , 0x8fb7c2 }, -{ 0xED7B , 0xfa98 , 0x8fb7e4 }, -{ 0xED7C , 0xfa99 , 0x8fb7e8 }, -{ 0xED7D , 0xfa9a , 0x8fb7e7 }, -{ 0xED7E , 0xfa9b , 0x8ff4b7 }, -{ 0xED80 , 0xfa9c , 0x8ff4b8 }, -{ 0xED81 , 0xfa9d , 0x8ff4b9 }, -{ 0xED82 , 0xfa9e , 0x8fb8ce }, -{ 0xED83 , 0xfa9f , 0x8fb8e1 }, -{ 0xED84 , 0xfaa0 , 0x8fb8f5 }, -{ 0xED85 , 0xfaa1 , 0x8fb8f7 }, -{ 0xED86 , 0xfaa2 , 0x8fb8f8 }, -{ 0xED87 , 0xfaa3 , 0x8fb8fc }, -{ 0xED88 , 0xfaa4 , 0x8fb9af }, -{ 0xED89 , 0xfaa5 , 0x8fb9b7 }, -{ 0xED8A , 0xfaa6 , 0x8fbabe }, -{ 0xED8B , 0xfaa7 , 0x8fbadb }, -{ 0xED8C , 0xfaa8 , 0x8fcdaa }, -{ 0xED8D , 0xfaa9 , 0x8fbae1 }, -{ 0xED8E , 0xfaaa , 0x8ff4ba }, -{ 0xED8F , 0xfaab , 0x8fbaeb }, -{ 0xED90 , 0xfaac , 0x8fbbb3 }, -{ 0xED91 , 0xfaad , 0x8fbbb8 }, -{ 0xED92 , 0xfaae , 0x8ff4bb }, -{ 0xED93 , 0xfaaf , 0x8fbbca }, -{ 0xED94 , 0xfab0 , 0x8ff4bc }, -{ 0xED95 , 0xfab1 , 0x8ff4bd }, -{ 0xED96 , 0xfab2 , 0x8fbbd0 }, -{ 0xED97 , 0xfab3 , 0x8fbbde }, -{ 0xED98 , 0xfab4 , 0x8fbbf4 }, -{ 0xED99 , 0xfab5 , 0x8fbbf5 }, -{ 0xED9A , 0xfab6 , 0x8fbbf9 }, -{ 0xED9B , 0xfab7 , 0x8fbce4 }, -{ 0xED9C , 0xfab8 , 0x8fbced }, -{ 0xED9D , 0xfab9 , 0x8fbcfe }, -{ 0xED9E , 0xfaba , 0x8ff4be }, -{ 0xED9F , 0xfabb , 0x8fbdc2 }, -{ 0xEDA0 , 0xfabc , 0x8fbde7 }, -{ 
0xEDA1 , 0xfabd , 0x8ff4bf }, -{ 0xEDA2 , 0xfabe , 0x8fbdf0 }, -{ 0xEDA3 , 0xfabf , 0x8fbeb0 }, -{ 0xEDA4 , 0xfac0 , 0x8fbeac }, -{ 0xEDA5 , 0xfac1 , 0x8ff4c0 }, -{ 0xEDA6 , 0xfac2 , 0x8fbeb3 }, -{ 0xEDA7 , 0xfac3 , 0x8fbebd }, -{ 0xEDA8 , 0xfac4 , 0x8fbecd }, -{ 0xEDA9 , 0xfac5 , 0x8fbec9 }, -{ 0xEDAA , 0xfac6 , 0x8fbee4 }, -{ 0xEDAB , 0xfac7 , 0x8fbfa8 }, -{ 0xEDAC , 0xfac8 , 0x8fbfc9 }, -{ 0xEDAD , 0xfac9 , 0x8fc0c4 }, -{ 0xEDAE , 0xfaca , 0x8fc0e4 }, -{ 0xEDAF , 0xfacb , 0x8fc0f4 }, -{ 0xEDB0 , 0xfacc , 0x8fc1a6 }, -{ 0xEDB1 , 0xfacd , 0x8ff4c1 }, -{ 0xEDB2 , 0xface , 0x8fc1f5 }, -{ 0xEDB3 , 0xfacf , 0x8fc1fc }, -{ 0xEDB4 , 0xfad0 , 0x8ff4c2 }, -{ 0xEDB5 , 0xfad1 , 0x8fc1f8 }, -{ 0xEDB6 , 0xfad2 , 0x8fc2ab }, -{ 0xEDB7 , 0xfad3 , 0x8fc2a1 }, -{ 0xEDB8 , 0xfad4 , 0x8fc2a5 }, -{ 0xEDB9 , 0xfad5 , 0x8ff4c3 }, -{ 0xEDBA , 0xfad6 , 0x8fc2b8 }, -{ 0xEDBB , 0xfad7 , 0x8fc2ba }, -{ 0xEDBC , 0xfad8 , 0x8ff4c4 }, -{ 0xEDBD , 0xfad9 , 0x8fc2c4 }, -{ 0xEDBE , 0xfada , 0x8fc2d2 }, -{ 0xEDBF , 0xfadb , 0x8fc2d7 }, -{ 0xEDC0 , 0xfadc , 0x8fc2db }, -{ 0xEDC1 , 0xfadd , 0x8fc2de }, -{ 0xEDC2 , 0xfade , 0x8fc2ed }, -{ 0xEDC3 , 0xfadf , 0x8fc2f0 }, -{ 0xEDC4 , 0xfae0 , 0x8ff4c5 }, -{ 0xEDC5 , 0xfae1 , 0x8fc3a1 }, -{ 0xEDC6 , 0xfae2 , 0x8fc3b5 }, -{ 0xEDC7 , 0xfae3 , 0x8fc3c9 }, -{ 0xEDC8 , 0xfae4 , 0x8fc3b9 }, -{ 0xEDC9 , 0xfae5 , 0x8ff4c6 }, -{ 0xEDCA , 0xfae6 , 0x8fc3d8 }, -{ 0xEDCB , 0xfae7 , 0x8fc3fe }, -{ 0xEDCC , 0xfae8 , 0x8ff4c7 }, -{ 0xEDCD , 0xfae9 , 0x8fc4cc }, -{ 0xEDCE , 0xfaea , 0x8ff4c8 }, -{ 0xEDCF , 0xfaeb , 0x8fc4d9 }, -{ 0xEDD0 , 0xfaec , 0x8fc4ea }, -{ 0xEDD1 , 0xfaed , 0x8fc4fd }, -{ 0xEDD2 , 0xfaee , 0x8ff4c9 }, -{ 0xEDD3 , 0xfaef , 0x8fc5a7 }, -{ 0xEDD4 , 0xfaf0 , 0x8fc5b5 }, -{ 0xEDD5 , 0xfaf1 , 0x8fc5b6 }, -{ 0xEDD6 , 0xfaf2 , 0x8ff4ca }, -{ 0xEDD7 , 0xfaf3 , 0x8fc5d5 }, -{ 0xEDD8 , 0xfaf4 , 0x8fc6b8 }, -{ 0xEDD9 , 0xfaf5 , 0x8fc6d7 }, -{ 0xEDDA , 0xfaf6 , 0x8fc6e0 }, -{ 0xEDDB , 0xfaf7 , 0x8fc6ea }, -{ 0xEDDC , 0xfaf8 , 0x8fc6e3 }, -{ 0xEDDD , 0xfaf9 , 
0x8fc7a1 }, -{ 0xEDDE , 0xfafa , 0x8fc7ab }, -{ 0xEDDF , 0xfafb , 0x8fc7c7 }, -{ 0xEDE0 , 0xfafc , 0x8fc7c3 }, -{ 0xEDE1 , 0xfb40 , 0x8fc7cb }, -{ 0xEDE2 , 0xfb41 , 0x8fc7cf }, -{ 0xEDE3 , 0xfb42 , 0x8fc7d9 }, -{ 0xEDE4 , 0xfb43 , 0x8ff4cb }, -{ 0xEDE5 , 0xfb44 , 0x8ff4cc }, -{ 0xEDE6 , 0xfb45 , 0x8fc7e6 }, -{ 0xEDE7 , 0xfb46 , 0x8fc7ee }, -{ 0xEDE8 , 0xfb47 , 0x8fc7fc }, -{ 0xEDE9 , 0xfb48 , 0x8fc7eb }, -{ 0xEDEA , 0xfb49 , 0x8fc7f0 }, -{ 0xEDEB , 0xfb4a , 0x8fc8b1 }, -{ 0xEDEC , 0xfb4b , 0x8fc8e5 }, -{ 0xEDED , 0xfb4c , 0x8fc8f8 }, -{ 0xEDEE , 0xfb4d , 0x8fc9a6 }, -{ 0xEDEF , 0xfb4e , 0x8fc9ab }, -{ 0xEDF0 , 0xfb4f , 0x8fc9ad }, -{ 0xEDF1 , 0xfb50 , 0x8ff4cd }, -{ 0xEDF2 , 0xfb51 , 0x8fc9ca }, -{ 0xEDF3 , 0xfb52 , 0x8fc9d3 }, -{ 0xEDF4 , 0xfb53 , 0x8fc9e9 }, -{ 0xEDF5 , 0xfb54 , 0x8fc9e3 }, -{ 0xEDF6 , 0xfb55 , 0x8fc9fc }, -{ 0xEDF7 , 0xfb56 , 0x8fc9f4 }, -{ 0xEDF8 , 0xfb57 , 0x8fc9f5 }, -{ 0xEDF9 , 0xfb58 , 0x8ff4ce }, -{ 0xEDFA , 0xfb59 , 0x8fcab3 }, -{ 0xEDFB , 0xfb5a , 0x8fcabd }, -{ 0xEDFC , 0xfb5b , 0x8fcaef }, -{ 0xEE40 , 0xfb5c , 0x8fcaf1 }, -{ 0xEE41 , 0xfb5d , 0x8fcbae }, -{ 0xEE42 , 0xfb5e , 0x8ff4cf }, -{ 0xEE43 , 0xfb5f , 0x8fcbca }, -{ 0xEE44 , 0xfb60 , 0x8fcbe6 }, -{ 0xEE45 , 0xfb61 , 0x8fcbea }, -{ 0xEE46 , 0xfb62 , 0x8fcbf0 }, -{ 0xEE47 , 0xfb63 , 0x8fcbf4 }, -{ 0xEE48 , 0xfb64 , 0x8fcbee }, -{ 0xEE49 , 0xfb65 , 0x8fcca5 }, -{ 0xEE4A , 0xfb66 , 0x8fcbf9 }, -{ 0xEE4B , 0xfb67 , 0x8fccab }, -{ 0xEE4C , 0xfb68 , 0x8fccae }, -{ 0xEE4D , 0xfb69 , 0x8fccad }, -{ 0xEE4E , 0xfb6a , 0x8fccb2 }, -{ 0xEE4F , 0xfb6b , 0x8fccc2 }, -{ 0xEE50 , 0xfb6c , 0x8fccd0 }, -{ 0xEE51 , 0xfb6d , 0x8fccd9 }, -{ 0xEE52 , 0xfb6e , 0x8ff4d0 }, -{ 0xEE53 , 0xfb6f , 0x8fcdbb }, -{ 0xEE54 , 0xfb70 , 0x8ff4d1 }, -{ 0xEE55 , 0xfb71 , 0x8fcebb }, -{ 0xEE56 , 0xfb72 , 0x8ff4d2 }, -{ 0xEE57 , 0xfb73 , 0x8fceba }, -{ 0xEE58 , 0xfb74 , 0x8fcec3 }, -{ 0xEE59 , 0xfb75 , 0x8ff4d3 }, -{ 0xEE5A , 0xfb76 , 0x8fcef2 }, -{ 0xEE5B , 0xfb77 , 0x8fb3dd }, -{ 0xEE5C , 0xfb78 , 0x8fcfd5 }, -{ 
0xEE5D , 0xfb79 , 0x8fcfe2 }, -{ 0xEE5E , 0xfb7a , 0x8fcfe9 }, -{ 0xEE5F , 0xfb7b , 0x8fcfed }, -{ 0xEE60 , 0xfb7c , 0x8ff4d4 }, -{ 0xEE61 , 0xfb7d , 0x8ff4d5 }, -{ 0xEE62 , 0xfb7e , 0x8ff4d6 }, -{ 0xEE63 , 0xfb80 , 0x8ff4d7 }, -{ 0xEE64 , 0xfb81 , 0x8fd0e5 }, -{ 0xEE65 , 0xfb82 , 0x8ff4d8 }, -{ 0xEE66 , 0xfb83 , 0x8fd0e9 }, -{ 0xEE67 , 0xfb84 , 0x8fd1e8 }, -{ 0xEE68 , 0xfb85 , 0x8ff4d9 }, -{ 0xEE69 , 0xfb86 , 0x8ff4da }, -{ 0xEE6A , 0xfb87 , 0x8fd1ec }, -{ 0xEE6B , 0xfb88 , 0x8fd2bb }, -{ 0xEE6C , 0xfb89 , 0x8ff4db }, -{ 0xEE6D , 0xfb8a , 0x8fd3e1 }, -{ 0xEE6E , 0xfb8b , 0x8fd3e8 }, -{ 0xEE6F , 0xfb8c , 0x8fd4a7 }, -{ 0xEE70 , 0xfb8d , 0x8ff4dc }, -{ 0xEE71 , 0xfb8e , 0x8ff4dd }, -{ 0xEE72 , 0xfb8f , 0x8fd4d4 }, -{ 0xEE73 , 0xfb90 , 0x8fd4f2 }, -{ 0xEE74 , 0xfb91 , 0x8fd5ae }, -{ 0xEE75 , 0xfb92 , 0x8ff4de }, -{ 0xEE76 , 0xfb93 , 0x8fd7de }, -{ 0xEE77 , 0xfb94 , 0x8ff4df }, -{ 0xEE78 , 0xfb95 , 0x8fd8a2 }, -{ 0xEE79 , 0xfb96 , 0x8fd8b7 }, -{ 0xEE7A , 0xfb97 , 0x8fd8c1 }, -{ 0xEE7B , 0xfb98 , 0x8fd8d1 }, -{ 0xEE7C , 0xfb99 , 0x8fd8f4 }, -{ 0xEE7D , 0xfb9a , 0x8fd9c6 }, -{ 0xEE7E , 0xfb9b , 0x8fd9c8 }, -{ 0xEE80 , 0xfb9c , 0x8fd9d1 }, -{ 0xEE81 , 0xfb9d , 0x8ff4e0 }, -{ 0xEE82 , 0xfb9e , 0x8ff4e1 }, -{ 0xEE83 , 0xfb9f , 0x8ff4e2 }, -{ 0xEE84 , 0xfba0 , 0x8ff4e3 }, -{ 0xEE85 , 0xfba1 , 0x8ff4e4 }, -{ 0xEE86 , 0xfba2 , 0x8fdcd3 }, -{ 0xEE87 , 0xfba3 , 0x8fddc8 }, -{ 0xEE88 , 0xfba4 , 0x8fddd4 }, -{ 0xEE89 , 0xfba5 , 0x8fddea }, -{ 0xEE8A , 0xfba6 , 0x8fddfa }, -{ 0xEE8B , 0xfba7 , 0x8fdea4 }, -{ 0xEE8C , 0xfba8 , 0x8fdeb0 }, -{ 0xEE8D , 0xfba9 , 0x8ff4e5 }, -{ 0xEE8E , 0xfbaa , 0x8fdeb5 }, -{ 0xEE8F , 0xfbab , 0x8fdecb }, -{ 0xEE90 , 0xfbac , 0x8ff4e6 }, -{ 0xEE91 , 0xfbad , 0x8fdfb9 }, -{ 0xEE92 , 0xfbae , 0x8ff4e7 }, -{ 0xEE93 , 0xfbaf , 0x8fdfc3 }, -{ 0xEE94 , 0xfbb0 , 0x8ff4e8 }, -{ 0xEE95 , 0xfbb1 , 0x8ff4e9 }, -{ 0xEE96 , 0xfbb2 , 0x8fe0d9 }, -{ 0xEE97 , 0xfbb3 , 0x8ff4ea }, -{ 0xEE98 , 0xfbb4 , 0x8ff4eb }, -{ 0xEE99 , 0xfbb5 , 0x8fe1e2 }, -{ 0xEE9A , 0xfbb6 , 
0x8ff4ec }, -{ 0xEE9B , 0xfbb7 , 0x8ff4ed }, -{ 0xEE9C , 0xfbb8 , 0x8ff4ee }, -{ 0xEE9D , 0xfbb9 , 0x8fe2c7 }, -{ 0xEE9E , 0xfbba , 0x8fe3a8 }, -{ 0xEE9F , 0xfbbb , 0x8fe3a6 }, -{ 0xEEA0 , 0xfbbc , 0x8fe3a9 }, -{ 0xEEA1 , 0xfbbd , 0x8fe3af }, -{ 0xEEA2 , 0xfbbe , 0x8fe3b0 }, -{ 0xEEA3 , 0xfbbf , 0x8fe3aa }, -{ 0xEEA4 , 0xfbc0 , 0x8fe3ab }, -{ 0xEEA5 , 0xfbc1 , 0x8fe3bc }, -{ 0xEEA6 , 0xfbc2 , 0x8fe3c1 }, -{ 0xEEA7 , 0xfbc3 , 0x8fe3bf }, -{ 0xEEA8 , 0xfbc4 , 0x8fe3d5 }, -{ 0xEEA9 , 0xfbc5 , 0x8fe3d8 }, -{ 0xEEAA , 0xfbc6 , 0x8fe3d6 }, -{ 0xEEAB , 0xfbc7 , 0x8fe3df }, -{ 0xEEAC , 0xfbc8 , 0x8fe3e3 }, -{ 0xEEAD , 0xfbc9 , 0x8fe3e1 }, -{ 0xEEAE , 0xfbca , 0x8fe3d4 }, -{ 0xEEAF , 0xfbcb , 0x8fe3e9 }, -{ 0xEEB0 , 0xfbcc , 0x8fe4a6 }, -{ 0xEEB1 , 0xfbcd , 0x8fe3f1 }, -{ 0xEEB2 , 0xfbce , 0x8fe3f2 }, -{ 0xEEB3 , 0xfbcf , 0x8fe4cb }, -{ 0xEEB4 , 0xfbd0 , 0x8fe4c1 }, -{ 0xEEB5 , 0xfbd1 , 0x8fe4c3 }, -{ 0xEEB6 , 0xfbd2 , 0x8fe4be }, -{ 0xEEB7 , 0xfbd3 , 0x8ff4ef }, -{ 0xEEB8 , 0xfbd4 , 0x8fe4c0 }, -{ 0xEEB9 , 0xfbd5 , 0x8fe4c7 }, -{ 0xEEBA , 0xfbd6 , 0x8fe4bf }, -{ 0xEEBB , 0xfbd7 , 0x8fe4e0 }, -{ 0xEEBC , 0xfbd8 , 0x8fe4de }, -{ 0xEEBD , 0xfbd9 , 0x8fe4d1 }, -{ 0xEEBE , 0xfbda , 0x8ff4f0 }, -{ 0xEEBF , 0xfbdb , 0x8fe4dc }, -{ 0xEEC0 , 0xfbdc , 0x8fe4d2 }, -{ 0xEEC1 , 0xfbdd , 0x8fe4db }, -{ 0xEEC2 , 0xfbde , 0x8fe4d4 }, -{ 0xEEC3 , 0xfbdf , 0x8fe4fa }, -{ 0xEEC4 , 0xfbe0 , 0x8fe4ef }, -{ 0xEEC5 , 0xfbe1 , 0x8fe5b3 }, -{ 0xEEC6 , 0xfbe2 , 0x8fe5bf }, -{ 0xEEC7 , 0xfbe3 , 0x8fe5c9 }, -{ 0xEEC8 , 0xfbe4 , 0x8fe5d0 }, -{ 0xEEC9 , 0xfbe5 , 0x8fe5e2 }, -{ 0xEECA , 0xfbe6 , 0x8fe5ea }, -{ 0xEECB , 0xfbe7 , 0x8fe5eb }, -{ 0xEECC , 0xfbe8 , 0x8ff4f1 }, -{ 0xEECD , 0xfbe9 , 0x8ff4f2 }, -{ 0xEECE , 0xfbea , 0x8ff4f3 }, -{ 0xEECF , 0xfbeb , 0x8fe6e8 }, -{ 0xEED0 , 0xfbec , 0x8fe6ef }, -{ 0xEED1 , 0xfbed , 0x8fe7ac }, -{ 0xEED2 , 0xfbee , 0x8ff4f4 }, -{ 0xEED3 , 0xfbef , 0x8fe7ae }, -{ 0xEED4 , 0xfbf0 , 0x8ff4f5 }, -{ 0xEED5 , 0xfbf1 , 0x8fe7b1 }, -{ 0xEED6 , 0xfbf2 , 0x8ff4f6 }, -{ 
0xEED7 , 0xfbf3 , 0x8fe7b2 }, -{ 0xEED8 , 0xfbf4 , 0x8fe8b1 }, -{ 0xEED9 , 0xfbf5 , 0x8fe8b6 }, -{ 0xEEDA , 0xfbf6 , 0x8ff4f7 }, -{ 0xEEDB , 0xfbf7 , 0x8ff4f8 }, -{ 0xEEDC , 0xfbf8 , 0x8fe8dd }, -{ 0xEEDD , 0xfbf9 , 0x8ff4f9 }, -{ 0xEEDE , 0xfbfa , 0x8ff4fa }, -{ 0xEEDF , 0xfbfb , 0x8fe9d1 }, -{ 0xEEE0 , 0xfbfc , 0x8ff4fb }, -{ 0xEEE1 , 0xfc40 , 0x8fe9ed }, -{ 0xEEE2 , 0xfc41 , 0x8feacd }, -{ 0xEEE3 , 0xfc42 , 0x8ff4fc }, -{ 0xEEE4 , 0xfc43 , 0x8feadb }, -{ 0xEEE5 , 0xfc44 , 0x8feae6 }, -{ 0xEEE6 , 0xfc45 , 0x8feaea }, -{ 0xEEE7 , 0xfc46 , 0x8feba5 }, -{ 0xEEE8 , 0xfc47 , 0x8febfb }, -{ 0xEEE9 , 0xfc48 , 0x8febfa }, -{ 0xEEEA , 0xfc49 , 0x8ff4fd }, -{ 0xEEEB , 0xfc4a , 0x8fecd6 }, -{ 0xEEEC , 0xfc4b , 0x8ff4fe }, -{ 0xffff , 0xffff , 0xffff } /* Stop code */ -}; diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 5162509b261..482423f805f 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -5,7 +5,7 @@ * command, configuration file, and command line options. * See src/backend/utils/misc/README for more information. * - * $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.71 2002/07/13 01:02:14 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.72 2002/07/18 02:02:30 ishii Exp $ * * Copyright 2000 by PostgreSQL Global Development Group * Written by Peter Eisentraut . 
@@ -28,6 +28,7 @@ #include "fmgr.h" #include "libpq/auth.h" #include "libpq/pqcomm.h" +#include "mb/pg_wchar.h" #include "miscadmin.h" #include "optimizer/cost.h" #include "optimizer/geqo.h" @@ -1115,11 +1116,9 @@ InitializeGUCOptions(void) if (env != NULL) SetConfigOption("timezone", env, PGC_POSTMASTER, PGC_S_ENV_VAR); -#ifdef MULTIBYTE env = getenv("PGCLIENTENCODING"); if (env != NULL) SetConfigOption("client_encoding", env, PGC_POSTMASTER, PGC_S_ENV_VAR); -#endif } diff --git a/src/bin/initdb/initdb.sh b/src/bin/initdb/initdb.sh index 9948bcd7fe4..a88eed353b1 100644 --- a/src/bin/initdb/initdb.sh +++ b/src/bin/initdb/initdb.sh @@ -27,7 +27,7 @@ # Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group # Portions Copyright (c) 1994, Regents of the University of California # -# $Header: /cvsroot/pgsql/src/bin/initdb/Attic/initdb.sh,v 1.158 2002/07/16 17:48:46 tgl Exp $ +# $Header: /cvsroot/pgsql/src/bin/initdb/Attic/initdb.sh,v 1.159 2002/07/18 02:02:30 ishii Exp $ # #------------------------------------------------------------------------- @@ -994,6 +994,11 @@ EOF | "$PGPATH"/postgres $PGSQL_OPT template1 > /dev/null || exit_nicely echo "ok" +# Create pg_conversion and support functions +$ECHO_N "creating conversions... "$ECHO_C +cat $datadir/conversion_create.sql | "$PGPATH"/postgres $PGSQL_OPT template1 > /dev/null || exit_nicely +echo "ok" + # Set most system catalogs and built-in functions as world-accessible. 
# Some objects may require different permissions by default, so we # make sure we don't overwrite privilege sets that have already been diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c index f28af37d931..f99f909f285 100644 --- a/src/bin/psql/command.c +++ b/src/bin/psql/command.c @@ -3,7 +3,7 @@ * * Copyright 2000 by PostgreSQL Global Development Group * - * $Header: /cvsroot/pgsql/src/bin/psql/command.c,v 1.73 2002/07/15 01:56:25 momjian Exp $ + * $Header: /cvsroot/pgsql/src/bin/psql/command.c,v 1.74 2002/07/18 02:02:30 ishii Exp $ */ #include "postgres_fe.h" #include "command.h" @@ -38,14 +38,7 @@ #include "print.h" #include "settings.h" #include "variables.h" - -#ifdef MULTIBYTE #include "mb/pg_wchar.h" -#else -/* Grand unified hard-coded badness */ -#define pg_encoding_to_char(x) "SQL_ASCII" -#endif - /* functions for use in this file */ @@ -457,10 +450,9 @@ exec_command(const char *cmd, puts(pg_encoding_to_char(pset.encoding)); else { -#ifdef MULTIBYTE /* set encoding */ if (PQsetClientEncoding(pset.db, encoding) == -1) - psql_error("%s: invalid encoding name\n", encoding); + psql_error("%s: invalid encoding name or conversion proc not found\n", encoding); else { @@ -468,9 +460,6 @@ exec_command(const char *cmd, pset.encoding = PQclientEncoding(pset.db); SetVariable(pset.vars, "ENCODING", pg_encoding_to_char(pset.encoding)); } -#else - psql_error("\\%s: multibyte support is not enabled\n", cmd); -#endif free(encoding); } } diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index 61c797f2445..a101d176a82 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -1,4 +1,4 @@ -/* $Id: pg_wchar.h,v 1.39 2002/06/13 08:30:22 ishii Exp $ */ +/* $Id: pg_wchar.h,v 1.40 2002/07/18 02:02:30 ishii Exp $ */ #ifndef PG_WCHAR_H #define PG_WCHAR_H @@ -145,7 +145,7 @@ typedef unsigned int pg_wchar; * Encoding numeral identificators * * WARNING: the order of this table must be same as order - * in the pg_enconv[] (mb/conv.c) and 
pg_enc2name[] (mb/encnames.c) array! + * in the pg_enc2name[] (mb/encnames.c) array! * * If you add some encoding don'y forget check * PG_ENCODING_[BE|FE]_LAST macros. @@ -248,30 +248,6 @@ extern pg_encname *pg_char_to_encname_struct(const char *name); extern int pg_char_to_encoding(const char *s); extern const char *pg_encoding_to_char(int encoding); -typedef void (*to_mic_converter) (unsigned char *l, unsigned char *p, int len); -typedef void (*from_mic_converter) (unsigned char *mic, unsigned char *p, int len); - -/* - * The backend encoding conversion routines - * Careful: - * - * if (PG_VALID_ENCODING(enc)) - * pg_encconv_tbl[ enc ]->foo - */ -#ifndef FRONTEND -typedef struct pg_enconv -{ - pg_enc encoding; /* encoding identifier */ - to_mic_converter to_mic; /* client encoding to MIC */ - from_mic_converter from_mic; /* MIC to client encoding */ - to_mic_converter to_unicode; /* client encoding to UTF-8 */ - from_mic_converter from_unicode; /* UTF-8 to client encoding */ -} pg_enconv; - -extern pg_enconv pg_enconv_tbl[]; -extern pg_enconv *pg_get_enconv_by_encoding(int encoding); -#endif /* FRONTEND */ - /* * pg_wchar stuff */ @@ -325,7 +301,8 @@ extern int pg_mbcharcliplen(const unsigned char *, int, int); extern int pg_encoding_max_length(int); extern int pg_database_encoding_max_length(void); -extern int pg_set_client_encoding(int); +extern void SetDefaultClientEncoding(void); +extern int SetClientEncoding(int encoding, bool doit); extern int pg_get_client_encoding(void); extern const char *pg_get_client_encoding_name(void); @@ -337,12 +314,9 @@ extern int pg_valid_client_encoding(const char *name); extern int pg_valid_server_encoding(const char *name); extern int pg_utf_mblen(const unsigned char *); -extern int pg_find_encoding_converters(int src, int dest, - to_mic_converter *src_to_mic, - from_mic_converter *dest_from_mic); extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len, - to_mic_converter src_to_mic, - from_mic_converter 
dest_from_mic); + int src_encoding, + int dest_encoding); extern unsigned char *pg_client_to_server(unsigned char *, int); extern unsigned char *pg_server_to_client(unsigned char *, int); @@ -350,7 +324,18 @@ extern unsigned char *pg_server_to_client(unsigned char *, int); extern unsigned short BIG5toCNS(unsigned short, unsigned char *); extern unsigned short CNStoBIG5(unsigned short, unsigned char); +extern void LocalToUtf(unsigned char *iso, unsigned char *utf, + pg_local_to_utf *map, int size, int encoding, int len); + +extern void UtfToLocal(unsigned char *utf, unsigned char *iso, + pg_utf_to_local *map, int size, int len); + char *pg_verifymbstr(const unsigned char *, int); + +void pg_ascii2mic(unsigned char *src, unsigned char *dest, int len); +void pg_mic2ascii(unsigned char *src, unsigned char *dest, int len); +void pg_print_bogus_char(unsigned char **mic, unsigned char **p); + #endif /* MULTIBYTE */ #endif /* PG_WCHAR_H */ diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c index 2380d0b0551..630141ed549 100644 --- a/src/interfaces/libpq/fe-connect.c +++ b/src/interfaces/libpq/fe-connect.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/interfaces/libpq/fe-connect.c,v 1.188 2002/06/20 20:29:53 momjian Exp $ + * $Header: /cvsroot/pgsql/src/interfaces/libpq/fe-connect.c,v 1.189 2002/07/18 02:02:30 ishii Exp $ * *------------------------------------------------------------------------- */ @@ -2714,7 +2714,6 @@ PQclientEncoding(const PGconn *conn) return conn->client_encoding; } -#ifdef MULTIBYTE int PQsetClientEncoding(PGconn *conn, const char *encoding) { @@ -2751,15 +2750,6 @@ PQsetClientEncoding(PGconn *conn, const char *encoding) return (status); } -#else /* without multibytle support */ - -int -PQsetClientEncoding(PGconn *conn, const char *encoding) -{ - return -1; -} -#endif - void PQtrace(PGconn *conn, FILE *debug_port) {