diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c index e164866a77..32d298780b 100644 --- a/src/backend/utils/mb/wchar.c +++ b/src/backend/utils/mb/wchar.c @@ -1,7 +1,7 @@ /* * conversion functions between pg_wchar and multibyte streams. * Tatsuo Ishii - * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.74 2010/01/04 20:38:31 adunstan Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.75 2010/08/18 19:54:01 tgl Exp $ * */ /* can be used in either frontend or backend */ @@ -462,7 +462,7 @@ unicode_to_utf8(pg_wchar c, unsigned char *utf8string) * We return "1" for any leading byte that is either flat-out illegal or * indicates a length larger than we support. * - * pg_utf2wchar_with_len(), utf2ucs(), pg_utf8_islegal(), and perhaps + * pg_utf2wchar_with_len(), utf8_to_unicode(), pg_utf8_islegal(), and perhaps * other places would need to be fixed to change this. */ int @@ -632,13 +632,15 @@ ucs_wcwidth(pg_wchar ucs) (ucs >= 0x20000 && ucs <= 0x2ffff))); } -static pg_wchar -utf2ucs(const unsigned char *c) +/* + * Convert a UTF-8 character to a Unicode code point. + * This is a one-character version of pg_utf2wchar_with_len. + * + * No error checks here, c must point to a long-enough string. + */ +pg_wchar +utf8_to_unicode(const unsigned char *c) { - /* - * one char version of pg_utf2wchar_with_len. no control here, c must - * point to a large enough string - */ if ((*c & 0x80) == 0) return (pg_wchar) c[0]; else if ((*c & 0xe0) == 0xc0) @@ -661,7 +663,7 @@ utf2ucs(const unsigned char *c) static int pg_utf_dsplen(const unsigned char *s) { - return ucs_wcwidth(utf2ucs(s)); + return ucs_wcwidth(utf8_to_unicode(s)); } /* diff --git a/src/bin/psql/mbprint.c b/src/bin/psql/mbprint.c index d46aaef544..141f860041 100644 --- a/src/bin/psql/mbprint.c +++ b/src/bin/psql/mbprint.c @@ -3,7 +3,7 @@ * * Copyright (c) 2000-2010, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/bin/psql/mbprint.c,v 1.39 2010/08/16 00:06:18 tgl Exp $ + * $PostgreSQL: pgsql/src/bin/psql/mbprint.c,v 1.40 2010/08/18 19:54:01 tgl Exp $ * * XXX this file does not really belong in psql/. Perhaps move to libpq? * It also seems that the mbvalidate function is redundant with existing @@ -43,13 +43,15 @@ pg_get_utf8_id(void) #define PG_UTF8 pg_get_utf8_id() +/* + * Convert a UTF-8 character to a Unicode code point. + * This is a one-character version of pg_utf2wchar_with_len. + * + * No error checks here, c must point to a long-enough string. + */ static pg_wchar -utf2ucs(const unsigned char *c) +utf8_to_unicode(const unsigned char *c) { - /* - * one char version of pg_utf2wchar_with_len. no control here, c must - * point to a large enough string - */ if ((*c & 0x80) == 0) return (pg_wchar) c[0]; else if ((*c & 0xe0) == 0xc0) @@ -346,7 +348,7 @@ pg_wcsformat(unsigned char *pwcs, size_t len, int encoding, else if (w < 0) /* Non-ascii control char */ { if (encoding == PG_UTF8) - sprintf((char *) ptr, "\\u%04X", utf2ucs(pwcs)); + sprintf((char *) ptr, "\\u%04X", utf8_to_unicode(pwcs)); else { /* diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index 817f9aaaaa..33e0c5ae08 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.94 2010/02/26 02:01:25 momjian Exp $ + * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.95 2010/08/18 19:54:01 tgl Exp $ * * NOTES * This is used both by the backend and by libpq, but should not be @@ -412,6 +412,7 @@ extern int pg_valid_client_encoding(const char *name); extern int pg_valid_server_encoding(const char *name); extern unsigned char *unicode_to_utf8(pg_wchar c, unsigned char *utf8string); +extern pg_wchar utf8_to_unicode(const unsigned char *c); extern int pg_utf_mblen(const unsigned char *); extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len, int src_encoding,