diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 5db8856963..44077a3fb2 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.236.4.2 2005/10/03 23:43:29 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.236.4.3 2006/05/21 20:06:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -98,6 +98,7 @@ static bool fe_eof;				/* true if detected end of copy data */
 static EolType eol_type;		/* EOL type of input */
 static int	client_encoding;	/* remote side's character encoding */
 static int	server_encoding;	/* local encoding */
+static int	server_max_length;	/* max bytes per char in local encoding */
 static bool embedded_line_warning;
 
 /* these are just for error messages, see copy_in_error_callback */
@@ -988,6 +989,7 @@ DoCopy(const CopyStmt *stmt)
 
 	client_encoding = pg_get_client_encoding();
 	server_encoding = GetDatabaseEncoding();
+	server_max_length = pg_database_encoding_max_length();
 
 	copy_dest = COPY_FILE;		/* default */
 	copy_file = NULL;
@@ -2010,7 +2012,8 @@ static bool
 CopyReadLine(void)
 {
 	bool		result;
-	bool		change_encoding = (client_encoding != server_encoding);
+	bool		change_encoding = (client_encoding != server_encoding ||
+								   server_max_length > 1);
 	int			c;
 	int			mblen;
 	int			j;
diff --git a/src/backend/utils/adt/name.c b/src/backend/utils/adt/name.c
index 1200ad9b34..211619ec15 100644
--- a/src/backend/utils/adt/name.c
+++ b/src/backend/utils/adt/name.c
@@ -14,7 +14,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/name.c,v 1.55 2004/12/31 22:01:22 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/name.c,v 1.55.4.1 2006/05/21 20:06:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -49,10 +49,7 @@ namein(PG_FUNCTION_ARGS)
 	NameData   *result;
 	int			len;
 
-	/* verify encoding */
 	len = strlen(s);
-	pg_verifymbstr(s, len, false);
-
 	len = pg_mbcliplen(s, len, NAMEDATALEN - 1);
 
 	result = (NameData *) palloc0(NAMEDATALEN);
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
index 40e771ffab..19b2de618a 100644
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/varchar.c,v 1.108.4.1 2005/12/22 22:50:14 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/varchar.c,v 1.108.4.2 2006/05/21 20:06:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -75,10 +75,7 @@ bpcharin(PG_FUNCTION_ARGS)
 	int			i;
 	int			charlen;		/* number of charcters in the input string */
 
-	/* verify encoding */
 	len = strlen(s);
-	pg_verifymbstr(s, len, false);
-
 	charlen = pg_mbstrlen(s);
 
 	/* If typmod is -1 (or invalid), use the actual string length */
@@ -364,10 +361,7 @@ varcharin(PG_FUNCTION_ARGS)
 	size_t		len,
 				maxlen;
 
-	/* verify encoding */
 	len = strlen(s);
-	pg_verifymbstr(s, len, false);
-
 	maxlen = atttypmod - VARHDRSZ;
 
 	if (atttypmod >= (int32) VARHDRSZ && len > maxlen)
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index f7f64add23..891c412d9e 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.118.4.1 2005/12/22 22:50:14 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.118.4.2 2006/05/21 20:06:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -260,10 +260,7 @@ textin(PG_FUNCTION_ARGS)
 	text	   *result;
 	int			len;
 
-	/* verify encoding */
 	len = strlen(inputText);
-	pg_verifymbstr(inputText, len, false);
-
 	result = (text *) palloc(len + VARHDRSZ);
 	VARATT_SIZEP(result) = len + VARHDRSZ;
 
diff --git a/src/backend/utils/mb/conv.c b/src/backend/utils/mb/conv.c
index c84d860e84..7997985b55 100644
--- a/src/backend/utils/mb/conv.c
+++ b/src/backend/utils/mb/conv.c
@@ -6,172 +6,81 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conv.c,v 1.51 2004/12/31 22:01:42 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conv.c,v 1.51.4.1 2006/05/21 20:06:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 #include "mb/pg_wchar.h"
 
+
 /*
- * convert bogus chars that cannot be represented in the current
- * encoding system.
+ * LATINn ---> MIC when the charset's local codes map directly to MIC
+ *
+ * l points to the source string of length len
+ * p is the output area (must be large enough!)
+ * lc is the mule character set id for the local encoding
+ * encoding is the PG identifier for the local encoding
  */
 void
-pg_print_bogus_char(unsigned char **mic, unsigned char **p)
-{
-	char		strbuf[16];
-	int			l = pg_mic_mblen(*mic);
-
-	*(*p)++ = '(';
-	while (l--)
-	{
-		sprintf(strbuf, "%02x", *(*mic)++);
-		*(*p)++ = strbuf[0];
-		*(*p)++ = strbuf[1];
-	}
-	*(*p)++ = ')';
-}
-
-#ifdef NOT_USED
-
-/*
- * GB18030 ---> MIC
- * Added by Bill Huang <bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp>
- */
-static void
-gb180302mic(unsigned char *gb18030, unsigned char *p, int len)
-{
-	int			c1;
-	int			c2;
-
-	while (len > 0 && (c1 = *gb18030++))
-	{
-		if (c1 < 0x80)
-		{						/* should be ASCII */
-			len--;
-			*p++ = c1;
-		}
-		else if (c1 >= 0x81 && c1 <= 0xfe)
-		{
-			c2 = *gb18030++;
-
-			if (c2 >= 0x30 && c2 <= 0x69)
-			{
-				len -= 4;
-				*p++ = c1;
-				*p++ = c2;
-				*p++ = *gb18030++;
-				*p++ = *gb18030++;
-				*p++ = *gb18030++;
-			}
-			else if ((c2 >= 0x40 && c2 <= 0x7e) || (c2 >= 0x80 && c2 <= 0xfe))
-			{
-				len -= 2;
-				*p++ = c1;
-				*p++ = c2;
-				*p++ = *gb18030++;
-			}
-			else
-			{					/* throw the strange code */
-				len--;
-			}
-		}
-	}
-	*p = '\0';
-}
-
-/*
- * MIC ---> GB18030
- * Added by Bill Huang <bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp>
- */
-static void
-mic2gb18030(unsigned char *mic, unsigned char *p, int len)
-{
-	int			c1;
-	int			c2;
-
-	while (len > 0 && (c1 = *mic))
-	{
-		len -= pg_mic_mblen(mic++);
-
-		if (c1 <= 0x7f)			/* ASCII */
-			*p++ = c1;
-		else if (c1 >= 0x81 && c1 <= 0xfe)
-		{
-			c2 = *mic++;
-
-			if ((c2 >= 0x40 && c2 <= 0x7e) || (c2 >= 0x80 && c2 <= 0xfe))
-			{
-				*p++ = c1;
-				*p++ = c2;
-			}
-			else if (c2 >= 0x30 && c2 <= 0x39)
-			{
-				*p++ = c1;
-				*p++ = c2;
-				*p++ = *mic++;
-				*p++ = *mic++;
-			}
-			else
-			{
-				mic--;
-				pg_print_bogus_char(&mic, &p);
-				mic--;
-				pg_print_bogus_char(&mic, &p);
-			}
-		}
-		else
-		{
-			mic--;
-			pg_print_bogus_char(&mic, &p);
-		}
-	}
-	*p = '\0';
-}
-#endif
-
-/*
- * LATINn ---> MIC
- */
-void
-latin2mic(unsigned char *l, unsigned char *p, int len, int lc)
+latin2mic(const unsigned char *l, unsigned char *p, int len,
+		  int lc, int encoding)
 {
 	int			c1;
 
-	while (len-- > 0 && (c1 = *l++))
+	while (len > 0)
 	{
-		if (c1 > 0x7f)
-		{						/* Latin? */
+		c1 = *l;
+		if (c1 == 0)			/* NUL inside the counted input is an error */
+			report_invalid_encoding(encoding, (const char *) l, len);
+		if (IS_HIGHBIT_SET(c1)) /* non-ASCII byte: emit charset id first */
 			*p++ = lc;
-		}
 		*p++ = c1;
+		l++;
+		len--;
 	}
 	*p = '\0';
 }
 
 /*
- * MIC ---> LATINn
+ * MIC ---> LATINn when the charset's local codes map directly to MIC
+ *
+ * mic points to the source string of length len
+ * p is the output area (must be large enough!)
+ * lc is the mule character set id for the local encoding
+ * encoding is the PG identifier for the local encoding
  */
 void
-mic2latin(unsigned char *mic, unsigned char *p, int len, int lc)
+mic2latin(const unsigned char *mic, unsigned char *p, int len,
+		  int lc, int encoding)
 {
 	int			c1;
 
-	while (len > 0 && (c1 = *mic))
+	while (len > 0)
 	{
-		len -= pg_mic_mblen(mic++);
-
-		if (c1 == lc)
-			*p++ = *mic++;
-		else if (c1 > 0x7f)
+		c1 = *mic;
+		if (c1 == 0)
+			report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
+		if (!IS_HIGHBIT_SET(c1))
 		{
-			mic--;
-			pg_print_bogus_char(&mic, &p);
+			/* easy for ASCII */
+			*p++ = c1;
+			mic++;
+			len--;
 		}
 		else
-		{						/* should be ASCII */
-			*p++ = c1;
+		{
+			int		l = pg_mic_mblen(mic);
+
+			if (len < l)
+				report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
+										len);
+			if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]))
+				report_untranslatable_char(PG_MULE_INTERNAL, encoding,
+										   (const char *) mic, len);
+			*p++ = mic[1];
+			mic += 2;
+			len -= 2;
 		}
 	}
 	*p = '\0';
@@ -180,14 +89,25 @@ mic2latin(unsigned char *mic, unsigned char *p, int len, int lc)
 
 /*
  * ASCII ---> MIC
+ *
+ * While ordinarily SQL_ASCII encoding is forgiving of high-bit-set
+ * characters, here we must take a hard line because we don't know
+ * the appropriate MIC equivalent.
  */
 void
-pg_ascii2mic(unsigned char *l, unsigned char *p, int len)
+pg_ascii2mic(const unsigned char *l, unsigned char *p, int len)
 {
 	int			c1;
 
-	while (len-- > 0 && (c1 = *l++))
-		*p++ = (c1 & 0x7f);
+	while (len > 0)
+	{
+		c1 = *l;
+		if (c1 == 0 || IS_HIGHBIT_SET(c1))	/* NUL or 8-bit byte: report it */
+			report_invalid_encoding(PG_SQL_ASCII, (const char *) l, len);
+		*p++ = c1;
+		l++;
+		len--;
+	}
 	*p = '\0';
 }
 
@@ -195,19 +115,19 @@ pg_ascii2mic(unsigned char *l, unsigned char *p, int len)
  * MIC ---> ASCII
  */
 void
-pg_mic2ascii(unsigned char *mic, unsigned char *p, int len)
+pg_mic2ascii(const unsigned char *mic, unsigned char *p, int len)
 {
 	int			c1;
 
-	while (len-- > 0 && (c1 = *mic))
+	while (len > 0)
 	{
-		if (c1 > 0x7f)
-			pg_print_bogus_char(&mic, &p);
-		else
-		{						/* should be ASCII */
-			*p++ = c1;
-			mic++;
-		}
+		c1 = *mic;
+		if (c1 == 0 || IS_HIGHBIT_SET(c1))	/* NUL and 8-bit share one report */
+			report_untranslatable_char(PG_MULE_INTERNAL, PG_SQL_ASCII,
+									   (const char *) mic, len);
+		*p++ = c1;
+		mic++;
+		len--;
 	}
 	*p = '\0';
 }
@@ -215,87 +135,103 @@ pg_mic2ascii(unsigned char *mic, unsigned char *p, int len)
 /*
  * latin2mic_with_table: a generic single byte charset encoding
  * conversion from a local charset to the mule internal code.
- * with a encoding conversion table.
- * the table is ordered according to the local charset,
+ *
+ * l points to the source string of length len
+ * p is the output area (must be large enough!)
+ * lc is the mule character set id for the local encoding
+ * encoding is the PG identifier for the local encoding
+ * tab holds conversion entries for the local charset
  * starting from 128 (0x80). each entry in the table
  * holds the corresponding code point for the mule internal code.
  */
 void
-latin2mic_with_table(
-					 unsigned char *l,	/* local charset string (source) */
-					 unsigned char *p,	/* pointer to store mule internal
-										 * code (destination) */
-					 int len,	/* length of l */
-					 int lc,	/* leading character of p */
-					 unsigned char *tab /* code conversion table */
-)
+latin2mic_with_table(const unsigned char *l,
+					 unsigned char *p,
+					 int len,
+					 int lc,
+					 int encoding,
+					 const unsigned char *tab)
 {
 	unsigned char c1,
 				c2;
 
-	while (len-- > 0 && (c1 = *l++))
+	while (len > 0)
 	{
-		if (c1 < 128)
+		c1 = *l;
+		if (c1 == 0)			/* NUL inside the counted input is an error */
+			report_invalid_encoding(encoding, (const char *) l, len);
+		if (!IS_HIGHBIT_SET(c1))	/* ASCII is copied through unchanged */
 			*p++ = c1;
 		else
 		{
-			c2 = tab[c1 - 128];
+			c2 = tab[c1 - HIGHBIT];		/* zero entry means "no mapping" */
 			if (c2)
 			{
 				*p++ = lc;
 				*p++ = c2;
 			}
 			else
-			{
-				*p++ = ' ';		/* cannot convert */
-			}
+				report_untranslatable_char(encoding, PG_MULE_INTERNAL,
+										   (const char *) l, len);
 		}
+		l++;
+		len--;
 	}
 	*p = '\0';
 }
 
 /*
  * mic2latin_with_table: a generic single byte charset encoding
- * conversion from the mule internal code to a local charset
- * with a encoding conversion table.
- * the table is ordered according to the second byte of the mule
- * internal code starting from 128 (0x80).
- * each entry in the table
- * holds the corresponding code point for the local code.
+ * conversion from the mule internal code to a local charset.
+ *
+ * mic points to the source string of length len
+ * p is the output area (must be large enough!)
+ * lc is the mule character set id for the local encoding
+ * encoding is the PG identifier for the local encoding
+ * tab holds conversion entries for the mule internal code's
+ * second byte, starting from 128 (0x80). each entry in the table
+ * holds the corresponding code point for the local charset.
  */
 void
-mic2latin_with_table(
-					 unsigned char *mic,		/* mule internal code
-												 * (source) */
-					 unsigned char *p,	/* local code (destination) */
-					 int len,	/* length of p */
-					 int lc,	/* leading character */
-					 unsigned char *tab /* code conversion table */
-)
+mic2latin_with_table(const unsigned char *mic,
+					 unsigned char *p,
+					 int len,
+					 int lc,
+					 int encoding,
+					 const unsigned char *tab)
 {
-
 	unsigned char c1,
 				c2;
 
-	while (len-- > 0 && (c1 = *mic++))
+	while (len > 0)
 	{
-		if (c1 < 128)
-			*p++ = c1;
-		else if (c1 == lc)
+		c1 = *mic;
+		if (c1 == 0)
+			report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
+		if (!IS_HIGHBIT_SET(c1))
 		{
-			c1 = *mic++;
+			/* easy for ASCII */
+			*p++ = c1;
+			mic++;
 			len--;
-			c2 = tab[c1 - 128];
-			if (c2)
-				*p++ = c2;
-			else
-			{
-				*p++ = ' ';		/* cannot convert */
-			}
 		}
 		else
 		{
-			*p++ = ' ';			/* bogus character */
+			int		l = pg_mic_mblen(mic);
+
+			if (len < l)
+				report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
+										len);
+			if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]) ||
+				(c2 = tab[mic[1] - HIGHBIT]) == 0)
+			{
+				report_untranslatable_char(PG_MULE_INTERNAL, encoding,
+										   (const char *) mic, len);
+				break;			/* keep compiler quiet */
+			}
+			*p++ = c2;
+			mic += 2;
+			len -= 2;
 		}
 	}
 	*p = '\0';
@@ -332,27 +268,40 @@ compare2(const void *p1, const void *p2)
 }
 
 /*
- * UTF-8 ---> local code
+ * UTF8 ---> local code
  *
- * utf: input UTF-8 string. Its length is limited by "len" parameter
- *		or a null terminator.
- * iso: pointer to the output.
+ * utf: input UTF8 string (need not be null-terminated).
+ * iso: pointer to the output area (must be large enough!)
  * map: the conversion map.
  * size: the size of the conversion map.
+ * encoding: the PG identifier for the local encoding.
+ * len: length of input string.
  */
 void
-UtfToLocal(unsigned char *utf, unsigned char *iso,
-		   pg_utf_to_local *map, int size, int len)
+UtfToLocal(const unsigned char *utf, unsigned char *iso,
+		   const pg_utf_to_local *map, int size, int encoding, int len)
 {
 	unsigned int iutf;
 	int			l;
 	pg_utf_to_local *p;
 
-	for (; len > 0 && *utf; len -= l)
+	for (; len > 0; len -= l)
 	{
+		/* "break" cases all represent errors */
+		if (*utf == '\0')
+			break;
+
 		l = pg_utf_mblen(utf);
+
+		if (len < l)
+			break;
+
+		if (!pg_utf8_islegal(utf, l))
+			break;
+
 		if (l == 1)
 		{
+			/* ASCII case is easy */
 			*iso++ = *utf++;
 			continue;
 		}
@@ -361,22 +310,27 @@ UtfToLocal(unsigned char *utf, unsigned char *iso,
 			iutf = *utf++ << 8;
 			iutf |= *utf++;
 		}
-		else
+		else if (l == 3)
 		{
 			iutf = *utf++ << 16;
 			iutf |= *utf++ << 8;
 			iutf |= *utf++;
 		}
+		else if (l == 4)		/* 4-byte sequence: pack all bytes into iutf */
+		{
+			iutf = (unsigned int) *utf++ << 24; /* lead >= 0xF0 overflows int */
+			iutf |= *utf++ << 16;
+			iutf |= *utf++ << 8;
+			iutf |= *utf++;
+		}
+
 		p = bsearch(&iutf, map, size,
 					sizeof(pg_utf_to_local), compare1);
+
 		if (p == NULL)
-		{
-			ereport(WARNING,
-					(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
-				  errmsg("ignoring unconvertible UTF-8 character 0x%04x",
-						 iutf)));
-			continue;
-		}
+			report_untranslatable_char(PG_UTF8, encoding,
+									   (const char *) (utf - l), len);
+
 		if (p->code & 0xff000000)
 			*iso++ = p->code >> 24;
 		if (p->code & 0x00ff0000)
@@ -386,15 +340,26 @@ UtfToLocal(unsigned char *utf, unsigned char *iso,
 		if (p->code & 0x000000ff)
 			*iso++ = p->code & 0x000000ff;
 	}
+
+	if (len > 0)
+		report_invalid_encoding(PG_UTF8, (const char *) utf, len);
+
 	*iso = '\0';
 }
 
 /*
- * local code ---> UTF-8
+ * local code ---> UTF8
+ *
+ * iso: input local string (need not be null-terminated).
+ * utf: pointer to the output area (must be large enough!)
+ * map: the conversion map.
+ * size: the size of the conversion map.
+ * encoding: the PG identifier for the local encoding.
+ * len: length of input string.
  */
 void
-LocalToUtf(unsigned char *iso, unsigned char *utf,
-		   pg_local_to_utf *map, int size, int encoding, int len)
+LocalToUtf(const unsigned char *iso, unsigned char *utf,
+		   const pg_local_to_utf *map, int size, int encoding, int len)
 {
 	unsigned int iiso;
 	int			l;
@@ -405,16 +370,23 @@ LocalToUtf(unsigned char *iso, unsigned char *utf,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("invalid encoding number: %d", encoding)));
 
-	for (; len > 0 && *iso; len -= l)
+	for (; len > 0; len -= l)
 	{
-		if (*iso < 0x80)
+		/* "break" cases all represent errors */
+		if (*iso == '\0')
+			break;
+
+		if (!IS_HIGHBIT_SET(*iso))
 		{
+			/* ASCII case is easy */
 			*utf++ = *iso++;
 			l = 1;
 			continue;
 		}
 
-		l = pg_encoding_mblen(encoding, iso);
+		l = pg_encoding_verifymb(encoding, (const char *) iso, len);
+		if (l < 0)
+			break;
 
 		if (l == 1)
 			iiso = *iso++;
@@ -436,16 +408,13 @@ LocalToUtf(unsigned char *iso, unsigned char *utf,
 			iiso |= *iso++ << 8;
 			iiso |= *iso++;
 		}
+
 		p = bsearch(&iiso, map, size,
 					sizeof(pg_local_to_utf), compare2);
 		if (p == NULL)
-		{
-			ereport(WARNING,
-					(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
-					 errmsg("ignoring unconvertible %s character 0x%04x",
-							(&pg_enc2name_tbl[encoding])->name, iiso)));
-			continue;
-		}
+			report_untranslatable_char(encoding, PG_UTF8,
+									   (const char *) (iso - l), len);
+
 		if (p->utf & 0xff000000)
 			*utf++ = p->utf >> 24;
 		if (p->utf & 0x00ff0000)
@@ -455,5 +424,9 @@ LocalToUtf(unsigned char *iso, unsigned char *utf,
 		if (p->utf & 0x000000ff)
 			*utf++ = p->utf & 0x000000ff;
 	}
+
+	if (len > 0)
+		report_invalid_encoding(encoding, (const char *) iso, len);
+
 	*utf = '\0';
 }
diff --git a/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c b/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c
index 436218fd7f..0ccf97cfab 100644
--- a/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c
+++ b/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c,v 1.9 2004/12/31 22:01:48 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c,v 1.9.4.1 2006/05/21 20:06:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -70,14 +70,14 @@ extern Datum alt_to_iso(PG_FUNCTION_ARGS);
  * ----------
  */
 
-static void koi8r2mic(unsigned char *l, unsigned char *p, int len);
-static void mic2koi8r(unsigned char *mic, unsigned char *p, int len);
-static void iso2mic(unsigned char *l, unsigned char *p, int len);
-static void mic2iso(unsigned char *mic, unsigned char *p, int len);
-static void win12512mic(unsigned char *l, unsigned char *p, int len);
-static void mic2win1251(unsigned char *mic, unsigned char *p, int len);
-static void alt2mic(unsigned char *l, unsigned char *p, int len);
-static void mic2alt(unsigned char *mic, unsigned char *p, int len);
+static void koi8r2mic(const unsigned char *l, unsigned char *p, int len);
+static void mic2koi8r(const unsigned char *mic, unsigned char *p, int len);
+static void iso2mic(const unsigned char *l, unsigned char *p, int len);
+static void mic2iso(const unsigned char *mic, unsigned char *p, int len);
+static void win12512mic(const unsigned char *l, unsigned char *p, int len);
+static void mic2win1251(const unsigned char *mic, unsigned char *p, int len);
+static void alt2mic(const unsigned char *l, unsigned char *p, int len);
+static void mic2alt(const unsigned char *mic, unsigned char *p, int len);
 
 Datum
 koi8r_to_mic(PG_FUNCTION_ARGS)
@@ -401,7 +401,7 @@ win1251_to_iso(PG_FUNCTION_ARGS)
 
 	buf = palloc(len * ENCODING_GROWTH_RATE);
 	win12512mic(src, buf, len);
-	mic2win1251(buf, dest, strlen(buf));
+	mic2iso(buf, dest, strlen(buf));
 	pfree(buf);
 
 	PG_RETURN_VOID();
@@ -441,7 +441,7 @@ alt_to_iso(PG_FUNCTION_ARGS)
 
 	buf = palloc(len * ENCODING_GROWTH_RATE);
 	alt2mic(src, buf, len);
-	mic2alt(buf, dest, strlen(buf));
+	mic2iso(buf, dest, strlen(buf));
 	pfree(buf);
 
 	PG_RETURN_VOID();
@@ -460,23 +460,23 @@ alt_to_iso(PG_FUNCTION_ARGS)
 
 /* koi8r2mic: KOI8-R to Mule internal code */
 static void
-koi8r2mic(unsigned char *l, unsigned char *p, int len)
+koi8r2mic(const unsigned char *l, unsigned char *p, int len)
 {
-	latin2mic(l, p, len, LC_KOI8_R);
+	latin2mic(l, p, len, LC_KOI8_R, PG_KOI8R);
 }
 
 /* mic2koi8r: Mule internal code to KOI8-R */
 static void
-mic2koi8r(unsigned char *mic, unsigned char *p, int len)
+mic2koi8r(const unsigned char *mic, unsigned char *p, int len)
 {
-	mic2latin(mic, p, len, LC_KOI8_R);
+	mic2latin(mic, p, len, LC_KOI8_R, PG_KOI8R);
 }
 
 /* iso2mic: ISO-8859-5 to Mule internal code */
 static void
-iso2mic(unsigned char *l, unsigned char *p, int len)
+iso2mic(const unsigned char *l, unsigned char *p, int len)
 {
-	static unsigned char iso2koi[] = {
+	static const unsigned char iso2koi[] = {
 		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -495,14 +495,14 @@ iso2mic(unsigned char *l, unsigned char *p, int len)
 		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
 	};
 
-	latin2mic_with_table(l, p, len, LC_KOI8_R, iso2koi);
+	latin2mic_with_table(l, p, len, LC_KOI8_R, PG_ISO_8859_5, iso2koi);
 }
 
 /* mic2iso: Mule internal code to ISO8859-5 */
 static void
-mic2iso(unsigned char *mic, unsigned char *p, int len)
+mic2iso(const unsigned char *mic, unsigned char *p, int len)
 {
-	static unsigned char koi2iso[] = {
+	static const unsigned char koi2iso[] = {
 		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -521,14 +521,14 @@ mic2iso(unsigned char *mic, unsigned char *p, int len)
 		0xcc, 0xcb, 0xb7, 0xc8, 0xcd, 0xc9, 0xc7, 0xca
 	};
 
-	mic2latin_with_table(mic, p, len, LC_KOI8_R, koi2iso);
+	mic2latin_with_table(mic, p, len, LC_KOI8_R, PG_ISO_8859_5, koi2iso);
 }
 
 /* win2mic: CP1251 to Mule internal code */
 static void
-win12512mic(unsigned char *l, unsigned char *p, int len)
+win12512mic(const unsigned char *l, unsigned char *p, int len)
 {
-	static unsigned char win2koi[] = {
+	static const unsigned char win2koi[] = {
 		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -547,14 +547,14 @@ win12512mic(unsigned char *l, unsigned char *p, int len)
 		0xdb, 0xdd, 0xdf, 0xd9, 0xd8, 0xdc, 0xc0, 0xd1
 	};
 
-	latin2mic_with_table(l, p, len, LC_KOI8_R, win2koi);
+	latin2mic_with_table(l, p, len, LC_KOI8_R, PG_WIN1251, win2koi);
 }
 
 /* mic2win: Mule internal code to CP1251 */
 static void
-mic2win1251(unsigned char *mic, unsigned char *p, int len)
+mic2win1251(const unsigned char *mic, unsigned char *p, int len)
 {
-	static unsigned char koi2win[] = {
+	static const unsigned char koi2win[] = {
 		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -573,14 +573,14 @@ mic2win1251(unsigned char *mic, unsigned char *p, int len)
 		0xdc, 0xdb, 0xc7, 0xd8, 0xdd, 0xd9, 0xd7, 0xda
 	};
 
-	mic2latin_with_table(mic, p, len, LC_KOI8_R, koi2win);
+	mic2latin_with_table(mic, p, len, LC_KOI8_R, PG_WIN1251, koi2win);
 }
 
 /* alt2mic: CP866 to Mule internal code */
 static void
-alt2mic(unsigned char *l, unsigned char *p, int len)
+alt2mic(const unsigned char *l, unsigned char *p, int len)
 {
-	static unsigned char alt2koi[] = {
+	static const unsigned char alt2koi[] = {
 		0xe1, 0xe2, 0xf7, 0xe7, 0xe4, 0xe5, 0xf6, 0xfa,
 		0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0,
 		0xf2, 0xf3, 0xf4, 0xf5, 0xe6, 0xe8, 0xe3, 0xfe,
@@ -599,14 +599,14 @@ alt2mic(unsigned char *l, unsigned char *p, int len)
 		0xb6, 0xa6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
 	};
 
-	latin2mic_with_table(l, p, len, LC_KOI8_R, alt2koi);
+	latin2mic_with_table(l, p, len, LC_KOI8_R, PG_ALT, alt2koi);
 }
 
 /* mic2alt: Mule internal code to CP866 */
 static void
-mic2alt(unsigned char *mic, unsigned char *p, int len)
+mic2alt(const unsigned char *mic, unsigned char *p, int len)
 {
-	static unsigned char koi2alt[] = {
+	static const unsigned char koi2alt[] = {
 		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -625,5 +625,5 @@ mic2alt(unsigned char *mic, unsigned char *p, int len)
 		0x9c, 0x9b, 0x87, 0x98, 0x9d, 0x99, 0x97, 0x9a
 	};
 
-	mic2latin_with_table(mic, p, len, LC_KOI8_R, koi2alt);
+	mic2latin_with_table(mic, p, len, LC_KOI8_R, PG_ALT, koi2alt);
 }
diff --git a/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c b/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c
index 30d7bf7634..4d3bdfe2d1 100644
--- a/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c
+++ b/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c,v 1.9 2004/12/31 22:01:51 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c,v 1.9.4.1 2006/05/21 20:06:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -32,8 +32,8 @@ extern Datum mic_to_euc_cn(PG_FUNCTION_ARGS);
  * ----------
  */
 
-static void euc_cn2mic(unsigned char *euc, unsigned char *p, int len);
-static void mic2euc_cn(unsigned char *mic, unsigned char *p, int len);
+static void euc_cn2mic(const unsigned char *euc, unsigned char *p, int len);
+static void mic2euc_cn(const unsigned char *mic, unsigned char *p, int len);
 
 Datum
 euc_cn_to_mic(PG_FUNCTION_ARGS)
@@ -71,23 +71,30 @@ mic_to_euc_cn(PG_FUNCTION_ARGS)
  * EUC_CN ---> MIC
  */
 static void
-euc_cn2mic(unsigned char *euc, unsigned char *p, int len)
+euc_cn2mic(const unsigned char *euc, unsigned char *p, int len)
 {
 	int			c1;
 
-	while (len >= 0 && (c1 = *euc++))
+	while (len > 0)
 	{
-		if (c1 & 0x80)
+		c1 = *euc;
+		if (IS_HIGHBIT_SET(c1))
 		{
-			len -= 2;
+			if (len < 2 || !IS_HIGHBIT_SET(euc[1]))
+				report_invalid_encoding(PG_EUC_CN, (const char *) euc, len);
 			*p++ = LC_GB2312_80;
 			*p++ = c1;
-			*p++ = *euc++;
+			*p++ = euc[1];
+			euc += 2;
+			len -= 2;
 		}
 		else
 		{						/* should be ASCII */
-			len--;
+			if (c1 == 0)
+				report_invalid_encoding(PG_EUC_CN, (const char *) euc, len);
 			*p++ = c1;
+			euc++;
+			len--;
 		}
 	}
 	*p = '\0';
@@ -97,27 +104,34 @@ euc_cn2mic(unsigned char *euc, unsigned char *p, int len)
  * MIC ---> EUC_CN
  */
 static void
-mic2euc_cn(unsigned char *mic, unsigned char *p, int len)
+mic2euc_cn(const unsigned char *mic, unsigned char *p, int len)
 {
 	int			c1;
 
-	while (len >= 0 && (c1 = *mic))
+	while (len > 0)
 	{
-		len -= pg_mic_mblen(mic++);
-
-		if (c1 == LC_GB2312_80)
+		c1 = *mic;
+		if (IS_HIGHBIT_SET(c1))
 		{
+			if (c1 != LC_GB2312_80)
+				report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_CN,
+										   (const char *) mic, len);
+			if (len < 3 || !IS_HIGHBIT_SET(mic[1]) || !IS_HIGHBIT_SET(mic[2]))
+				report_invalid_encoding(PG_MULE_INTERNAL,
+										(const char *) mic, len);
+			mic++;
 			*p++ = *mic++;
 			*p++ = *mic++;
-		}
-		else if (c1 > 0x7f)
-		{						/* cannot convert to EUC_CN! */
-			mic--;
-			pg_print_bogus_char(&mic, &p);
+			len -= 3;
 		}
 		else
 		{						/* should be ASCII */
+			if (c1 == 0)
+				report_invalid_encoding(PG_MULE_INTERNAL,
+										(const char *) mic, len);
 			*p++ = c1;
+			mic++;
+			len--;
 		}
 	}
 	*p = '\0';
diff --git a/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c b/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c
index 0958af9cd7..6dba03b52f 100644
--- a/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c
+++ b/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c,v 1.9.4.3 2006/03/04 12:37:01 ishii Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c,v 1.9.4.4 2006/05/21 20:06:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -22,9 +22,6 @@
 #define PGSJISALTCODE 0x81ac
 #define PGEUCALTCODE 0xa2ae
 
-#define ISSJISHEAD(c) ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc))
-#define ISSJISTAIL(c) ((c >= 0x40 && c <= 0x7e) || (c >= 0x80 && c <= 0xfc))
-
 /*
  * conversion table between SJIS UDC (IBM kanji) and EUC_JP
  */
@@ -57,10 +54,10 @@ extern Datum mic_to_sjis(PG_FUNCTION_ARGS);
  * ----------
  */
 
-static void sjis2mic(unsigned char *sjis, unsigned char *p, int len);
-static void mic2sjis(unsigned char *mic, unsigned char *p, int len);
-static void euc_jp2mic(unsigned char *euc, unsigned char *p, int len);
-static void mic2euc_jp(unsigned char *mic, unsigned char *p, int len);
+static void sjis2mic(const unsigned char *sjis, unsigned char *p, int len);
+static void mic2sjis(const unsigned char *mic, unsigned char *p, int len);
+static void euc_jp2mic(const unsigned char *euc, unsigned char *p, int len);
+static void mic2euc_jp(const unsigned char *mic, unsigned char *p, int len);
 
 Datum
 euc_jp_to_sjis(PG_FUNCTION_ARGS)
@@ -170,38 +167,34 @@ mic_to_sjis(PG_FUNCTION_ARGS)
  * SJIS ---> MIC
  */
 static void
-sjis2mic(unsigned char *sjis, unsigned char *p, int len)
+sjis2mic(const unsigned char *sjis, unsigned char *p, int len)
 {
 	int			c1,
 				c2,
-/* Eiji Tokuya patched begin */
 				i,
 				k,
 				k2;
 
-/* Eiji Tokuya patched end */
-	while (len >= 0 && (c1 = *sjis++))
+	while (len > 0)
 	{
+		c1 = *sjis;
 		if (c1 >= 0xa1 && c1 <= 0xdf)
 		{
 			/* JIS X0201 (1 byte kana) */
-			len--;
 			*p++ = LC_JISX0201K;
 			*p++ = c1;
+			sjis++;
+			len--;
 		}
-		else if (c1 > 0x7f)
+		else if (IS_HIGHBIT_SET(c1))
 		{
 			/*
 			 * JIS X0208, X0212, user defined extended characters
 			 */
-			c2 = *sjis++;
-			if (!ISSJISHEAD(c1) || !ISSJISTAIL(c2))
-				ereport(ERROR,
-						(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-					errmsg("invalid byte sequence for encoding \"SJIS\": 0x%02x%02x",
-						    c1, c2)));
+			if (len < 2 || !ISSJISHEAD(c1) || !ISSJISTAIL(sjis[1]))
+				report_invalid_encoding(PG_SJIS, (const char *) sjis, len);
+			c2 = sjis[1];
 			k = (c1 << 8) + c2;
-/* Eiji Tokuya patched begin */
 			if (k >= 0xed40 && k < 0xf040)
 			{
 				/* NEC selection IBM kanji */
@@ -220,19 +213,15 @@ sjis2mic(unsigned char *sjis, unsigned char *p, int len)
 			}
 
 			if (k < 0xeb3f)
-/* Eiji Tokuya patched end */
 			{
 				/* JIS X0208 */
-				len -= 2;
 				*p++ = LC_JISX0208;
 				*p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e);
 				*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
 			}
-/* Eiji Tokuya patched begin */
 			else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc))
 			{
 				/* NEC selection IBM kanji - Other undecided justice */
-/* Eiji Tokuya patched end */
 				*p++ = LC_JISX0208;
 				*p++ = PGEUCALTCODE >> 8;
 				*p++ = PGEUCALTCODE & 0xff;
@@ -243,7 +232,6 @@ sjis2mic(unsigned char *sjis, unsigned char *p, int len)
 				 * UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 -
 				 * 0x7e7e EUC 0xf5a1 - 0xfefe
 				 */
-				len -= 2;
 				*p++ = LC_JISX0208;
 				c1 -= 0x6f;
 				*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
@@ -255,7 +243,6 @@ sjis2mic(unsigned char *sjis, unsigned char *p, int len)
 				 * UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 -
 				 * 0x7e7e EUC 0x8ff5a1 - 0x8ffefe
 				 */
-				len -= 2;
 				*p++ = LC_JISX0212;
 				c1 -= 0x74;
 				*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
@@ -265,9 +252,7 @@ sjis2mic(unsigned char *sjis, unsigned char *p, int len)
 			{
 				/*
 				 * mapping IBM kanji to X0208 and X0212
-				 *
 				 */
-				len -= 2;
 				for (i = 0;; i++)
 				{
 					k2 = ibmkanji[i].sjis;
@@ -291,11 +276,16 @@ sjis2mic(unsigned char *sjis, unsigned char *p, int len)
 					}
 				}
 			}
+			sjis += 2;
+			len -= 2;
 		}
 		else
 		{						/* should be ASCII */
-			len--;
+			if (c1 == 0)
+				report_invalid_encoding(PG_SJIS, (const char *) sjis, len);
 			*p++ = c1;
+			sjis++;
+			len--;
 		}
 	}
 	*p = '\0';
@@ -305,22 +295,37 @@ sjis2mic(unsigned char *sjis, unsigned char *p, int len)
  * MIC ---> SJIS
  */
 static void
-mic2sjis(unsigned char *mic, unsigned char *p, int len)
+mic2sjis(const unsigned char *mic, unsigned char *p, int len)
 {
 	int			c1,
 				c2,
-				k;
+				k,
+				l;
 
-	while (len >= 0 && (c1 = *mic))
+	while (len > 0)
 	{
-		len -= pg_mic_mblen(mic++);
-
+		c1 = *mic;
+		if (!IS_HIGHBIT_SET(c1))
+		{
+			/* ASCII */
+			if (c1 == 0)
+				report_invalid_encoding(PG_MULE_INTERNAL,
+										(const char *) mic, len);
+			*p++ = c1;
+			mic++;
+			len--;
+			continue;
+		}
+		l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
+		if (l < 0)
+			report_invalid_encoding(PG_MULE_INTERNAL,
+									(const char *) mic, len);
 		if (c1 == LC_JISX0201K)
-			*p++ = *mic++;
+			*p++ = mic[1];
 		else if (c1 == LC_JISX0208)
 		{
-			c1 = *mic++;
-			c2 = *mic++;
+			c1 = mic[1];
+			c2 = mic[2];
 			k = (c1 << 8) | (c2 & 0xff);
 			if (k >= 0xf5a1)
 			{
@@ -337,8 +342,8 @@ mic2sjis(unsigned char *mic, unsigned char *p, int len)
 			int			i,
 						k2;
 
-			c1 = *mic++;
-			c2 = *mic++;
+			c1 = mic[1];
+			c2 = mic[2];
 			k = c1 << 8 | c2;
 			if (k >= 0xf5a1)
 			{
@@ -369,16 +374,11 @@ mic2sjis(unsigned char *mic, unsigned char *p, int len)
 				}
 			}
 		}
-		else if (c1 > 0x7f)
-		{
-			/* cannot convert to SJIS! */
-			*p++ = PGSJISALTCODE >> 8;
-			*p++ = PGSJISALTCODE & 0xff;
-		}
 		else
-		{						/* should be ASCII */
-			*p++ = c1;
-		}
+			report_untranslatable_char(PG_MULE_INTERNAL, PG_SJIS,
+									   (const char *) mic, len);
+		mic += l;
+		len -= l;
 	}
 	*p = '\0';
 }
@@ -387,37 +387,48 @@ mic2sjis(unsigned char *mic, unsigned char *p, int len)
  * EUC_JP ---> MIC
  */
 static void
-euc_jp2mic(unsigned char *euc, unsigned char *p, int len)
+euc_jp2mic(const unsigned char *euc, unsigned char *p, int len)
 {
 	int			c1;
+	int			l;
 
-	while (len >= 0 && (c1 = *euc++))
+	while (len > 0)
 	{
+		c1 = *euc;
+		if (!IS_HIGHBIT_SET(c1))
+		{
+			/* ASCII */
+			if (c1 == 0)
+				report_invalid_encoding(PG_EUC_JP,
+										(const char *) euc, len);
+			*p++ = c1;
+			euc++;
+			len--;
+			continue;
+		}
+		l = pg_encoding_verifymb(PG_EUC_JP, (const char *) euc, len);
+		if (l < 0)
+			report_invalid_encoding(PG_EUC_JP,
+									(const char *) euc, len);
 		if (c1 == SS2)
 		{						/* 1 byte kana? */
-			len -= 2;
 			*p++ = LC_JISX0201K;
-			*p++ = *euc++;
+			*p++ = euc[1];
 		}
 		else if (c1 == SS3)
 		{						/* JIS X0212 kanji? */
-			len -= 3;
 			*p++ = LC_JISX0212;
-			*p++ = *euc++;
-			*p++ = *euc++;
-		}
-		else if (c1 & 0x80)
-		{						/* kanji? */
-			len -= 2;
-			*p++ = LC_JISX0208;
-			*p++ = c1;
-			*p++ = *euc++;
+			*p++ = euc[1];
+			*p++ = euc[2];
 		}
 		else
-		{						/* should be ASCII */
-			len--;
+		{						/* kanji? */
+			*p++ = LC_JISX0208;
 			*p++ = c1;
+			*p++ = euc[1];
 		}
+		euc += l;
+		len -= l;
 	}
 	*p = '\0';
 }
@@ -426,39 +437,50 @@ euc_jp2mic(unsigned char *euc, unsigned char *p, int len)
  * MIC ---> EUC_JP
  */
 static void
-mic2euc_jp(unsigned char *mic, unsigned char *p, int len)
+mic2euc_jp(const unsigned char *mic, unsigned char *p, int len)
 {
 	int			c1;
+	int			l;
 
-	while (len >= 0 && (c1 = *mic))
+	while (len > 0)
 	{
-		len -= pg_mic_mblen(mic++);
-
+		c1 = *mic;
+		if (!IS_HIGHBIT_SET(c1))
+		{
+			/* ASCII */
+			if (c1 == 0)
+				report_invalid_encoding(PG_MULE_INTERNAL,
+										(const char *) mic, len);
+			*p++ = c1;
+			mic++;
+			len--;
+			continue;
+		}
+		l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
+		if (l < 0)
+			report_invalid_encoding(PG_MULE_INTERNAL,
+									(const char *) mic, len);
 		if (c1 == LC_JISX0201K)
 		{
 			*p++ = SS2;
-			*p++ = *mic++;
+			*p++ = mic[1];
 		}
 		else if (c1 == LC_JISX0212)
 		{
 			*p++ = SS3;
-			*p++ = *mic++;
-			*p++ = *mic++;
+			*p++ = mic[1];
+			*p++ = mic[2];
 		}
 		else if (c1 == LC_JISX0208)
 		{
-			*p++ = *mic++;
-			*p++ = *mic++;
-		}
-		else if (c1 > 0x7f)
-		{						/* cannot convert to EUC_JP! */
-			mic--;
-			pg_print_bogus_char(&mic, &p);
+			*p++ = mic[1];
+			*p++ = mic[2];
 		}
 		else
-		{						/* should be ASCII */
-			*p++ = c1;
-		}
+			report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_JP,
+									   (const char *) mic, len);
+		mic += l;
+		len -= l;
 	}
 	*p = '\0';
 }
diff --git a/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c b/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c
index be7df0fc43..4ef0302cc8 100644
--- a/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c
+++ b/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c,v 1.9 2004/12/31 22:01:56 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c,v 1.9.4.1 2006/05/21 20:06:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -32,8 +32,8 @@ extern Datum mic_to_euc_kr(PG_FUNCTION_ARGS);
  * ----------
  */
 
-static void euc_kr2mic(unsigned char *euc, unsigned char *p, int len);
-static void mic2euc_kr(unsigned char *mic, unsigned char *p, int len);
+static void euc_kr2mic(const unsigned char *euc, unsigned char *p, int len);
+static void mic2euc_kr(const unsigned char *mic, unsigned char *p, int len);
 
 Datum
 euc_kr_to_mic(PG_FUNCTION_ARGS)
@@ -71,23 +71,34 @@ mic_to_euc_kr(PG_FUNCTION_ARGS)
  * EUC_KR ---> MIC
  */
 static void
-euc_kr2mic(unsigned char *euc, unsigned char *p, int len)
+euc_kr2mic(const unsigned char *euc, unsigned char *p, int len)
 {
 	int			c1;
+	int			l;
 
-	while (len >= 0 && (c1 = *euc++))
+	while (len > 0)
 	{
-		if (c1 & 0x80)
+		c1 = *euc;
+		if (IS_HIGHBIT_SET(c1))
 		{
-			len -= 2;
+			l = pg_encoding_verifymb(PG_EUC_KR, (const char *) euc, len);
+			if (l != 2)
+				report_invalid_encoding(PG_EUC_KR,
+										(const char *) euc, len);
 			*p++ = LC_KS5601;
 			*p++ = c1;
-			*p++ = *euc++;
+			*p++ = euc[1];
+			euc += 2;
+			len -= 2;
 		}
 		else
 		{						/* should be ASCII */
-			len--;
+			if (c1 == 0)
+				report_invalid_encoding(PG_EUC_KR,
+										(const char *) euc, len);
 			*p++ = c1;
+			euc++;
+			len--;
 		}
 	}
 	*p = '\0';
@@ -97,28 +108,39 @@ euc_kr2mic(unsigned char *euc, unsigned char *p, int len)
  * MIC ---> EUC_KR
  */
 static void
-mic2euc_kr(unsigned char *mic, unsigned char *p, int len)
+mic2euc_kr(const unsigned char *mic, unsigned char *p, int len)
 {
 	int			c1;
+	int			l;
 
-	while (len >= 0 && (c1 = *mic))
+	while (len > 0)
 	{
-		len -= pg_mic_mblen(mic++);
-
+		c1 = *mic;
+		if (!IS_HIGHBIT_SET(c1))
+		{
+			/* ASCII */
+			if (c1 == 0)
+				report_invalid_encoding(PG_MULE_INTERNAL,
+										(const char *) mic, len);
+			*p++ = c1;
+			mic++;
+			len--;
+			continue;
+		}
+		l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
+		if (l < 0)
+			report_invalid_encoding(PG_MULE_INTERNAL,
+									(const char *) mic, len);
 		if (c1 == LC_KS5601)
 		{
-			*p++ = *mic++;
-			*p++ = *mic++;
-		}
-		else if (c1 > 0x7f)
-		{						/* cannot convert to EUC_KR! */
-			mic--;
-			pg_print_bogus_char(&mic, &p);
+			*p++ = mic[1];
+			*p++ = mic[2];
 		}
 		else
-		{						/* should be ASCII */
-			*p++ = c1;
-		}
+			report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_KR,
+									   (const char *) mic, len);
+		mic += l;
+		len -= l;
 	}
 	*p = '\0';
 }
diff --git a/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c
index 5cd6c0b763..8e3fd653c4 100644
--- a/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c
+++ b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c,v 1.9 2004/12/31 22:02:07 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c,v 1.9.4.1 2006/05/21 20:06:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -42,10 +42,10 @@ extern Datum mic_to_big5(PG_FUNCTION_ARGS);
  * ----------
  */
 
-static void big52mic(unsigned char *big5, unsigned char *p, int len);
-static void mic2big5(unsigned char *mic, unsigned char *p, int len);
-static void euc_tw2mic(unsigned char *euc, unsigned char *p, int len);
-static void mic2euc_tw(unsigned char *mic, unsigned char *p, int len);
+static void big52mic(const unsigned char *big5, unsigned char *p, int len);
+static void mic2big5(const unsigned char *mic, unsigned char *p, int len);
+static void euc_tw2mic(const unsigned char *euc, unsigned char *p, int len);
+static void mic2euc_tw(const unsigned char *mic, unsigned char *p, int len);
 
 Datum
 euc_tw_to_big5(PG_FUNCTION_ARGS)
@@ -114,7 +114,7 @@ mic_to_euc_tw(PG_FUNCTION_ARGS)
 	Assert(PG_GETARG_INT32(1) == PG_EUC_TW);
 	Assert(len >= 0);
 
-	mic2big5(src, dest, len);
+	mic2euc_tw(src, dest, len);
 
 	PG_RETURN_VOID();
 }
@@ -155,39 +155,52 @@ mic_to_big5(PG_FUNCTION_ARGS)
  * EUC_TW ---> MIC
  */
 static void
-euc_tw2mic(unsigned char *euc, unsigned char *p, int len)
+euc_tw2mic(const unsigned char *euc, unsigned char *p, int len)
 {
 	int			c1;
+	int			l;
 
-	while (len >= 0 && (c1 = *euc++))
+	while (len > 0)
 	{
-		if (c1 == SS2)
+		c1 = *euc;
+		if (IS_HIGHBIT_SET(c1))
 		{
-			len -= 4;
-			c1 = *euc++;		/* plane No. */
-			if (c1 == 0xa1)
-				*p++ = LC_CNS11643_1;
-			else if (c1 == 0xa2)
-				*p++ = LC_CNS11643_2;
-			else
+			l = pg_encoding_verifymb(PG_EUC_TW, (const char *) euc, len);
+			if (l < 0)
+				report_invalid_encoding(PG_EUC_TW,
+										(const char *) euc, len);
+			if (c1 == SS2)
 			{
-				*p++ = 0x9d;	/* LCPRV2 */
-				*p++ = 0xa3 - c1 + LC_CNS11643_3;
+				c1 = euc[1];		/* plane No. */
+				if (c1 == 0xa1)
+					*p++ = LC_CNS11643_1;
+				else if (c1 == 0xa2)
+					*p++ = LC_CNS11643_2;
+				else
+				{
+					*p++ = 0x9d;	/* LCPRV2 */
+					*p++ = c1 - 0xa3 + LC_CNS11643_3;
+				}
+				*p++ = euc[2];
+				*p++ = euc[3];
 			}
-			*p++ = *euc++;
-			*p++ = *euc++;
-		}
-		else if (c1 & 0x80)
-		{						/* CNS11643-1 */
-			len -= 2;
-			*p++ = LC_CNS11643_1;
-			*p++ = c1;
-			*p++ = *euc++;
+			else
+			{						/* CNS11643-1 */
+				*p++ = LC_CNS11643_1;
+				*p++ = c1;
+				*p++ = euc[1];
+			}
+			euc += l;
+			len -= l;
 		}
 		else
 		{						/* should be ASCII */
-			len--;
+			if (c1 == 0)
+				report_invalid_encoding(PG_EUC_TW,
+										(const char *) euc, len);
 			*p++ = c1;
+			euc++;
+			len--;
 		}
 	}
 	*p = '\0';
@@ -197,42 +210,54 @@ euc_tw2mic(unsigned char *euc, unsigned char *p, int len)
  * MIC ---> EUC_TW
  */
 static void
-mic2euc_tw(unsigned char *mic, unsigned char *p, int len)
+mic2euc_tw(const unsigned char *mic, unsigned char *p, int len)
 {
 	int			c1;
+	int			l;
 
-	while (len >= 0 && (c1 = *mic))
+	while (len > 0)
 	{
-		len -= pg_mic_mblen(mic++);
-
+		c1 = *mic;
+		if (!IS_HIGHBIT_SET(c1))
+		{
+			/* ASCII */
+			if (c1 == 0)
+				report_invalid_encoding(PG_MULE_INTERNAL,
+										(const char *) mic, len);
+			*p++ = c1;
+			mic++;
+			len--;
+			continue;
+		}
+		l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
+		if (l < 0)
+			report_invalid_encoding(PG_MULE_INTERNAL,
+									(const char *) mic, len);
 		if (c1 == LC_CNS11643_1)
 		{
-			*p++ = *mic++;
-			*p++ = *mic++;
+			*p++ = mic[1];
+			*p++ = mic[2];
 		}
 		else if (c1 == LC_CNS11643_2)
 		{
 			*p++ = SS2;
 			*p++ = 0xa2;
-			*p++ = *mic++;
-			*p++ = *mic++;
+			*p++ = mic[1];
+			*p++ = mic[2];
 		}
-		else if (c1 == 0x9d)
+		else if (c1 == 0x9d &&
+				 mic[1] >= LC_CNS11643_3 && mic[1] <= LC_CNS11643_7)
 		{						/* LCPRV2? */
 			*p++ = SS2;
-			*p++ = *mic++ - LC_CNS11643_3 + 0xa3;
-			*p++ = *mic++;
-			*p++ = *mic++;
-		}
-		else if (c1 > 0x7f)
-		{						/* cannot convert to EUC_TW! */
-			mic--;
-			pg_print_bogus_char(&mic, &p);
+			*p++ = mic[1] - LC_CNS11643_3 + 0xa3;
+			*p++ = mic[2];
+			*p++ = mic[3];
 		}
 		else
-		{						/* should be ASCII */
-			*p++ = c1;
-		}
+			report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_TW,
+									   (const char *) mic, len);
+		mic += l;
+		len -= l;
 	}
 	*p = '\0';
 }
@@ -241,52 +266,49 @@ mic2euc_tw(unsigned char *mic, unsigned char *p, int len)
  * Big5 ---> MIC
  */
 static void
-big52mic(unsigned char *big5, unsigned char *p, int len)
+big52mic(const unsigned char *big5, unsigned char *p, int len)
 {
 	unsigned short c1;
 	unsigned short big5buf,
 				cnsBuf;
 	unsigned char lc;
-	char		bogusBuf[3];
-	int			i;
+	int			l;
 
-	while (len >= 0 && (c1 = *big5++))
+	while (len > 0)
 	{
-		if (c1 <= 0x7fU)
-		{						/* ASCII */
-			len--;
+		c1 = *big5;
+		if (!IS_HIGHBIT_SET(c1))
+		{
+			/* ASCII */
+			if (c1 == 0)
+				report_invalid_encoding(PG_BIG5,
+										(const char *) big5, len);
 			*p++ = c1;
+			big5++;
+			len--;
+			continue;
+		}
+		l = pg_encoding_verifymb(PG_BIG5, (const char *) big5, len);
+		if (l < 0)
+			report_invalid_encoding(PG_BIG5,
+									(const char *) big5, len);
+		big5buf = (c1 << 8) | big5[1];
+		cnsBuf = BIG5toCNS(big5buf, &lc);
+		if (lc != 0)
+		{
+			if (lc == LC_CNS11643_3 || lc == LC_CNS11643_4)
+			{
+				*p++ = 0x9d;	/* LCPRV2 */
+			}
+			*p++ = lc;		/* Plane No. */
+			*p++ = (cnsBuf >> 8) & 0x00ff;
+			*p++ = cnsBuf & 0x00ff;
 		}
 		else
-		{
-			len -= 2;
-			big5buf = c1 << 8;
-			c1 = *big5++;
-			big5buf |= c1;
-			cnsBuf = BIG5toCNS(big5buf, &lc);
-			if (lc != 0)
-			{
-				if (lc == LC_CNS11643_3 || lc == LC_CNS11643_4)
-				{
-					*p++ = 0x9d;	/* LCPRV2 */
-				}
-				*p++ = lc;		/* Plane No. */
-				*p++ = (cnsBuf >> 8) & 0x00ff;
-				*p++ = cnsBuf & 0x00ff;
-			}
-			else
-			{					/* cannot convert */
-				big5 -= 2;
-				*p++ = '(';
-				for (i = 0; i < 2; i++)
-				{
-					sprintf(bogusBuf, "%02x", *big5++);
-					*p++ = bogusBuf[0];
-					*p++ = bogusBuf[1];
-				}
-				*p++ = ')';
-			}
-		}
+			report_untranslatable_char(PG_BIG5, PG_MULE_INTERNAL,
+									   (const char *) big5, len);
+		big5 += l;
+		len -= l;
 	}
 	*p = '\0';
 }
@@ -295,46 +317,55 @@ big52mic(unsigned char *big5, unsigned char *p, int len)
  * MIC ---> Big5
  */
 static void
-mic2big5(unsigned char *mic, unsigned char *p, int len)
+mic2big5(const unsigned char *mic, unsigned char *p, int len)
 {
-	int			l;
 	unsigned short c1;
 	unsigned short big5buf,
 				cnsBuf;
+	int			l;
 
-	while (len >= 0 && (c1 = *mic))
+	while (len > 0)
 	{
-		l = pg_mic_mblen(mic++);
-		len -= l;
-
+		c1 = *mic;
+		if (!IS_HIGHBIT_SET(c1))
+		{
+			/* ASCII */
+			if (c1 == 0)
+				report_invalid_encoding(PG_MULE_INTERNAL,
+										(const char *) mic, len);
+			*p++ = c1;
+			mic++;
+			len--;
+			continue;
+		}
+		l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
+		if (l < 0)
+			report_invalid_encoding(PG_MULE_INTERNAL,
+									(const char *) mic, len);
 		/* 0x9d means LCPRV2 */
 		if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == 0x9d)
 		{
 			if (c1 == 0x9d)
 			{
-				c1 = *mic++;	/* get plane no. */
-			}
-			cnsBuf = (*mic++) << 8;
-			cnsBuf |= (*mic++) & 0x00ff;
-			big5buf = CNStoBIG5(cnsBuf, c1);
-			if (big5buf == 0)
-			{					/* cannot convert to Big5! */
-				mic -= l;
-				pg_print_bogus_char(&mic, &p);
+				c1 = mic[1];	/* get plane no. */
+				cnsBuf = (mic[2] << 8) | mic[3];
 			}
 			else
 			{
-				*p++ = (big5buf >> 8) & 0x00ff;
-				*p++ = big5buf & 0x00ff;
+				cnsBuf = (mic[1] << 8) | mic[2];
 			}
+			big5buf = CNStoBIG5(cnsBuf, c1);
+			if (big5buf == 0)
+				report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
+										   (const char *) mic, len);
+			*p++ = (big5buf >> 8) & 0x00ff;
+			*p++ = big5buf & 0x00ff;
 		}
-		else if (c1 <= 0x7f)	/* ASCII */
-			*p++ = c1;
 		else
-		{						/* cannot convert to Big5! */
-			mic--;
-			pg_print_bogus_char(&mic, &p);
-		}
+			report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
+									   (const char *) mic, len);
+		mic += l;
+		len -= l;
 	}
 	*p = '\0';
 }
diff --git a/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c b/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c
index ec2da8bf9f..6f5bb71a38 100644
--- a/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c
+++ b/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c,v 1.9 2004/12/31 22:02:08 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c,v 1.9.4.1 2006/05/21 20:06:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -42,10 +42,10 @@ extern Datum win1250_to_latin2(PG_FUNCTION_ARGS);
  * ----------
  */
 
-static void latin22mic(unsigned char *l, unsigned char *p, int len);
-static void mic2latin2(unsigned char *mic, unsigned char *p, int len);
-static void win12502mic(unsigned char *l, unsigned char *p, int len);
-static void mic2win1250(unsigned char *mic, unsigned char *p, int len);
+static void latin22mic(const unsigned char *l, unsigned char *p, int len);
+static void mic2latin2(const unsigned char *mic, unsigned char *p, int len);
+static void win12502mic(const unsigned char *l, unsigned char *p, int len);
+static void mic2win1250(const unsigned char *mic, unsigned char *p, int len);
 
 Datum
 latin2_to_mic(PG_FUNCTION_ARGS)
@@ -152,14 +152,15 @@ win1250_to_latin2(PG_FUNCTION_ARGS)
 }
 
 static void
-latin22mic(unsigned char *l, unsigned char *p, int len)
+latin22mic(const unsigned char *l, unsigned char *p, int len)
 {
-	latin2mic(l, p, len, LC_ISO8859_2);
+	latin2mic(l, p, len, LC_ISO8859_2, PG_LATIN2);
 }
+
 static void
-mic2latin2(unsigned char *mic, unsigned char *p, int len)
+mic2latin2(const unsigned char *mic, unsigned char *p, int len)
 {
-	mic2latin(mic, p, len, LC_ISO8859_2);
+	mic2latin(mic, p, len, LC_ISO8859_2, PG_LATIN2);
 }
 
 /*-----------------------------------------------------------------
@@ -167,9 +168,9 @@ mic2latin2(unsigned char *mic, unsigned char *p, int len)
  * Microsoft's CP1250(windows-1250)
  *-----------------------------------------------------------------*/
 static void
-win12502mic(unsigned char *l, unsigned char *p, int len)
+win12502mic(const unsigned char *l, unsigned char *p, int len)
 {
-	static unsigned char win1250_2_iso88592[] = {
+	static const unsigned char win1250_2_iso88592[] = {
 		0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
 		0x88, 0x89, 0xA9, 0x8B, 0xA6, 0xAB, 0xAE, 0xAC,
 		0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
@@ -188,12 +189,14 @@ win12502mic(unsigned char *l, unsigned char *p, int len)
 		0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
 	};
 
-	latin2mic_with_table(l, p, len, LC_ISO8859_2, win1250_2_iso88592);
+	latin2mic_with_table(l, p, len, LC_ISO8859_2, PG_WIN1250,
+						 win1250_2_iso88592);
 }
+
 static void
-mic2win1250(unsigned char *mic, unsigned char *p, int len)
+mic2win1250(const unsigned char *mic, unsigned char *p, int len)
 {
-	static unsigned char iso88592_2_win1250[] = {
+	static const unsigned char iso88592_2_win1250[] = {
 		0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
 		0x88, 0x89, 0x00, 0x8B, 0x00, 0x00, 0x00, 0x00,
 		0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
@@ -212,5 +215,6 @@ mic2win1250(unsigned char *mic, unsigned char *p, int len)
 		0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
 	};
 
-	mic2latin_with_table(mic, p, len, LC_ISO8859_2, iso88592_2_win1250);
+	mic2latin_with_table(mic, p, len, LC_ISO8859_2, PG_WIN1250,
+						 iso88592_2_win1250);
 }
diff --git a/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c b/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c
index f0435da45d..097975f952 100644
--- a/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c
+++ b/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c,v 1.9 2004/12/31 22:02:10 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c,v 1.9.4.1 2006/05/21 20:06:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -40,12 +40,12 @@ extern Datum mic_to_latin4(PG_FUNCTION_ARGS);
  * ----------
  */
 
-static void latin12mic(unsigned char *l, unsigned char *p, int len);
-static void mic2latin1(unsigned char *mic, unsigned char *p, int len);
-static void latin32mic(unsigned char *l, unsigned char *p, int len);
-static void mic2latin3(unsigned char *mic, unsigned char *p, int len);
-static void latin42mic(unsigned char *l, unsigned char *p, int len);
-static void mic2latin4(unsigned char *mic, unsigned char *p, int len);
+static void latin12mic(const unsigned char *l, unsigned char *p, int len);
+static void mic2latin1(const unsigned char *mic, unsigned char *p, int len);
+static void latin32mic(const unsigned char *l, unsigned char *p, int len);
+static void mic2latin3(const unsigned char *mic, unsigned char *p, int len);
+static void latin42mic(const unsigned char *l, unsigned char *p, int len);
+static void mic2latin4(const unsigned char *mic, unsigned char *p, int len);
 
 Datum
 latin1_to_mic(PG_FUNCTION_ARGS)
@@ -144,32 +144,37 @@ mic_to_latin4(PG_FUNCTION_ARGS)
 }
 
 static void
-latin12mic(unsigned char *l, unsigned char *p, int len)
+latin12mic(const unsigned char *l, unsigned char *p, int len)
 {
-	latin2mic(l, p, len, LC_ISO8859_1);
+	latin2mic(l, p, len, LC_ISO8859_1, PG_LATIN1);
 }
+
 static void
-mic2latin1(unsigned char *mic, unsigned char *p, int len)
+mic2latin1(const unsigned char *mic, unsigned char *p, int len)
 {
-	mic2latin(mic, p, len, LC_ISO8859_1);
+	mic2latin(mic, p, len, LC_ISO8859_1, PG_LATIN1);
 }
+
 static void
-latin32mic(unsigned char *l, unsigned char *p, int len)
+latin32mic(const unsigned char *l, unsigned char *p, int len)
 {
-	latin2mic(l, p, len, LC_ISO8859_3);
+	latin2mic(l, p, len, LC_ISO8859_3, PG_LATIN3);
 }
+
 static void
-mic2latin3(unsigned char *mic, unsigned char *p, int len)
+mic2latin3(const unsigned char *mic, unsigned char *p, int len)
 {
-	mic2latin(mic, p, len, LC_ISO8859_3);
+	mic2latin(mic, p, len, LC_ISO8859_3, PG_LATIN3);
 }
+
 static void
-latin42mic(unsigned char *l, unsigned char *p, int len)
+latin42mic(const unsigned char *l, unsigned char *p, int len)
 {
-	latin2mic(l, p, len, LC_ISO8859_4);
+	latin2mic(l, p, len, LC_ISO8859_4, PG_LATIN4);
 }
+
 static void
-mic2latin4(unsigned char *mic, unsigned char *p, int len)
+mic2latin4(const unsigned char *mic, unsigned char *p, int len)
 {
-	mic2latin(mic, p, len, LC_ISO8859_4);
+	mic2latin(mic, p, len, LC_ISO8859_4, PG_LATIN4);
 }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_ascii/utf8_and_ascii.c b/src/backend/utils/mb/conversion_procs/utf8_and_ascii/utf8_and_ascii.c
index 93ce77377b..78d0b3ca83 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_ascii/utf8_and_ascii.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_ascii/utf8_and_ascii.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_ascii/utf8_and_ascii.c,v 1.9 2004/12/31 22:02:11 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_ascii/utf8_and_ascii.c,v 1.9.4.1 2006/05/21 20:06:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -43,6 +43,7 @@ ascii_to_utf8(PG_FUNCTION_ARGS)
 	Assert(PG_GETARG_INT32(1) == PG_UTF8);
 	Assert(len >= 0);
 
+	/* MIC routine looks wrong, but we only need to reject high-bit-set bytes */
 	pg_ascii2mic(src, dest, len);
 
 	PG_RETURN_VOID();
@@ -59,6 +60,7 @@ utf8_to_ascii(PG_FUNCTION_ARGS)
 	Assert(PG_GETARG_INT32(1) == PG_SQL_ASCII);
 	Assert(len >= 0);
 
+	/* MIC routine looks wrong, but we only need to reject high-bit-set bytes */
 	pg_mic2ascii(src, dest, len);
 
 	PG_RETURN_VOID();
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c b/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c
index 31b5b5f3e2..ea923b66fa 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c,v 1.9 2004/12/31 22:02:13 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c,v 1.9.4.1 2006/05/21 20:06:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -62,7 +62,7 @@ utf8_to_big5(PG_FUNCTION_ARGS)
 	Assert(len >= 0);
 
 	UtfToLocal(src, dest, ULmapBIG5,
-			   sizeof(ULmapBIG5) / sizeof(pg_utf_to_local), len);
+			   sizeof(ULmapBIG5) / sizeof(pg_utf_to_local), PG_BIG5, len);
 
 	PG_RETURN_VOID();
 }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c b/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c
index 81dc25f361..ec91c41f8a 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c,v 1.9 2004/12/31 22:02:14 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c,v 1.9.4.1 2006/05/21 20:06:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -58,7 +58,7 @@ utf8_to_koi8r(PG_FUNCTION_ARGS)
 	Assert(len >= 0);
 
 	UtfToLocal(src, dest, ULmap_KOI8R,
-			   sizeof(ULmap_KOI8R) / sizeof(pg_utf_to_local), len);
+			   sizeof(ULmap_KOI8R) / sizeof(pg_utf_to_local), PG_KOI8R, len);
 
 	PG_RETURN_VOID();
 }
@@ -92,7 +92,7 @@ utf8_to_win1251(PG_FUNCTION_ARGS)
 	Assert(len >= 0);
 
 	UtfToLocal(src, dest, ULmap_WIN1251,
-			   sizeof(ULmap_WIN1251) / sizeof(pg_utf_to_local), len);
+			   sizeof(ULmap_WIN1251) / sizeof(pg_utf_to_local), PG_WIN1251, len);
 
 	PG_RETURN_VOID();
 }
@@ -126,7 +126,7 @@ utf8_to_alt(PG_FUNCTION_ARGS)
 	Assert(len >= 0);
 
 	UtfToLocal(src, dest, ULmap_ALT,
-			   sizeof(ULmap_ALT) / sizeof(pg_utf_to_local), len);
+			   sizeof(ULmap_ALT) / sizeof(pg_utf_to_local), PG_ALT, len);
 
 	PG_RETURN_VOID();
 }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c
index 59dfe9d0a7..25f4ce6252 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c,v 1.9 2004/12/31 22:02:16 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c,v 1.9.4.1 2006/05/21 20:06:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -62,7 +62,7 @@ utf8_to_euc_cn(PG_FUNCTION_ARGS)
 	Assert(len >= 0);
 
 	UtfToLocal(src, dest, ULmapEUC_CN,
-			   sizeof(ULmapEUC_CN) / sizeof(pg_utf_to_local), len);
+			   sizeof(ULmapEUC_CN) / sizeof(pg_utf_to_local), PG_EUC_CN, len);
 
 	PG_RETURN_VOID();
 }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c
index d438148d31..38c8827789 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c,v 1.9 2004/12/31 22:02:17 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c,v 1.9.4.1 2006/05/21 20:06:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -62,7 +62,7 @@ utf8_to_euc_jp(PG_FUNCTION_ARGS)
 	Assert(len >= 0);
 
 	UtfToLocal(src, dest, ULmapEUC_JP,
-			   sizeof(ULmapEUC_JP) / sizeof(pg_utf_to_local), len);
+			   sizeof(ULmapEUC_JP) / sizeof(pg_utf_to_local), PG_EUC_JP, len);
 
 	PG_RETURN_VOID();
 }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c
index 0f8668546f..4ce2feeeb0 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c,v 1.9 2004/12/31 22:02:19 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c,v 1.9.4.1 2006/05/21 20:06:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -62,7 +62,7 @@ utf8_to_euc_kr(PG_FUNCTION_ARGS)
 	Assert(len >= 0);
 
 	UtfToLocal(src, dest, ULmapEUC_KR,
-			   sizeof(ULmapEUC_KR) / sizeof(pg_utf_to_local), len);
+			   sizeof(ULmapEUC_KR) / sizeof(pg_utf_to_local), PG_EUC_KR, len);
 
 	PG_RETURN_VOID();
 }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c
index 8ceb89140c..c008632229 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c,v 1.9 2004/12/31 22:02:20 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c,v 1.9.4.1 2006/05/21 20:06:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -62,7 +62,7 @@ utf8_to_euc_tw(PG_FUNCTION_ARGS)
 	Assert(len >= 0);
 
 	UtfToLocal(src, dest, ULmapEUC_TW,
-			   sizeof(ULmapEUC_TW) / sizeof(pg_utf_to_local), len);
+			   sizeof(ULmapEUC_TW) / sizeof(pg_utf_to_local), PG_EUC_TW, len);
 
 	PG_RETURN_VOID();
 }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c b/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c
index ae06726158..7d532591fd 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c,v 1.9 2004/12/31 22:02:23 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c,v 1.9.4.1 2006/05/21 20:06:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -62,7 +62,7 @@ utf8_to_gb18030(PG_FUNCTION_ARGS)
 	Assert(len >= 0);
 
 	UtfToLocal(src, dest, ULmapGB18030,
-			   sizeof(ULmapGB18030) / sizeof(pg_utf_to_local), len);
+			   sizeof(ULmapGB18030) / sizeof(pg_utf_to_local), PG_GB18030, len);
 
 	PG_RETURN_VOID();
 }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c b/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c
index 04524c8497..463fedba2a 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c,v 1.9 2004/12/31 22:02:26 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c,v 1.9.4.1 2006/05/21 20:06:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -62,7 +62,7 @@ utf8_to_gbk(PG_FUNCTION_ARGS)
 	Assert(len >= 0);
 
 	UtfToLocal(src, dest, ULmapGBK,
-			   sizeof(ULmapGBK) / sizeof(pg_utf_to_local), len);
+			   sizeof(ULmapGBK) / sizeof(pg_utf_to_local), PG_GBK, len);
 
 	PG_RETURN_VOID();
 }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c
index 1bb101d4da..edd8cfbd6d 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c,v 1.11 2004/12/31 22:02:27 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c,v 1.11.4.1 2006/05/21 20:06:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -151,7 +151,7 @@ utf8_to_iso8859(PG_FUNCTION_ARGS)
 	Assert(PG_GETARG_INT32(0) == PG_UTF8);
 	Assert(len >= 0);
 
-	UtfToLocal(src, dest, maps[encoding].map2, maps[encoding].size2, len);
+	UtfToLocal(src, dest, maps[encoding].map2, maps[encoding].size2, encoding, len);
 
 	PG_RETURN_VOID();
 }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c
index 8b5812eead..8accfb4bf6 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c,v 1.10 2004/12/31 22:02:29 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c,v 1.10.4.1 2006/05/21 20:06:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -44,15 +44,20 @@ iso8859_1_to_utf8(PG_FUNCTION_ARGS)
 	Assert(PG_GETARG_INT32(1) == PG_UTF8);
 	Assert(len >= 0);
 
-	while (len-- > 0 && (c = *src++))
+	while (len > 0)
 	{
-		if (c < 0x80)
+		c = *src;
+		if (c == 0)
+			report_invalid_encoding(PG_LATIN1, (const char *) src, len);
+		if (!IS_HIGHBIT_SET(c))
 			*dest++ = c;
 		else
 		{
 			*dest++ = (c >> 6) | 0xc0;
 			*dest++ = (c & 0x003f) | 0x80;
 		}
+		src++;
+		len--;
 	}
 	*dest = '\0';
 
@@ -66,31 +71,45 @@ utf8_to_iso8859_1(PG_FUNCTION_ARGS)
 	unsigned char *dest = PG_GETARG_CSTRING(3);
 	int			len = PG_GETARG_INT32(4);
 	unsigned short c,
-				c1,
-				c2;
+				c1;
 
 	Assert(PG_GETARG_INT32(0) == PG_UTF8);
 	Assert(PG_GETARG_INT32(1) == PG_LATIN1);
 	Assert(len >= 0);
 
-	while (len >= 0 && (c = *src++))
+	while (len > 0)
 	{
-		if ((c & 0xe0) == 0xc0)
-		{
-			c1 = c & 0x1f;
-			c2 = *src++ & 0x3f;
-			*dest = c1 << 6;
-			*dest++ |= c2;
-			len -= 2;
-		}
-		else if ((c & 0xe0) == 0xe0)
-			elog(ERROR, "could not convert UTF-8 character 0x%04x to ISO8859-1",
-				 c);
-		else
+		c = *src;
+		if (c == 0)
+			report_invalid_encoding(PG_UTF8, (const char *) src, len);
+		/* fast path for ASCII-subset characters */
+		if (!IS_HIGHBIT_SET(c))
 		{
 			*dest++ = c;
+			src++;
 			len--;
 		}
+		else
+		{
+			int		l = pg_utf_mblen(src);
+
+			if (l > len || !pg_utf8_islegal(src, l))
+				report_invalid_encoding(PG_UTF8, (const char *) src, len);
+			if (l != 2)
+				report_untranslatable_char(PG_UTF8, PG_LATIN1,
+										   (const char *) src, len);
+			c1 = src[1] & 0x3f;
+			c = ((c & 0x1f) << 6) | c1;
+			if (c >= 0x80 && c <= 0xff)
+			{
+				*dest++ = (unsigned char) c;
+				src += 2;
+				len -= 2;
+			}
+			else
+				report_untranslatable_char(PG_UTF8, PG_LATIN1,
+										   (const char *) src, len);
+		}
 	}
 	*dest = '\0';
 
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c b/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c
index 3083d397ce..97bd44c3fb 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c,v 1.9 2004/12/31 22:02:31 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c,v 1.9.4.1 2006/05/21 20:06:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -62,7 +62,7 @@ utf8_to_johab(PG_FUNCTION_ARGS)
 	Assert(len >= 0);
 
 	UtfToLocal(src, dest, ULmapJOHAB,
-			   sizeof(ULmapJOHAB) / sizeof(pg_utf_to_local), len);
+			   sizeof(ULmapJOHAB) / sizeof(pg_utf_to_local), PG_JOHAB, len);
 
 	PG_RETURN_VOID();
 }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c b/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c
index 3d3f805f1f..852ca98a56 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c,v 1.9 2004/12/31 22:02:33 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c,v 1.9.4.1 2006/05/21 20:06:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -62,7 +62,7 @@ utf8_to_sjis(PG_FUNCTION_ARGS)
 	Assert(len >= 0);
 
 	UtfToLocal(src, dest, ULmapSJIS,
-			   sizeof(ULmapSJIS) / sizeof(pg_utf_to_local), len);
+			   sizeof(ULmapSJIS) / sizeof(pg_utf_to_local), PG_SJIS, len);
 
 	PG_RETURN_VOID();
 }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_tcvn/utf8_and_tcvn.c b/src/backend/utils/mb/conversion_procs/utf8_and_tcvn/utf8_and_tcvn.c
index 4894d3fee8..a3d7b3b398 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_tcvn/utf8_and_tcvn.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_tcvn/utf8_and_tcvn.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_tcvn/utf8_and_tcvn.c,v 1.9 2004/12/31 22:02:35 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_tcvn/utf8_and_tcvn.c,v 1.9.4.1 2006/05/21 20:06:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -62,7 +62,7 @@ utf8_to_tcvn(PG_FUNCTION_ARGS)
 	Assert(len >= 0);
 
 	UtfToLocal(src, dest, ULmapTCVN,
-			   sizeof(ULmapTCVN) / sizeof(pg_utf_to_local), len);
+			   sizeof(ULmapTCVN) / sizeof(pg_utf_to_local), PG_TCVN, len);
 
 	PG_RETURN_VOID();
 }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c b/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c
index c995ebc59a..33f6e59b05 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c,v 1.9 2004/12/31 22:02:36 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c,v 1.9.4.1 2006/05/21 20:06:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -62,7 +62,7 @@ utf8_to_uhc(PG_FUNCTION_ARGS)
 	Assert(len >= 0);
 
 	UtfToLocal(src, dest, ULmapUHC,
-			   sizeof(ULmapUHC) / sizeof(pg_utf_to_local), len);
+			   sizeof(ULmapUHC) / sizeof(pg_utf_to_local), PG_UHC, len);
 
 	PG_RETURN_VOID();
 }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win1250/utf8_and_win1250.c b/src/backend/utils/mb/conversion_procs/utf8_and_win1250/utf8_and_win1250.c
index 3b260a0e64..403edc27a8 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_win1250/utf8_and_win1250.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_win1250/utf8_and_win1250.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win1250/utf8_and_win1250.c,v 1.9 2004/12/31 22:02:38 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win1250/utf8_and_win1250.c,v 1.9.4.1 2006/05/21 20:06:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -46,7 +46,7 @@ utf_to_win1250(PG_FUNCTION_ARGS)
 	Assert(len >= 0);
 
 	UtfToLocal(src, dest, ULmapWIN1250,
-			   sizeof(ULmapWIN1250) / sizeof(pg_utf_to_local), len);
+			   sizeof(ULmapWIN1250) / sizeof(pg_utf_to_local), PG_WIN1250, len);
 
 	PG_RETURN_VOID();
 }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win1256/utf8_and_win1256.c b/src/backend/utils/mb/conversion_procs/utf8_and_win1256/utf8_and_win1256.c
index 1001e19e75..fcdd418fac 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_win1256/utf8_and_win1256.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_win1256/utf8_and_win1256.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win1256/utf8_and_win1256.c,v 1.9 2004/12/31 22:02:39 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win1256/utf8_and_win1256.c,v 1.9.4.1 2006/05/21 20:06:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -46,7 +46,7 @@ utf_to_win1256(PG_FUNCTION_ARGS)
 	Assert(len >= 0);
 
 	UtfToLocal(src, dest, ULmapWIN1256,
-			   sizeof(ULmapWIN1256) / sizeof(pg_utf_to_local), len);
+			   sizeof(ULmapWIN1256) / sizeof(pg_utf_to_local), PG_WIN1256, len);
 
 	PG_RETURN_VOID();
 }
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win874/utf8_and_win874.c b/src/backend/utils/mb/conversion_procs/utf8_and_win874/utf8_and_win874.c
index c37c5c495b..21432c0b5f 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_win874/utf8_and_win874.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_win874/utf8_and_win874.c
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win874/utf8_and_win874.c,v 1.9 2004/12/31 22:02:41 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win874/utf8_and_win874.c,v 1.9.4.1 2006/05/21 20:06:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -46,7 +46,7 @@ utf_to_win874(PG_FUNCTION_ARGS)
 	Assert(len >= 0);
 
 	UtfToLocal(src, dest, ULmapWIN874,
-			   sizeof(ULmapWIN874) / sizeof(pg_utf_to_local), len);
+			   sizeof(ULmapWIN874) / sizeof(pg_utf_to_local), PG_WIN874, len);
 
 	PG_RETURN_VOID();
 }
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 9718e7e73e..da5f8e66c5 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -4,7 +4,7 @@
  * (currently mule internal code (mic) is used)
  * Tatsuo Ishii
  *
- * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.48 2004/10/13 01:25:12 neilc Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.48.4.1 2006/05/21 20:06:16 tgl Exp $
  */
 #include "postgres.h"
 
@@ -370,9 +370,50 @@ pg_client_to_server(unsigned char *s, int len)
 	Assert(DatabaseEncoding);
 	Assert(ClientEncoding);
 
-	if (ClientEncoding->encoding == DatabaseEncoding->encoding)
+	if (len <= 0)
 		return s;
 
+	if (ClientEncoding->encoding == DatabaseEncoding->encoding ||
+		ClientEncoding->encoding == PG_SQL_ASCII)
+	{
+		/*
+		 * No conversion is needed, but we must still validate the data.
+		 */
+		(void) pg_verify_mbstr(DatabaseEncoding->encoding, s, len, false);
+		return s;
+	}
+
+	if (DatabaseEncoding->encoding == PG_SQL_ASCII)
+	{
+		/*
+		 * No conversion is possible, but we must still validate the data,
+		 * because the client-side code might have done string escaping
+		 * using the selected client_encoding.  If the client encoding is
+		 * ASCII-safe then we just do a straight validation under that
+		 * encoding.  For an ASCII-unsafe encoding we have a problem:
+		 * we dare not pass such data to the parser but we have no way
+		 * to convert it.  We compromise by rejecting the data if it
+		 * contains any non-ASCII characters.
+		 */
+		if (PG_VALID_BE_ENCODING(ClientEncoding->encoding))
+			(void) pg_verify_mbstr(ClientEncoding->encoding, s, len, false);
+		else
+		{
+			int		i;
+
+			for (i = 0; i < len; i++)
+			{
+				if (s[i] == '\0' || IS_HIGHBIT_SET(s[i]))
+					ereport(ERROR,
+							(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+							 errmsg("invalid byte value for encoding \"%s\": 0x%02x",
+									pg_enc2name_tbl[PG_SQL_ASCII].name,
+									(unsigned char) s[i])));
+			}
+		}
+		return s;
+	}
+
 	return perform_default_encoding_conversion(s, len, true);
 }
 
@@ -385,9 +426,14 @@ pg_server_to_client(unsigned char *s, int len)
 	Assert(DatabaseEncoding);
 	Assert(ClientEncoding);
 
-	if (ClientEncoding->encoding == DatabaseEncoding->encoding)
+	if (len <= 0)
 		return s;
 
+	if (ClientEncoding->encoding == DatabaseEncoding->encoding ||
+		ClientEncoding->encoding == PG_SQL_ASCII ||
+		DatabaseEncoding->encoding == PG_SQL_ASCII)
+		return s;		/* assume data is valid */
+
 	return perform_default_encoding_conversion(s, len, false);
 }
 
@@ -406,9 +452,6 @@ perform_default_encoding_conversion(unsigned char *src, int len, bool is_client_
 				dest_encoding;
 	FmgrInfo   *flinfo;
 
-	if (len <= 0)
-		return src;
-
 	if (is_client_to_server)
 	{
 		src_encoding = ClientEncoding->encoding;
@@ -425,12 +468,6 @@ perform_default_encoding_conversion(unsigned char *src, int len, bool is_client_
 	if (flinfo == NULL)
 		return src;
 
-	if (src_encoding == dest_encoding)
-		return src;
-
-	if (src_encoding == PG_SQL_ASCII || dest_encoding == PG_SQL_ASCII)
-		return src;
-
 	result = palloc(len * 4 + 1);
 
 	FunctionCall5(flinfo,
diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c
index 3adf4481e8..96a2688695 100644
--- a/src/backend/utils/mb/wchar.c
+++ b/src/backend/utils/mb/wchar.c
@@ -1,7 +1,7 @@
 /*
  * conversion functions between pg_wchar and multibyte streams.
  * Tatsuo Ishii
- * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.40.4.1 2005/12/24 10:11:32 ishii Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.40.4.2 2006/05/21 20:06:16 tgl Exp $
  *
  * WIN1250 client encoding updated by Pavel Behal
  *
@@ -96,7 +96,7 @@ static int	pg_euc2wchar_with_len
 	return (cnt);
 }
 
-static int
+static inline int
 pg_euc_mblen(const unsigned char *s)
 {
 	int			len;
@@ -112,7 +112,7 @@ pg_euc_mblen(const unsigned char *s)
 	return (len);
 }
 
-static int
+static inline int
 pg_euc_dsplen(const unsigned char *s)
 {
 	int			len;
@@ -714,53 +714,433 @@ pg_gb18030_dsplen(const unsigned char *s)
 	return (len);
 }
 
+/*
+ *-------------------------------------------------------------------
+ * multibyte sequence validators
+ *
+ * These functions accept "s", a pointer to the first byte of a string,
+ * and "len", the remaining length of the string.  If there is a validly
+ * encoded character beginning at *s, return its length in bytes; else
+ * return -1.
+ *
+ * The functions can assume that len > 0 and that *s != '\0', but they must
+ * test for and reject zeroes in any additional bytes of a multibyte character.
+ *
+ * Note that this definition allows the function for a single-byte
+ * encoding to be just "return 1".
+ *-------------------------------------------------------------------
+ */
 
+static int
+pg_ascii_verifier(const unsigned char *s, int len)
+{
+	return 1;		/* single-byte encoding: every byte is a 1-byte character */
+}
+
+#define IS_EUC_RANGE_VALID(c)	((c) >= 0xa1 && (c) <= 0xfe)
+
+static int
+pg_eucjp_verifier(const unsigned char *s, int len)
+{
+	int			l;				/* byte length of the character being checked */
+	unsigned char c1, c2;
+
+	c1 = *s++;					/* lead byte decides which case applies */
+
+	switch (c1)
+	{
+		case SS2:		/* JIS X 0201 */
+			l = 2;
+			if (l > len)		/* character would run past end of input */
+				return -1;
+			c2 = *s++;
+			if (c2 < 0xa1 || c2 > 0xdf)	/* second byte must be 0xa1..0xdf */
+				return -1;
+			break;
+
+		case SS3:		/* JIS X 0212 */
+			l = 3;
+			if (l > len)
+				return -1;
+			c2 = *s++;			/* both following bytes must be 0xa1..0xfe */
+			if (!IS_EUC_RANGE_VALID(c2))
+				return -1;
+			c2 = *s++;
+			if (!IS_EUC_RANGE_VALID(c2))
+				return -1;
+			break;
+
+		default:
+			if (IS_HIGHBIT_SET(c1))		/* JIS X 0208? */
+			{
+				l = 2;
+				if (l > len)
+					return -1;
+				if (!IS_EUC_RANGE_VALID(c1))	/* lead byte is range-checked too */
+					return -1;
+				c2 = *s++;
+				if (!IS_EUC_RANGE_VALID(c2))
+					return -1;
+			}
+			else		/* must be ASCII */
+			{
+				l = 1;
+			}
+			break;
+	}
+
+	return l;
+}
+
+static int
+pg_euckr_verifier(const unsigned char *s, int len)
+{
+	int			l;				/* byte length of the character being checked */
+	unsigned char c1, c2;
+
+	c1 = *s++;
+
+	if (IS_HIGHBIT_SET(c1))		/* high bit set: treated as a 2-byte character */
+	{
+		l = 2;
+		if (l > len)			/* character would run past end of input */
+			return -1;
+		if (!IS_EUC_RANGE_VALID(c1))	/* both bytes must be 0xa1..0xfe */
+			return -1;
+		c2 = *s++;
+		if (!IS_EUC_RANGE_VALID(c2))
+			return -1;
+	}
+	else		/* must be ASCII */
+	{
+		l = 1;
+	}
+
+	return l;
+}
+
+/* EUC-CN byte sequences are exactly the same as EUC-KR */
+#define pg_euccn_verifier	pg_euckr_verifier
+
+static int
+pg_euctw_verifier(const unsigned char *s, int len)
+{
+	int			l;				/* byte length of the character being checked */
+	unsigned char c1, c2;
+
+	c1 = *s++;					/* lead byte decides which case applies */
+
+	switch (c1)
+	{
+		case SS2:		/* CNS 11643 Plane 1-7 */
+			l = 4;
+			if (l > len)		/* character would run past end of input */
+				return -1;
+			c2 = *s++;			/* second byte 0xa1..0xa7 (presumably the plane selector) */
+			if (c2 < 0xa1 || c2 > 0xa7)
+				return -1;
+			c2 = *s++;			/* last two bytes must be 0xa1..0xfe */
+			if (!IS_EUC_RANGE_VALID(c2))
+				return -1;
+			c2 = *s++;
+			if (!IS_EUC_RANGE_VALID(c2))
+				return -1;
+			break;
+
+		case SS3:		/* unused */
+			return -1;
+
+		default:
+			if (IS_HIGHBIT_SET(c1))		/* CNS 11643 Plane 1 */
+			{
+				l = 2;
+				if (l > len)
+					return -1;
+				/* no further range check on c1? */
+				c2 = *s++;
+				if (!IS_EUC_RANGE_VALID(c2))
+					return -1;
+			}
+			else		/* must be ASCII */
+			{
+				l = 1;
+			}
+			break;
+	}
+	return l;
+}
+
+static int
+pg_johab_verifier(const unsigned char *s, int len)
+{
+	int l, mbl;
+	unsigned char c;
+
+	l = mbl = pg_johab_mblen(s);	/* length reported by pg_johab_mblen() */
+
+	if (len < l)					/* character is cut off by end of input */
+		return -1;
+
+	if (!IS_HIGHBIT_SET(*s))		/* lead byte without high bit: no further checks */
+		return mbl;
+
+	while (--l > 0)					/* each remaining byte must be 0xa1..0xfe */
+	{
+		c = *++s;
+		if (!IS_EUC_RANGE_VALID(c))
+			return -1;
+	}
+	return mbl;
+}
+
+static int
+pg_mule_verifier(const unsigned char *s, int len)
+{
+	int l, mbl;
+	unsigned char c;
+
+	l = mbl = pg_mule_mblen(s);		/* length reported by pg_mule_mblen() */
+
+	if (len < l)					/* character is cut off by end of input */
+		return -1;
+
+	while (--l > 0)					/* every byte after the first needs its high bit set */
+	{
+		c = *++s;
+		if (!IS_HIGHBIT_SET(c))
+			return -1;
+	}
+	return mbl;
+}
+
+static int
+pg_latin1_verifier(const unsigned char *s, int len)
+{
+	return 1;		/* single-byte encoding: every byte is a 1-byte character */
+}
+
+static int
+pg_sjis_verifier(const unsigned char *s, int len)
+{
+	int l, mbl;
+	unsigned char c1, c2;
+
+	l = mbl = pg_sjis_mblen(s);	/* length reported by pg_sjis_mblen() */
+
+	if (len < l)				/* character is cut off by end of input */
+		return -1;
+
+	if (l == 1)					/* pg_sjis_mblen already verified it */
+		return mbl;
+
+	c1 = *s++;					/* check the first two bytes with the SJIS head/tail macros */
+	c2 = *s;
+	if (!ISSJISHEAD(c1) || !ISSJISTAIL(c2))
+		return -1;
+	return mbl;
+}
+
+static int
+pg_big5_verifier(const unsigned char *s, int len)
+{
+	int l, mbl;
+
+	l = mbl = pg_big5_mblen(s);		/* length reported by pg_big5_mblen() */
+
+	if (len < l)					/* character is cut off by end of input */
+		return -1;
+
+	while (--l > 0)					/* trailing bytes are only checked for embedded NUL */
+	{
+		if (*++s == '\0')
+			return -1;
+	}
+
+	return mbl;
+}
+
+static int
+pg_gbk_verifier(const unsigned char *s, int len)
+{
+	int l, mbl;
+
+	l = mbl = pg_gbk_mblen(s);		/* length reported by pg_gbk_mblen() */
+
+	if (len < l)					/* character is cut off by end of input */
+		return -1;
+
+	while (--l > 0)					/* trailing bytes are only checked for embedded NUL */
+	{
+		if (*++s == '\0')
+			return -1;
+	}
+
+	return mbl;
+}
+
+static int
+pg_uhc_verifier(const unsigned char *s, int len)
+{
+	int l, mbl;
+
+	l = mbl = pg_uhc_mblen(s);		/* length reported by pg_uhc_mblen() */
+
+	if (len < l)					/* character is cut off by end of input */
+		return -1;
+
+	while (--l > 0)					/* trailing bytes are only checked for embedded NUL */
+	{
+		if (*++s == '\0')
+			return -1;
+	}
+
+	return mbl;
+}
+
+static int
+pg_gb18030_verifier(const unsigned char *s, int len)
+{
+	int l, mbl;
+
+	l = mbl = pg_gb18030_mblen(s);	/* length reported by pg_gb18030_mblen() */
+
+	if (len < l)					/* character is cut off by end of input */
+		return -1;
+
+	while (--l > 0)					/* trailing bytes are only checked for embedded NUL */
+	{
+		if (*++s == '\0')
+			return -1;
+	}
+
+	return mbl;
+}
+
+static int
+pg_utf8_verifier(const unsigned char *s, int len)
+{
+	int l = pg_utf_mblen(s);	/* length reported by pg_utf_mblen() */
+
+	if (len < l)				/* must hold before pg_utf8_islegal reads l bytes */
+		return -1;
+
+	if (!pg_utf8_islegal(s, l))	/* reject sequences that break the UTF-8 rules */
+		return -1;
+
+	return l;
+}
+
+/*
+ * Check for validity of a single UTF-8 encoded character
+ *
+ * This directly implements the rules in RFC 3629, modified to restrict
+ * us to 16-bit Unicode code points (hence, at most 3 bytes in UTF-8).
+ * The bizarre-looking restrictions on the second byte are meant to
+ * ensure that there isn't more than one encoding of a given Unicode
+ * code point; that is, you may not use a longer-than-necessary byte
+ * sequence with high-order zero bits to represent a character that
+ * would fit in fewer bytes.  To do otherwise is to create security
+ * hazards (e.g., create an apparent non-ASCII character that decodes
+ * to plain ASCII).
+ *
+ * length is assumed to have been obtained by pg_utf_mblen(), and the
+ * caller must have checked that that many bytes are present in the buffer.
+ */
+bool
+pg_utf8_islegal(const unsigned char *source, int length)
+{
+	unsigned char a;
+
+	switch (length)
+	{
+		default:
+			/* reject lengths 4, 5 and 6 for now */
+			return false;
+		case 3:
+			a = source[2];		/* third byte must be a continuation byte 0x80..0xBF */
+			if (a < 0x80 || a > 0xBF)
+				return false;
+			/* FALL THRU */
+		case 2:
+			a = source[1];		/* second byte: allowed range depends on the lead byte */
+			switch (*source)
+			{
+				case 0xE0:		/* second byte below 0xA0 would be an overlong form */
+					if (a < 0xA0 || a > 0xBF)
+						return false;
+					break;
+				case 0xED:		/* second byte above 0x9F would encode U+D800..U+DFFF */
+					if (a < 0x80 || a > 0x9F)
+						return false;
+					break;
+				default:
+					if (a < 0x80 || a > 0xBF)
+						return false;
+					break;
+			}
+			/* FALL THRU */
+		case 1:
+			a = *source;		/* finally check the lead byte itself */
+			if (a >= 0x80 && a < 0xC2)	/* continuation byte or overlong 2-byte lead */
+				return false;
+			if (a > 0xEF)		/* lead byte of a sequence longer than 3 bytes */
+				return false;
+			break;
+	}
+	return true;
+}
+
+/*
+ *-------------------------------------------------------------------
+ * encoding info table
+ *-------------------------------------------------------------------
+ */
 pg_wchar_tbl pg_wchar_table[] = {
-	{pg_ascii2wchar_with_len, pg_ascii_mblen, pg_ascii_dsplen, 1},		/* 0; PG_SQL_ASCII	*/
-	{pg_eucjp2wchar_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, 3},		/* 1; PG_EUC_JP */
-	{pg_euccn2wchar_with_len, pg_euccn_mblen, pg_euccn_dsplen, 3},		/* 2; PG_EUC_CN */
-	{pg_euckr2wchar_with_len, pg_euckr_mblen, pg_euckr_dsplen, 3},		/* 3; PG_EUC_KR */
-	{pg_euctw2wchar_with_len, pg_euctw_mblen, pg_euctw_dsplen, 3},		/* 4; PG_EUC_TW */
-	{pg_johab2wchar_with_len, pg_johab_mblen, pg_johab_dsplen, 3},		/* 5; PG_JOHAB */
-	{pg_utf2wchar_with_len, pg_utf_mblen, pg_utf_dsplen, 3},	/* 6; PG_UNICODE */
-	{pg_mule2wchar_with_len, pg_mule_mblen, pg_mule_dsplen, 3}, /* 7; PG_MULE_INTERNAL */
-	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},	/* 8; PG_LATIN1 */
-	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},	/* 9; PG_LATIN2 */
-	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},	/* 10; PG_LATIN3 */
-	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},	/* 11; PG_LATIN4 */
-	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},	/* 12; PG_LATIN5 */
-	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},	/* 13; PG_LATIN6 */
-	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},	/* 14; PG_LATIN7 */
-	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},	/* 15; PG_LATIN8 */
-	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},	/* 16; PG_LATIN9 */
-	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},	/* 17; PG_LATIN10 */
-	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},	/* 18; PG_WIN1256 */
-	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},	/* 19; PG_TCVN */
-	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},	/* 20; PG_WIN874 */
-	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},	/* 21; PG_KOI8 */
-	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},	/* 22; PG_WIN1251 */
-	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},	/* 23; PG_ALT */
-	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},	/* 24; ISO-8859-5 */
-	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},	/* 25; ISO-8859-6 */
-	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},	/* 26; ISO-8859-7 */
-	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},	/* 27; ISO-8859-8 */
-	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},	/* 28; PG_WIN1250 */
-	{0, pg_sjis_mblen, pg_sjis_dsplen, 2},		/* 29; PG_SJIS */
-	{0, pg_big5_mblen, pg_big5_dsplen, 2},		/* 30; PG_BIG5 */
-	{0, pg_gbk_mblen, pg_gbk_dsplen, 2},		/* 31; PG_GBK */
-	{0, pg_uhc_mblen, pg_uhc_dsplen, 2},		/* 32; PG_UHC */
-	{0, pg_gb18030_mblen, pg_gb18030_dsplen, 2} /* 33; PG_GB18030 */
+	{pg_ascii2wchar_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifier, 1},		/* 0; PG_SQL_ASCII	*/
+	{pg_eucjp2wchar_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3},		/* 1; PG_EUC_JP */
+	{pg_euccn2wchar_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifier, 3},		/* 2; PG_EUC_CN */
+	{pg_euckr2wchar_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifier, 3},		/* 3; PG_EUC_KR */
+	{pg_euctw2wchar_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifier, 3},		/* 4; PG_EUC_TW */
+	{pg_johab2wchar_with_len, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifier, 3},		/* 5; PG_JOHAB */
+	{pg_utf2wchar_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifier, 3},	/* 6; PG_UNICODE */
+	{pg_mule2wchar_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifier, 3}, /* 7; PG_MULE_INTERNAL */
+	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},	/* 8; PG_LATIN1 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},	/* 9; PG_LATIN2 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},	/* 10; PG_LATIN3 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},	/* 11; PG_LATIN4 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},	/* 12; PG_LATIN5 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},	/* 13; PG_LATIN6 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},	/* 14; PG_LATIN7 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},	/* 15; PG_LATIN8 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},	/* 16; PG_LATIN9 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},	/* 17; PG_LATIN10 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},	/* 18; PG_WIN1256 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},	/* 19; PG_TCVN */
+	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},	/* 20; PG_WIN874 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},	/* 21; PG_KOI8 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},	/* 22; PG_WIN1251 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},	/* 23; PG_ALT */
+	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},	/* 24; ISO-8859-5 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},	/* 25; ISO-8859-6 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},	/* 26; ISO-8859-7 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},	/* 27; ISO-8859-8 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},	/* 28; PG_WIN1250 */
+	{0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2},		/* 29; PG_SJIS */
+	{0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2},		/* 30; PG_BIG5 */
+	{0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2},		/* 31; PG_GBK */
+	{0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2},		/* 32; PG_UHC */
+	{0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifier, 2} /* 33; PG_GB18030 */
 };
 
 /* returns the byte length of a word for mule internal code */
 int
 pg_mic_mblen(const unsigned char *mbstr)
 {
-	return (pg_mule_mblen(mbstr));
+	return pg_mule_mblen(mbstr);
 }
 
 /*
- * Returns the byte length of a multibyte word.
+ * Returns the byte length of a multibyte character.
  */
 int
 pg_encoding_mblen(int encoding, const unsigned char *mbstr)
@@ -769,12 +1149,12 @@ pg_encoding_mblen(int encoding, const unsigned char *mbstr)
 
 	return ((encoding >= 0 &&
 			 encoding < sizeof(pg_wchar_table) / sizeof(pg_wchar_tbl)) ?
-			((*pg_wchar_table[encoding].mblen) (mbstr)) :
-			((*pg_wchar_table[PG_SQL_ASCII].mblen) (mbstr)));
+		((*pg_wchar_table[encoding].mblen) ((const unsigned char *) mbstr)) :
+	((*pg_wchar_table[PG_SQL_ASCII].mblen) ((const unsigned char *) mbstr)));
 }
 
 /*
- * Returns the display length of a multibyte word.
+ * Returns the display length of a multibyte character.
  */
 int
 pg_encoding_dsplen(int encoding, const unsigned char *mbstr)
@@ -783,12 +1163,28 @@ pg_encoding_dsplen(int encoding, const unsigned char *mbstr)
 
 	return ((encoding >= 0 &&
 			 encoding < sizeof(pg_wchar_table) / sizeof(pg_wchar_tbl)) ?
-			((*pg_wchar_table[encoding].dsplen) (mbstr)) :
-			((*pg_wchar_table[PG_SQL_ASCII].dsplen) (mbstr)));
+	   ((*pg_wchar_table[encoding].dsplen) ((const unsigned char *) mbstr)) :
+	((*pg_wchar_table[PG_SQL_ASCII].dsplen) ((const unsigned char *) mbstr)));
 }
 
 /*
- * fetch maximum length of a char encoding
+ * Verify the first multibyte character of the given string.
+ * Return its byte length if good, -1 if bad.  (See comments above for
+ * full details of the mbverify API.)
+ */
+int
+pg_encoding_verifymb(int encoding, const char *mbstr, int len)
+{
+	int			nencodings = sizeof(pg_wchar_table) / sizeof(pg_wchar_tbl);
+
+	Assert(PG_VALID_ENCODING(encoding));
+	if (encoding < 0 || encoding >= nencodings)
+		encoding = PG_SQL_ASCII;	/* out of range: use SQL_ASCII's verifier */
+	return (*pg_wchar_table[encoding].mbverify) ((const unsigned char *) mbstr, len);
+}
+
+/*
+ * fetch maximum length of a given encoding
  */
 int
 pg_encoding_max_length(int encoding)
@@ -801,78 +1197,7 @@ pg_encoding_max_length(int encoding)
 #ifndef FRONTEND
 
 /*
- * Verify mbstr to make sure that it has a valid character sequence.
- * mbstr is not necessarily NULL terminated; length of mbstr is
- * specified by len.
- *
- * If OK, return TRUE.	If a problem is found, return FALSE when noError is
- * true; when noError is false, ereport() a descriptive message.
- */
-bool
-pg_verifymbstr(const unsigned char *mbstr, int len, bool noError)
-{
-	int			l;
-	int			i;
-	int			encoding;
-
-	/* we do not need any check in single-byte encodings */
-	if (pg_database_encoding_max_length() <= 1)
-		return true;
-
-	encoding = GetDatabaseEncoding();
-
-	while (len > 0 && *mbstr)
-	{
-		/* special UTF-8 check */
-		if (encoding == PG_UTF8 && (*mbstr & 0xf8) == 0xf0)
-		{
-			if (noError)
-				return false;
-			ereport(ERROR,
-					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-					 errmsg("Unicode characters greater than or equal to 0x10000 are not supported")));
-		}
-
-		l = pg_mblen(mbstr);
-
-		for (i = 1; i < l; i++)
-		{
-			/*
-			 * we expect that every multibyte char consists of bytes
-			 * having the 8th bit set
-			 */
-			if (i >= len || (mbstr[i] & 0x80) == 0)
-			{
-				char		buf[8 * 2 + 1];
-				char	   *p = buf;
-				int			j,
-							jlimit;
-
-				if (noError)
-					return false;
-
-				jlimit = Min(l, len);
-				jlimit = Min(jlimit, 8);		/* prevent buffer overrun */
-
-				for (j = 0; j < jlimit; j++)
-					p += sprintf(p, "%02x", mbstr[j]);
-
-				ereport(ERROR,
-						(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-				errmsg("invalid byte sequence for encoding \"%s\": 0x%s",
-					   GetDatabaseEncodingName(), buf)));
-			}
-		}
-
-		len -= l;
-		mbstr += l;
-	}
-
-	return true;
-}
-
-/*
- * fetch maximum length of a char encoding for the current database
+ * fetch maximum length of the encoding for the current database
  */
 int
 pg_database_encoding_max_length(void)
@@ -880,4 +1205,139 @@ pg_database_encoding_max_length(void)
 	return pg_wchar_table[GetDatabaseEncoding()].maxmblen;
 }
 
+/*
+ * pg_verifymbstr: verify that mbstr is validly encoded in the current
+ * database encoding; len and noError are passed to pg_verify_mbstr() as-is.
+ */
+bool
+pg_verifymbstr(const char *mbstr, int len, bool noError)
+{
+	return pg_verify_mbstr(GetDatabaseEncoding(), mbstr, len, noError);
+}
+
+/*
+ * pg_verify_mbstr: verify that mbstr is validly encoded in the specified
+ * encoding.  A zero byte where a character starts is always rejected.
+ *
+ * mbstr is not necessarily zero terminated; length of mbstr is
+ * specified by len.
+ *
+ * If OK, return TRUE.	If a problem is found, return FALSE when noError is
+ * true; when noError is false, report it via report_invalid_encoding().
+ */
+bool
+pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
+{
+	mbverifier	mbverify;
+
+	Assert(PG_VALID_ENCODING(encoding));
+
+	/*
+	 * In single-byte encodings, we need only reject nulls (\0).
+	 */
+	if (pg_encoding_max_length(encoding) <= 1)
+	{
+		const char *nullpos = memchr(mbstr, 0, len);
+
+		if (nullpos == NULL)
+			return true;
+		if (noError)
+			return false;
+		report_invalid_encoding(encoding, nullpos, 1);	/* raises ERROR; assumed not to return */
+	}
+
+	/* fetch the encoding's verifier just once, outside the loop */
+	mbverify = pg_wchar_table[encoding].mbverify;
+
+	while (len > 0)
+	{
+		int			l;
+
+		/* fast path: high bit clear, so skip the verifier; \0 is rejected */
+		if (!IS_HIGHBIT_SET(*mbstr))
+		{
+			if (*mbstr != '\0')
+			{
+				mbstr++;
+				len--;
+				continue;
+			}
+			if (noError)
+				return false;
+			report_invalid_encoding(encoding, mbstr, len);
+		}
+
+		l = (*mbverify) ((const unsigned char *) mbstr, len);
+
+		if (l < 0)
+		{
+			if (noError)
+				return false;
+			report_invalid_encoding(encoding, mbstr, len);
+		}
+
+		mbstr += l;
+		len -= l;
+	}
+	return true;
+}
+
+/*
+ * report_invalid_encoding: complain about invalid multibyte character,
+ * quoting in hex at most 8 of its bytes (and never more than len bytes).
+ * note: len is remaining length of string, not length of character;
+ * len must be greater than zero, as we always examine the first byte.
+ */
+void
+report_invalid_encoding(int encoding, const char *mbstr, int len)
+{
+	int			l = pg_encoding_mblen(encoding, (const unsigned char *) mbstr);
+	char		buf[8 * 2 + 1];
+	char	   *p = buf;
+	int			j,
+				jlimit;
+
+	jlimit = Min(l, len);
+	jlimit = Min(jlimit, 8);	/* prevent buffer overrun */
+
+	for (j = 0; j < jlimit; j++)
+		p += sprintf(p, "%02x", (unsigned char) mbstr[j]);
+
+	ereport(ERROR,
+			(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+			 errmsg("invalid byte sequence for encoding \"%s\": 0x%s",
+					pg_enc2name_tbl[encoding].name,
+					buf)));
+}
+
+/*
+ * report_untranslatable_char: complain about untranslatable character,
+ * quoting in hex at most 8 of its bytes (and never more than len bytes).
+ * note: len is remaining length of string, not length of character;
+ * len must be greater than zero, as we always examine the first byte.
+ */
+void
+report_untranslatable_char(int src_encoding, int dest_encoding,
+						   const char *mbstr, int len)
+{
+	int			l = pg_encoding_mblen(src_encoding, (const unsigned char *) mbstr);
+	char		buf[8 * 2 + 1];
+	char	   *p = buf;
+	int			j,
+				jlimit;
+
+	jlimit = Min(l, len);
+	jlimit = Min(jlimit, 8);	/* prevent buffer overrun */
+
+	for (j = 0; j < jlimit; j++)
+		p += sprintf(p, "%02x", (unsigned char) mbstr[j]);
+
+	ereport(ERROR,
+			(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
+			 errmsg("character 0x%s of encoding \"%s\" has no equivalent in \"%s\"",
+					buf,
+					pg_enc2name_tbl[src_encoding].name,
+					pg_enc2name_tbl[dest_encoding].name)));
+}
+
 #endif
diff --git a/src/include/c.h b/src/include/c.h
index 08ce6dd243..9bdbcd8184 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/c.h,v 1.178.4.1 2005/07/18 15:53:46 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/c.h,v 1.178.4.2 2006/05/21 20:06:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -670,6 +670,8 @@ typedef NameData *Name;
 
 /* msb for char */
 #define CSIGNBIT (0x80)
+#define HIGHBIT					(0x80)
+#define IS_HIGHBIT_SET(ch)		((unsigned char)(ch) & HIGHBIT)
 
 #define STATUS_OK				(0)
 #define STATUS_ERROR			(-1)
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index 568679bf44..17e68edf6c 100644
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -1,4 +1,4 @@
-/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.56 2004/12/04 18:19:33 momjian Exp $ */
+/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.56.4.1 2006/05/21 20:06:18 tgl Exp $ */
 
 #ifndef PG_WCHAR_H
 #define PG_WCHAR_H
@@ -23,11 +23,17 @@ typedef unsigned int pg_wchar;
 #define SS2 0x8e				/* single shift 2 (JIS0201) */
 #define SS3 0x8f				/* single shift 3 (JIS0212) */
 
+/*
+ * SJIS validation macros
+ */
+#define ISSJISHEAD(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc))
+#define ISSJISTAIL(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
+
 /*
  * Leading byte types or leading prefix byte for MULE internal code.
  * See http://www.xemacs.org for more details.	(there is a doc titled
  * "XEmacs Internals Manual", "MULE Character Sets and Encodings"
- * section.
+ * section.)
  */
 /*
  * Is a leading byte for "official" single byte encodings?
@@ -64,7 +70,7 @@ typedef unsigned int pg_wchar;
 #define LC_ISO8859_8	0x88	/* Hebrew (not supported yet) */
 #define LC_JISX0201K	0x89	/* Japanese 1 byte kana */
 #define LC_JISX0201R	0x8a	/* Japanese 1 byte Roman */
-/* Note that 0x8b seems to be unused in as of Emacs 20.7.
+/* Note that 0x8b seems to be unused as of Emacs 20.7.
  * However, there might be a chance that 0x8b could be used
  * in later version of Emacs.
  */
@@ -137,13 +143,13 @@ typedef unsigned int pg_wchar;
 /* #define FREE		0xff	free (unused) */
 
 /*
- * Encoding numeral identificators
+ * PostgreSQL encoding identifiers
  *
  * WARNING: the order of this table must be same as order
  *			in the pg_enc2name[] (mb/encnames.c) array!
  *
- *			If you add some encoding don'y forget check
- *			PG_ENCODING_[BE|FE]_LAST macros.
+ *			If you add an encoding, don't forget to check the
+ *			PG_ENCODING_BE_LAST macro.
  *
  *		The PG_SQL_ASCII is default encoding and must be = 0.
  */
@@ -199,14 +205,13 @@ typedef enum pg_enc
 #define PG_VALID_BE_ENCODING(_enc) \
 		((_enc) >= 0 && (_enc) <= PG_ENCODING_BE_LAST)
 
-#define PG_ENCODING_IS_CLIEN_ONLY(_enc) \
-		(((_enc) > PG_ENCODING_BE_LAST && (_enc) <= PG_ENCODING_FE_LAST)
+#define PG_ENCODING_IS_CLIENT_ONLY(_enc) \
+		((_enc) > PG_ENCODING_BE_LAST && (_enc) <= PG_ENCODING_FE_LAST)
 
 #define PG_VALID_ENCODING(_enc) \
 		((_enc) >= 0 && (_enc) < _PG_LAST_ENCODING_)
 
-/* On FE are possible all encodings
- */
+/* All encodings are valid on the frontend (FE) */
 #define PG_VALID_FE_ENCODING(_enc)	PG_VALID_ENCODING(_enc)
 
 /*
@@ -246,18 +251,21 @@ extern const char *pg_encoding_to_char(int encoding);
 typedef int (*mb2wchar_with_len_converter) (const unsigned char *from,
 														pg_wchar *to,
 														int len);
+
 typedef int (*mblen_converter) (const unsigned char *mbstr);
 
 typedef int (*mbdisplaylen_converter) (const unsigned char *mbstr);
 
+typedef int (*mbverifier) (const unsigned char *mbstr, int len);
+
 typedef struct
 {
 	mb2wchar_with_len_converter mb2wchar_with_len;		/* convert a multibyte
 														 * string to a wchar */
-	mblen_converter mblen;		/* returns the length of a multibyte char */
-	mbdisplaylen_converter dsplen;		/* returns the lenghth of a
-										 * display length */
-	int			maxmblen;		/* max bytes for a char in this charset */
+	mblen_converter mblen;		/* get byte length of a char */
+	mbdisplaylen_converter dsplen;		/* get display width of a char */
+	mbverifier	mbverify;		/* verify multibyte sequence */
+	int			maxmblen;		/* max bytes for a char in this encoding */
 } pg_wchar_tbl;
 
 extern pg_wchar_tbl pg_wchar_table[];
@@ -290,6 +298,7 @@ extern int	pg_mblen(const unsigned char *mbstr);
 extern int	pg_dsplen(const unsigned char *mbstr);
 extern int	pg_encoding_mblen(int encoding, const unsigned char *mbstr);
 extern int	pg_encoding_dsplen(int encoding, const unsigned char *mbstr);
+extern int	pg_encoding_verifymb(int encoding, const char *mbstr, int len);
 extern int	pg_mule_mblen(const unsigned char *mbstr);
 extern int	pg_mic_mblen(const unsigned char *mbstr);
 extern int	pg_mbstrlen(const unsigned char *mbstr);
@@ -323,20 +332,33 @@ extern unsigned char *pg_server_to_client(unsigned char *s, int len);
 extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc);
 extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc);
 
-extern void LocalToUtf(unsigned char *iso, unsigned char *utf,
-		   pg_local_to_utf *map, int size, int encoding, int len);
+extern void LocalToUtf(const unsigned char *iso, unsigned char *utf,
+		   const pg_local_to_utf *map, int size, int encoding, int len);
 
-extern void UtfToLocal(unsigned char *utf, unsigned char *iso,
-		   pg_utf_to_local *map, int size, int len);
+extern void UtfToLocal(const unsigned char *utf, unsigned char *iso,
+		   const pg_utf_to_local *map, int size, int encoding, int len);
 
-extern bool pg_verifymbstr(const unsigned char *mbstr, int len, bool noError);
+extern bool pg_verifymbstr(const char *mbstr, int len, bool noError);
+extern bool pg_verify_mbstr(int encoding, const char *mbstr, int len,
+							bool noError);
 
-extern void pg_ascii2mic(unsigned char *src, unsigned char *dest, int len);
-extern void pg_mic2ascii(unsigned char *src, unsigned char *dest, int len);
-extern void pg_print_bogus_char(unsigned char **mic, unsigned char **p);
-extern void latin2mic(unsigned char *l, unsigned char *p, int len, int lc);
-extern void mic2latin(unsigned char *mic, unsigned char *p, int len, int lc);
-extern void latin2mic_with_table(unsigned char *l, unsigned char *p, int len, int lc, unsigned char *tab);
-extern void mic2latin_with_table(unsigned char *mic, unsigned char *p, int len, int lc, unsigned char *tab);
+extern void report_invalid_encoding(int encoding, const char *mbstr, int len);
+extern void report_untranslatable_char(int src_encoding, int dest_encoding,
+									   const char *mbstr, int len);
+
+extern void pg_ascii2mic(const unsigned char *l, unsigned char *p, int len);
+extern void pg_mic2ascii(const unsigned char *mic, unsigned char *p, int len);
+extern void latin2mic(const unsigned char *l, unsigned char *p, int len,
+					  int lc, int encoding);
+extern void mic2latin(const unsigned char *mic, unsigned char *p, int len,
+					  int lc, int encoding);
+extern void latin2mic_with_table(const unsigned char *l, unsigned char *p,
+								 int len, int lc, int encoding,
+								 const unsigned char *tab);
+extern void mic2latin_with_table(const unsigned char *mic, unsigned char *p,
+								 int len, int lc, int encoding,
+								 const unsigned char *tab);
+
+extern bool pg_utf8_islegal(const unsigned char *source, int length);
 
 #endif   /* PG_WCHAR_H */