Add a pg_encoding_mbcliplen() function that is just like pg_mbcliplen()

except the caller can specify the encoding to work in; this will be needed
for pg_stat_statements.  In passing, do some marginal efficiency hacking
and clean up some comments.  Also, prevent the single-byte-encoding code
path from fetching one byte past the stated length of the string (this
last is a bug that might need to be back-patched at some point).
This commit is contained in:
Tom Lane 2009-01-04 18:37:36 +00:00
parent 74ef810ca6
commit 1efd5ff89b
2 changed files with 38 additions and 23 deletions

View File

@ -4,7 +4,7 @@
* (currently mule internal code (mic) is used)
* Tatsuo Ishii
*
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.75 2008/11/11 03:01:20 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.76 2009/01/04 18:37:35 tgl Exp $
*/
#include "postgres.h"
@ -710,14 +710,14 @@ pg_encoding_mb2wchar_with_len(int encoding,
return (*pg_wchar_table[encoding].mb2wchar_with_len) ((const unsigned char *) from, to, len);
}
/* returns the byte length of a multibyte word */
/*
 * pg_mblen
 *
 * Report the byte length of the multibyte character that starts at mbstr,
 * as determined by the mblen routine registered in pg_wchar_table for the
 * current database encoding.
 */
int
pg_mblen(const char *mbstr)
{
	const unsigned char *bytes = (const unsigned char *) mbstr;
	int			enc = DatabaseEncoding->encoding;

	/* dispatch through the per-encoding function table */
	return (*pg_wchar_table[enc].mblen) (bytes);
}
/* returns the display length of a multibyte word */
/* returns the display length of a multibyte character */
int
pg_dsplen(const char *mbstr)
{
@ -767,23 +767,37 @@ pg_mbstrlen_with_len(const char *mbstr, int limit)
/*
* returns the byte length of a multibyte string
* (not necessarily NULL terminated)
* (not necessarily NULL terminated)
* that is no longer than limit.
* this function does not break multibyte word boundary.
* this function does not break multibyte character boundary.
*/
int
pg_mbcliplen(const char *mbstr, int len, int limit)
{
	/* Clip using the rules of the current database encoding. */
	int			db_encoding = DatabaseEncoding->encoding;

	return pg_encoding_mbcliplen(db_encoding, mbstr, len, limit);
}
/*
* pg_mbcliplen with specified encoding
*/
int
pg_encoding_mbcliplen(int encoding, const char *mbstr,
int len, int limit)
{
mblen_converter mblen_fn;
int clen = 0;
int l;
/* optimization for single byte encoding */
if (pg_database_encoding_max_length() == 1)
if (pg_encoding_max_length(encoding) == 1)
return cliplen(mbstr, len, limit);
mblen_fn = pg_wchar_table[encoding].mblen;
while (len > 0 && *mbstr)
{
l = pg_mblen(mbstr);
l = (*mblen_fn) ((const unsigned char *) mbstr);
if ((clen + l) > limit)
break;
clen += l;
@ -797,7 +811,8 @@ pg_mbcliplen(const char *mbstr, int len, int limit)
/*
* Similar to pg_mbcliplen except the limit parameter specifies the
* character length, not the byte length. */
* character length, not the byte length.
*/
int
pg_mbcharcliplen(const char *mbstr, int len, int limit)
{
@ -822,6 +837,18 @@ pg_mbcharcliplen(const char *mbstr, int len, int limit)
return clen;
}
/*
 * cliplen
 *
 * Clipping helper used for single-byte encodings: count the leading bytes
 * of str that are not NUL, examining no more than the smaller of len and
 * limit bytes.  Returns that count.
 */
static int
cliplen(const char *str, int len, int limit)
{
	int			maxbytes = Min(len, limit);
	int			nbytes;

	for (nbytes = 0; nbytes < maxbytes; nbytes++)
	{
		/* an embedded terminator ends the string early */
		if (str[nbytes] == '\0')
			break;
	}

	return nbytes;
}
void
SetDatabaseEncoding(int encoding)
{
@ -884,17 +911,3 @@ pg_client_encoding(PG_FUNCTION_ARGS)
Assert(ClientEncoding);
return DirectFunctionCall1(namein, CStringGetDatum(ClientEncoding->name));
}
/*
 * cliplen
 *
 * Clipping helper for single-byte encodings.
 *
 *	str		start of the string; need not be NUL-terminated
 *	len		stated length of the string, in bytes
 *	limit	maximum number of bytes the caller wants back
 *
 * Returns the number of leading bytes of str that can be kept: the scan
 * stops at the first NUL byte, or once min(len, limit) bytes have been
 * counted, whichever comes first.  A non-positive len or limit yields 0.
 *
 * The bound is tested before each byte is fetched, so str[len] is never
 * read.  (Testing "*s" first, as a loop of the form "for (s = str; *s; ...)"
 * does, fetches one byte beyond the stated length.)
 */
static int
cliplen(const char *str, int len, int limit)
{
	int			l = 0;

	/* never look at more bytes than exist or than the caller wants */
	if (limit < len)
		len = limit;

	while (l < len && str[l] != '\0')
		l++;

	return l;
}

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.81 2009/01/01 17:23:59 momjian Exp $
* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.82 2009/01/04 18:37:36 tgl Exp $
*
* NOTES
* This is used both by the backend and by libpq, but should not be
@ -358,6 +358,8 @@ extern int pg_mic_mblen(const unsigned char *mbstr);
extern int	pg_mbstrlen(const char *mbstr);
extern int	pg_mbstrlen_with_len(const char *mbstr, int len);
/* clip to at most "limit" bytes, using the database encoding */
extern int	pg_mbcliplen(const char *mbstr, int len, int limit);
/* same as pg_mbcliplen, but the caller names the encoding */
extern int	pg_encoding_mbcliplen(int encoding, const char *mbstr,
								  int len, int limit);
/* like pg_mbcliplen, but "limit" is a character count, not a byte count */
extern int	pg_mbcharcliplen(const char *mbstr, int len, int limit);
extern int	pg_encoding_max_length(int encoding);
extern int	pg_database_encoding_max_length(void);