mirror of
https://git.postgresql.org/git/postgresql.git
synced 2025-01-12 18:34:36 +08:00
8fcdac9987
- pg_wchar and wchar_t could have different sizes, so char2wchar doesn't call pg_mb2wchar_with_len, to prevent an out-of-bounds memory bug - make char2wchar/wchar2char symmetric; now they should not be called with the C locale because mbstowcs/wcstombs often don't work correctly with the C locale. - Text parser uses pg_mb2wchar_with_len directly in case of C locale and multibyte encoding. Per bug report by Hiroshi Inoue <inoue@tpf.co.jp> and following discussion. Backpatch up to 8.2, when multibyte support was implemented in tsearch.
185 lines
3.5 KiB
C
185 lines
3.5 KiB
C
#include "ts_locale.h"
|
|
|
|
#include "utils/builtins.h"
|
|
#include "utils/pg_locale.h"
|
|
#include "mb/pg_wchar.h"
|
|
|
|
|
|
#ifdef TS_USE_WIDE
|
|
|
|
#ifdef WIN32
|
|
|
|
size_t
|
|
wchar2char(char *to, const wchar_t *from, size_t len)
|
|
{
|
|
if (len == 0)
|
|
return 0;
|
|
|
|
if (GetDatabaseEncoding() == PG_UTF8)
|
|
{
|
|
int r;
|
|
|
|
r = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, len,
|
|
NULL, NULL);
|
|
|
|
if (r == 0)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
|
|
errmsg("UTF-16 to UTF-8 translation failed: %lu",
|
|
GetLastError())));
|
|
Assert(r <= len);
|
|
|
|
return r;
|
|
}
|
|
|
|
return wcstombs(to, from, len);
|
|
}
|
|
#endif /* WIN32 */
|
|
|
|
/*
 * char2wchar --- convert a multibyte string to wide characters
 *
 * to:   output buffer
 * from: multibyte input
 * len:  used both as the input length and as the output capacity
 *
 * Returns the number of wide characters produced.  On Windows with a UTF-8
 * database the Windows API does the conversion and a failure raises an
 * error; in every other case the result of mbstowcs() is returned as is.
 * Must not be called when LC_CTYPE is the C locale.
 */
size_t
char2wchar(wchar_t *to, const char *from, size_t len)
{
	/* Nothing to convert */
	if (len == 0)
		return 0;

#ifdef WIN32
	if (GetDatabaseEncoding() == PG_UTF8)
	{
		int			nwide = MultiByteToWideChar(CP_UTF8, 0, from, len, to, len);

		if (nwide == 0)
		{
			/*
			 * Run the encoding verifier over the input first
			 * (NOTE(review): presumably it reports malformed input itself
			 * -- confirm); if it returns, blame the locale instead.
			 */
			pg_verifymbstr(from, strlen(from), false);
			ereport(ERROR,
					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
					 errmsg("invalid multibyte character for locale"),
					 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
		}

		Assert(nwide <= len);

		return nwide;
	}
#endif   /* WIN32 */

	/* The branch above always exits, so this is the generic path */
	Assert(!lc_ctype_is_c());
	return mbstowcs(to, from, len);
}
|
|
|
|
/*
 * _t_isalpha --- is the character starting at ptr alphabetic?
 *
 * In the C locale the first byte is classified with isalpha(); otherwise
 * one character is converted to wchar_t and classified with iswalpha().
 * Returns nonzero if alphabetic, zero if not or if the character cannot be
 * converted.
 */
int
_t_isalpha(const char *ptr)
{
	wchar_t		character[2];

	if (lc_ctype_is_c())
		return isalpha(TOUCHAR(ptr));

	/*
	 * char2wchar() hands back mbstowcs()'s (size_t) -1 when the bytes do not
	 * form a valid character; nothing is stored in that case, so do not
	 * classify the uninitialized buffer.
	 */
	if (char2wchar(character, ptr, 1) == (size_t) -1)
		return 0;

	return iswalpha((wint_t) character[0]);
}
|
|
|
|
/*
 * _t_isprint --- is the character starting at ptr printable?
 *
 * In the C locale the first byte is classified with isprint(); otherwise
 * one character is converted to wchar_t and classified with iswprint().
 * Returns nonzero if printable, zero if not or if the character cannot be
 * converted.
 */
int
_t_isprint(const char *ptr)
{
	wchar_t		character[2];

	if (lc_ctype_is_c())
		return isprint(TOUCHAR(ptr));

	/*
	 * char2wchar() hands back mbstowcs()'s (size_t) -1 when the bytes do not
	 * form a valid character; nothing is stored in that case, so do not
	 * classify the uninitialized buffer.
	 */
	if (char2wchar(character, ptr, 1) == (size_t) -1)
		return 0;

	return iswprint((wint_t) character[0]);
}
|
|
#endif /* TS_USE_WIDE */
|
|
|
|
/*
 * lowerstr --- return a lower-cased copy of a zero-terminated string
 *
 * The input is not modified; the result is always a newly allocated
 * (palloc'd / pstrdup'd) string.  With a multibyte encoding and a non-C
 * LC_CTYPE the string is folded via wchar_t and towlower(); otherwise it is
 * folded byte by byte with tolower().  A failed conversion in either
 * direction raises an error.
 */
char *
lowerstr(char *str)
{
	char	   *ptr = str;
	char	   *out;
	int			len = strlen(str);

	if (len == 0)
		return pstrdup("");

#ifdef TS_USE_WIDE

	/*
	 * Use wide char code only when max encoding length > 1 and ctype != C.
	 * Some operating systems fail with multi-byte encodings and a C locale.
	 * Also, for a C locale there is no need to process as multibyte. From
	 * backend/utils/adt/oracle_compat.c Teodor
	 */
	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
	{
		wchar_t    *wstr,
				   *wptr;
		int			wlen;
		int			save_errno;

		/*
		 * Allocate wchar_t's for the worst case: len is the number of bytes,
		 * which is always >= the number of characters.  One extra wchar_t
		 * holds the terminating zero, because wchar2char (really wcstombs)
		 * wants a zero-terminated string.
		 */
		wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));

		/*
		 * str is a zero-terminated string, so wlen is the number of
		 * converted characters.
		 */
		wlen = char2wchar(wstr, str, len);
		if (wlen < 0)
			ereport(ERROR,
					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
					 errmsg("translation failed from server encoding to wchar_t")));

		Assert(wlen <= len);
		wstr[wlen] = 0;

		/* Fold every wide character in place */
		while (*wptr)
		{
			*wptr = towlower((wint_t) *wptr);
			wptr++;
		}

		/*
		 * Allocate the result string for the worst case + '\0'
		 */
		len = sizeof(char) * pg_database_encoding_max_length() * (wlen + 1);
		out = (char *) palloc(len);

		/*
		 * wlen is now a number of bytes, which is always >= the number of
		 * characters.
		 */
		wlen = wchar2char(out, wstr, len);

		/* Capture errno before pfree() gets a chance to overwrite it */
		save_errno = errno;
		pfree(wstr);

		if (wlen < 0)
			ereport(ERROR,
					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
					 errmsg("translation failed from wchar_t to server encoding %d", save_errno)));
		Assert(wlen <= len);
		out[wlen] = '\0';
	}
	else
#endif
	{
		char	   *outptr;

		/* Single-byte path: one output byte per input byte, plus '\0' */
		outptr = out = (char *) palloc(sizeof(char) * (len + 1));
		while (*ptr)
		{
			/* go through unsigned char so tolower() never sees a negative */
			*outptr++ = tolower(*(unsigned char *) ptr);
			ptr++;
		}
		*outptr = '\0';
	}

	return out;
}
|