mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-12-21 08:29:39 +08:00
Allow direct conversion between EUC_JP and SJIS to improve
performance. patches submitted by Atsushi Ogawa.
This commit is contained in:
parent
1fa87fa780
commit
e2d088de03
@ -6,7 +6,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c,v 1.10 2005/06/10 16:43:56 ishii Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c,v 1.11 2005/06/24 13:56:39 ishii Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -58,6 +58,8 @@ static void sjis2mic(unsigned char *sjis, unsigned char *p, int len);
|
||||
static void mic2sjis(unsigned char *mic, unsigned char *p, int len);
|
||||
static void euc_jp2mic(unsigned char *euc, unsigned char *p, int len);
|
||||
static void mic2euc_jp(unsigned char *mic, unsigned char *p, int len);
|
||||
static void euc_jp2sjis(unsigned char *mic, unsigned char *p, int len);
|
||||
static void sjis2euc_jp(unsigned char *mic, unsigned char *p, int len);
|
||||
|
||||
Datum
|
||||
euc_jp_to_sjis(PG_FUNCTION_ARGS)
|
||||
@ -65,16 +67,12 @@ euc_jp_to_sjis(PG_FUNCTION_ARGS)
|
||||
unsigned char *src = PG_GETARG_CSTRING(2);
|
||||
unsigned char *dest = PG_GETARG_CSTRING(3);
|
||||
int len = PG_GETARG_INT32(4);
|
||||
unsigned char *buf;
|
||||
|
||||
Assert(PG_GETARG_INT32(0) == PG_EUC_JP);
|
||||
Assert(PG_GETARG_INT32(1) == PG_SJIS);
|
||||
Assert(len >= 0);
|
||||
|
||||
buf = palloc(len * ENCODING_GROWTH_RATE);
|
||||
euc_jp2mic(src, buf, len);
|
||||
mic2sjis(buf, dest, strlen(buf));
|
||||
pfree(buf);
|
||||
euc_jp2sjis(src, dest, len);
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
@ -85,16 +83,12 @@ sjis_to_euc_jp(PG_FUNCTION_ARGS)
|
||||
unsigned char *src = PG_GETARG_CSTRING(2);
|
||||
unsigned char *dest = PG_GETARG_CSTRING(3);
|
||||
int len = PG_GETARG_INT32(4);
|
||||
unsigned char *buf;
|
||||
|
||||
Assert(PG_GETARG_INT32(0) == PG_SJIS);
|
||||
Assert(PG_GETARG_INT32(1) == PG_EUC_JP);
|
||||
Assert(len >= 0);
|
||||
|
||||
buf = palloc(len * ENCODING_GROWTH_RATE);
|
||||
sjis2mic(src, buf, len);
|
||||
mic2euc_jp(buf, dest, strlen(buf));
|
||||
pfree(buf);
|
||||
sjis2euc_jp(src, dest, len);
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
@ -454,3 +448,199 @@ mic2euc_jp(unsigned char *mic, unsigned char *p, int len)
|
||||
}
|
||||
*p = '\0';
|
||||
}
|
||||
|
||||
/*
|
||||
* EUC_JP -> SJIS
|
||||
*/
|
||||
static void
|
||||
euc_jp2sjis(unsigned char *euc, unsigned char *p, int len)
|
||||
{
|
||||
int c1,
|
||||
c2,
|
||||
k;
|
||||
unsigned char *euc_end = euc + len;
|
||||
|
||||
while (euc_end >= euc && (c1 = *euc++))
|
||||
{
|
||||
if(c1 < 0x80)
|
||||
{
|
||||
/* should be ASCII */
|
||||
*p++ = c1;
|
||||
}
|
||||
else if (c1 == SS2)
|
||||
{
|
||||
/* hankaku kana? */
|
||||
*p++ = *euc++;
|
||||
}
|
||||
else if (c1 == SS3)
|
||||
{
|
||||
/* JIS X0212 kanji? */
|
||||
c1 = *euc++;
|
||||
c2 = *euc++;
|
||||
k = c1 << 8 | c2;
|
||||
if (k >= 0xf5a1)
|
||||
{
|
||||
/* UDC2 */
|
||||
c1 -= 0x54;
|
||||
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74;
|
||||
*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
|
||||
}
|
||||
else
|
||||
{
|
||||
int i, k2;
|
||||
|
||||
/* IBM kanji */
|
||||
for (i = 0;; i++)
|
||||
{
|
||||
k2 = ibmkanji[i].euc & 0xffff;
|
||||
if (k2 == 0xffff)
|
||||
{
|
||||
*p++ = PGSJISALTCODE >> 8;
|
||||
*p++ = PGSJISALTCODE & 0xff;
|
||||
break;
|
||||
}
|
||||
if (k2 == k)
|
||||
{
|
||||
k = ibmkanji[i].sjis;
|
||||
*p++ = k >> 8;
|
||||
*p++ = k & 0xff;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* JIS X0208 kanji? */
|
||||
c2 = *euc++;
|
||||
k = (c1 << 8) | (c2 & 0xff);
|
||||
if (k >= 0xf5a1)
|
||||
{
|
||||
/* UDC1 */
|
||||
c1 -= 0x54;
|
||||
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f;
|
||||
}
|
||||
else
|
||||
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1);
|
||||
*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
|
||||
}
|
||||
}
|
||||
*p = '\0';
|
||||
}
|
||||
|
||||
/*
|
||||
* SJIS ---> EUC_JP
|
||||
*/
|
||||
static void
|
||||
sjis2euc_jp(unsigned char *sjis, unsigned char *p, int len)
|
||||
{
|
||||
int c1,
|
||||
c2,
|
||||
i,
|
||||
k,
|
||||
k2;
|
||||
unsigned char *sjis_end = sjis + len;
|
||||
|
||||
while (sjis_end >= sjis && (c1 = *sjis++))
|
||||
{
|
||||
if(c1 < 0x80)
|
||||
{
|
||||
/* should be ASCII */
|
||||
*p++ = c1;
|
||||
}
|
||||
else if (c1 >= 0xa1 && c1 <= 0xdf)
|
||||
{
|
||||
/* JIS X0201 (1 byte kana) */
|
||||
*p++ = SS2;
|
||||
*p++ = c1;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* JIS X0208, X0212, user defined extended characters
|
||||
*/
|
||||
c2 = *sjis++;
|
||||
k = (c1 << 8) + c2;
|
||||
if (k >= 0xed40 && k < 0xf040)
|
||||
{
|
||||
/* NEC selection IBM kanji */
|
||||
for (i = 0;; i++)
|
||||
{
|
||||
k2 = ibmkanji[i].nec;
|
||||
if (k2 == 0xffff)
|
||||
break;
|
||||
if (k2 == k)
|
||||
{
|
||||
k = ibmkanji[i].sjis;
|
||||
c1 = (k >> 8) & 0xff;
|
||||
c2 = k & 0xff;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (k < 0xeb3f)
|
||||
{
|
||||
/* JIS X0208 */
|
||||
*p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e);
|
||||
*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
|
||||
}
|
||||
else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc))
|
||||
{
|
||||
/* NEC selection IBM kanji - Other undecided justice */
|
||||
*p++ = PGEUCALTCODE >> 8;
|
||||
*p++ = PGEUCALTCODE & 0xff;
|
||||
}
|
||||
else if (k >= 0xf040 && k < 0xf540)
|
||||
{
|
||||
/*
|
||||
* UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 -
|
||||
* 0x7e7e EUC 0xf5a1 - 0xfefe
|
||||
*/
|
||||
c1 -= 0x6f;
|
||||
*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
|
||||
*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
|
||||
}
|
||||
else if (k >= 0xf540 && k < 0xfa40)
|
||||
{
|
||||
/*
|
||||
* UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 -
|
||||
* 0x7e7e EUC 0x8ff5a1 - 0x8ffefe
|
||||
*/
|
||||
*p++ = SS3;
|
||||
c1 -= 0x74;
|
||||
*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
|
||||
*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
|
||||
}
|
||||
else if (k >= 0xfa40)
|
||||
{
|
||||
/*
|
||||
* mapping IBM kanji to X0208 and X0212
|
||||
*
|
||||
*/
|
||||
for (i = 0;; i++)
|
||||
{
|
||||
k2 = ibmkanji[i].sjis;
|
||||
if (k2 == 0xffff)
|
||||
break;
|
||||
if (k2 == k)
|
||||
{
|
||||
k = ibmkanji[i].euc;
|
||||
if (k >= 0x8f0000)
|
||||
{
|
||||
*p++ = SS3;
|
||||
*p++ = 0x80 | ((k & 0xff00) >> 8);
|
||||
*p++ = 0x80 | (k & 0xff);
|
||||
}
|
||||
else
|
||||
{
|
||||
*p++ = 0x80 | (k >> 8);
|
||||
*p++ = 0x80 | (k & 0xff);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*p = '\0';
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user