gcc/libjava/gnu/gcj/convert/natInput_EUCJIS.cc
Warren Levy cb1902adbf [multiple changes]
1999-07-01  Warren Levy  <warrenl@cygnus.com>
	* gnu/gcj/convert/BytesToUnicode.java (read): Changed outlength
	to count and revised comments to match.
	* gnu/gcj/convert/Input_EUCJIS.java (read): Same as Input_8859_1.java.
	* gnu/gcj/convert/Input_JavaSrc.java (read): ditto.
	* gnu/gcj/convert/Input_SJIS.java (read): ditto.
	* gnu/gcj/convert/Input_UTF8.java (read): ditto.
	* gnu/gcj/convert/natInput_EUCJIS.cc (read): ditto.
	* gnu/gcj/convert/natInput_SJIS.cc (read): ditto.
1999-07-01  John-Marc Chandonia  <jmc@cmpharm.ucsf.edu>
	* gnu/gcj/convert/Input_8859_1.java (read): Use 3rd parameter
	properly as count rather than outlength.
	* java/io/BufferedOutputStream.java (write(byte[],int,int)): Flush
	output on overflow rather than buffer fill.
	* java/io/BufferedReader.java (fill): Don't clear out the buffer
	if markPos is 0 and there is still room in the buffer.

From-SVN: r27892
1999-07-01 17:01:00 +00:00

102 lines
2.2 KiB
C++

/* Copyright (C) 1999 Cygnus Solutions
This file is part of libgcj.
This software is copyrighted work licensed under the terms of the
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
details. */
#include <config.h>
#include <cni.h>
#include <gnu/gcj/convert/Input_EUCJIS.h>
// Value stored in the output by read() for input that cannot be mapped
// (U+FFFD, the Unicode replacement character).
#define ERROR_CHAR 0xFFFD
// Conversion tables defined outside this file.  read() indexes
// JIS0208_to_Unicode as [lead byte - 0xA1][trail byte - 0xA1] and treats
// a zero entry as "no mapping".
extern unsigned short JIS0208_to_Unicode[84][94];
// NOTE(review): presumably the JIS X 0212 counterpart of the table above
// (76 rows of 94 entries) -- confirm against its definition.
extern unsigned short JIS0212_to_Unicode[76][94];
// Decode EUC-JIS bytes from inbuffer[inpos .. inlength) into characters
// stored in OUTBUFFER starting at index OUTPOS.
//
// At most COUNT characters are written.  Decoding stops as soon as COUNT
// characters have been produced or the input is exhausted.  No check is
// made here against the length of OUTBUFFER.
//
// The decoder state is kept in the object (codeset, first_byte), so a
// multi-byte sequence that is cut off at the end of the input is resumed
// by the next call:
//   codeset 0 - at a character boundary
//   codeset 1 - lead byte of a JIS X 0208 pair is saved in first_byte
//   codeset 2 - SS2 (0x8E) seen, next byte is half-width katakana
//   codeset 3 - SS3 (0x8F) seen, next byte is the first JIS X 0212 byte
//   codeset 4 - first JIS X 0212 byte is saved in first_byte
//
// Returns the number of characters stored into OUTBUFFER.
jint
gnu::gcj::convert::Input_EUCJIS::read(jcharArray outbuffer, jint outpos,
				      jint count)
{
  jint start_outpos = outpos;
  while (outpos - start_outpos < count && inpos < inlength)
    {
      int b = ((unsigned char*) elements(inbuffer))[inpos++];
      if (codeset == 0) // ASCII or JIS-Roman
	{
	  if (b < 128)
	    {
#if 1
	      // Technically, we should translate 0x5c to Yen symbol;
	      // in practice, it is not clear.
	      if (b == 0x5c)
		b = 0x00A5; // Yen sign.
#endif
	      // Store B directly: narrowing through `char' would turn
	      // 0x00A5 into a negative value (0xFFA5 once widened) on
	      // platforms where plain char is signed.
	      elements(outbuffer)[outpos++] = b;
	    }
	  else
	    {
	      if (b == 0x8E) // SS2
		codeset = 2;
	      else if (b == 0x8F) // SS3
		codeset = 3;
	      else
		{
		  // Lead byte of a two-byte JIS X 0208 character; it is
		  // validated together with the trail byte.
		  codeset = 1;
		  first_byte = b;
		}
	    }
	}
      else if (codeset == 1) // JIS X 0208:1997
	{
	  // Rebase both bytes from 0xA1.. to table indices 0..; the
	  // unsigned comparison also rejects values that went negative.
	  first_byte -= 0x80 + 33;
	  b -= 0x80 + 33;
	  if ((unsigned) first_byte >= 84 || (unsigned) b >= 94)
	    b = ERROR_CHAR;
	  else
	    {
	      b = JIS0208_to_Unicode[first_byte][b];
	      if (b == 0)
		b = ERROR_CHAR;
	    }
	  elements(outbuffer)[outpos++] = b;
	  codeset = 0;
	}
      else if (codeset == 2) // Half-width katakana
	{
	  if (b >= 0xA1 && b <= 0xDF)
	    b += 0xFF61 - 0xA1;
	  else
	    b = ERROR_CHAR;
	  elements(outbuffer)[outpos++] = b;
	  codeset = 0;
	}
      else if (codeset == 3) // second byte of JIS X 0212-1990
	{
	  first_byte = b;
	  codeset = 4;
	}
      else // codeset == 4 // third byte of JIS X 0212-1990
	{
	  // The JIS X 0212 table has 76 rows and its first row
	  // corresponds to lead byte 0xA2, hence the offset of 34.
	  first_byte -= 0x80 + 34;
	  b -= 0x80 + 33;
	  if ((unsigned) first_byte >= 76 || (unsigned) b >= 94)
	    b = ERROR_CHAR;
	  else
	    {
	      // Must use the JIS X 0212 table here; looking the pair up
	      // in JIS0208_to_Unicode yields an unrelated character.
	      b = JIS0212_to_Unicode[first_byte][b];
	      if (b == 0)
		b = ERROR_CHAR;
	    }
	  elements(outbuffer)[outpos++] = b;
	  codeset = 0;
	}
    }
  return outpos - start_outpos;
}