mirror of
https://github.com/konsoletyper/teavm.git
synced 2025-01-06 10:15:18 +08:00
Replacing old TeaVM-based charsets with NIO charsets
This commit is contained in:
parent
cc5225a2a6
commit
aa2451c3e3
@ -1,61 +0,0 @@
|
||||
/*
|
||||
* Copyright 2013 Alexey Andreev.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.teavm.classlib.impl.charset;
|
||||
|
||||
/**
 * Static helpers for splitting supplementary code points into UTF-16 surrogate
 * pairs, recombining such pairs, and classifying individual {@code char}
 * values as high or low surrogates.
 *
 * <p>None of the methods validate their arguments: they only apply the bit
 * masks defined below, so callers are responsible for passing code points and
 * chars that lie in the expected ranges.
 *
 * @author Alexey Andreev
 */
public final class UTF16Helper {
    /** Mask selecting the five top bits that are common to every surrogate (high or low). */
    public static final int SURROGATE_NEUTRAL_BIT_MASK = 0xF800;
    /** Value of the bits selected by {@link #SURROGATE_NEUTRAL_BIT_MASK} for any surrogate. */
    public static final int SURROGATE_BITS = 0xD800;
    /** Mask selecting the six top bits that distinguish high surrogates from low ones. */
    public static final int SURROGATE_BIT_MASK = 0xFC00;
    /** Mask selecting the ten payload bits carried by a single surrogate. */
    public static final int SURROGATE_BIT_INV_MASK = 0x03FF;
    /** Value of the bits selected by {@link #SURROGATE_BIT_MASK} for a high surrogate. */
    public static final int HIGH_SURROGATE_BITS = 0xD800;
    /** Value of the bits selected by {@link #SURROGATE_BIT_MASK} for a low surrogate. */
    public static final int LOW_SURROGATE_BITS = 0xDC00;
    /** Number of payload bits stored in each surrogate. */
    public static final int MEANINGFUL_SURROGATE_BITS = 10;
    /** First code point that needs a surrogate pair in UTF-16. */
    public static final int SUPPLEMENTARY_PLANE = 0x10000;

    /** Not instantiable: the class only exposes constants and static methods. */
    private UTF16Helper() {
    }

    /**
     * Computes the leading (high) surrogate for a code point.
     *
     * @param codePoint the code point to split; no range check is performed
     * @return the char built from the high-surrogate prefix and the upper ten
     *         bits of {@code codePoint - SUPPLEMENTARY_PLANE}
     */
    public static char highSurrogate(int codePoint) {
        int offset = codePoint - SUPPLEMENTARY_PLANE;
        // Parentheses make the intended grouping explicit: mask first, then add the prefix.
        return (char)(HIGH_SURROGATE_BITS | ((offset >> MEANINGFUL_SURROGATE_BITS) & SURROGATE_BIT_INV_MASK));
    }

    /**
     * Computes the trailing (low) surrogate for a code point.
     *
     * @param codePoint the code point to split; no range check is performed
     * @return the char built from the low-surrogate prefix and the lower ten
     *         bits of {@code codePoint}
     */
    public static char lowSurrogate(int codePoint) {
        // The low ten bits of SUPPLEMENTARY_PLANE are zero, so no subtraction is needed here.
        return (char)(LOW_SURROGATE_BITS | (codePoint & SURROGATE_BIT_INV_MASK));
    }

    /**
     * Tells whether a char lies in the high-surrogate range.
     *
     * @param c the char to test
     * @return {@code true} if the six top bits of {@code c} equal the high-surrogate prefix
     */
    public static boolean isHighSurrogate(char c) {
        return (c & SURROGATE_BIT_MASK) == HIGH_SURROGATE_BITS;
    }

    /**
     * Tells whether a char lies in the low-surrogate range.
     *
     * @param c the char to test
     * @return {@code true} if the six top bits of {@code c} equal the low-surrogate prefix
     */
    public static boolean isLowSurrogate(char c) {
        return (c & SURROGATE_BIT_MASK) == LOW_SURROGATE_BITS;
    }

    /**
     * Tells whether two chars form a well-ordered surrogate pair.
     *
     * @param a the candidate high surrogate
     * @param b the candidate low surrogate
     * @return {@code true} if {@code a} is a high surrogate and {@code b} is a low surrogate
     */
    public static boolean isSurrogatePair(char a, char b) {
        return isHighSurrogate(a) && isLowSurrogate(b);
    }

    /**
     * Combines the payload bits of two surrogates into a code point.
     *
     * @param a the high surrogate; not checked
     * @param b the low surrogate; not checked
     * @return the ten payload bits of {@code a} followed by the ten payload
     *         bits of {@code b}, offset by {@link #SUPPLEMENTARY_PLANE}
     */
    public static int buildCodePoint(char a, char b) {
        int upper = (a & SURROGATE_BIT_INV_MASK) << MEANINGFUL_SURROGATE_BITS;
        int lower = b & SURROGATE_BIT_INV_MASK;
        return (upper | lower) + SUPPLEMENTARY_PLANE;
    }

    /**
     * Tells whether a char is a surrogate of either kind.
     *
     * @param c the char to test
     * @return {@code true} if the five top bits of {@code c} equal the common surrogate prefix
     */
    public static boolean isSurrogate(char c) {
        return (c & SURROGATE_NEUTRAL_BIT_MASK) == SURROGATE_BITS;
    }
}
|
@ -29,17 +29,17 @@ public class UTF8Charset extends Charset {
|
||||
} else if (ch < 0x400) {
|
||||
dest.put((byte)(0xC0 | (ch >> 6)));
|
||||
dest.put((byte)(0x80 | (ch & 0x3F)));
|
||||
} else if (!UTF16Helper.isSurrogate(ch)) {
|
||||
} else if (!Character.isSurrogate(ch)) {
|
||||
dest.put((byte)(0xE0 | (ch >> 12)));
|
||||
dest.put((byte)(0x80 | ((ch >> 6) & 0x3F)));
|
||||
dest.put((byte)(0x80 | (ch & 0x3F)));
|
||||
} else if (UTF16Helper.isHighSurrogate(ch)) {
|
||||
} else if (Character.isHighSurrogate(ch)) {
|
||||
char low = source.get();
|
||||
if (!UTF16Helper.isLowSurrogate(low)) {
|
||||
if (!Character.isLowSurrogate(low)) {
|
||||
source.back(1);
|
||||
dest.put((byte)'?');
|
||||
} else {
|
||||
int codePoint = UTF16Helper.buildCodePoint(ch, low);
|
||||
int codePoint = Character.toCodePoint(ch, low);
|
||||
dest.put((byte)(0xF0 | (codePoint >> 18)));
|
||||
dest.put((byte)(0x80 | ((codePoint >> 12) & 0x3F)));
|
||||
dest.put((byte)(0x80 | ((codePoint >> 6) & 0x3F)));
|
||||
@ -72,7 +72,7 @@ public class UTF8Charset extends Charset {
|
||||
byte b2 = source.get();
|
||||
byte b3 = source.get();
|
||||
char c = (char)(((b & 0x0F) << 12) | ((b2 & 0x3f) << 6) | (b3 & 0x3F));
|
||||
dest.put(!UTF16Helper.isHighSurrogate(c) ? c : '?');
|
||||
dest.put(!Character.isHighSurrogate(c) ? c : '?');
|
||||
} else if ((b & 0xF8) == 0xF0) {
|
||||
if (source.available() < 3) {
|
||||
source.skip(source.available());
|
||||
@ -83,8 +83,8 @@ public class UTF8Charset extends Charset {
|
||||
byte b3 = source.get();
|
||||
byte b4 = source.get();
|
||||
int code = ((b & 0x07) << 18) | ((b2 & 0x3f) << 12) | ((b3 & 0x3F) << 6) | (b4 & 0x3F);
|
||||
dest.put(UTF16Helper.highSurrogate(code));
|
||||
dest.put(UTF16Helper.lowSurrogate(code));
|
||||
dest.put(Character.highSurrogate(code));
|
||||
dest.put(Character.lowSurrogate(code));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -15,11 +15,13 @@
|
||||
*/
|
||||
package org.teavm.classlib.java.io;
|
||||
|
||||
import org.teavm.classlib.impl.charset.ByteBuffer;
|
||||
import org.teavm.classlib.impl.charset.CharBuffer;
|
||||
import org.teavm.classlib.impl.charset.Charset;
|
||||
import org.teavm.classlib.impl.charset.UTF8Charset;
|
||||
import org.teavm.classlib.java.lang.TString;
|
||||
import org.teavm.classlib.java.nio.TByteBuffer;
|
||||
import org.teavm.classlib.java.nio.TCharBuffer;
|
||||
import org.teavm.classlib.java.nio.charset.TCharset;
|
||||
import org.teavm.classlib.java.nio.charset.TCharsetDecoder;
|
||||
import org.teavm.classlib.java.nio.charset.TCodingErrorAction;
|
||||
import org.teavm.classlib.java.nio.charset.impl.TUTF8Charset;
|
||||
|
||||
/**
|
||||
*
|
||||
@ -27,30 +29,30 @@ import org.teavm.classlib.java.lang.TString;
|
||||
*/
|
||||
public class TInputStreamReader extends TReader {
|
||||
private TInputStream stream;
|
||||
private Charset charset;
|
||||
private TCharset charset;
|
||||
private TString charsetName;
|
||||
private byte[] inData = new byte[8192];
|
||||
private ByteBuffer inBuffer = new ByteBuffer(inData);
|
||||
private TByteBuffer inBuffer = TByteBuffer.wrap(inData);
|
||||
private char[] outData = new char[1024];
|
||||
private CharBuffer outBuffer = new CharBuffer(outData);
|
||||
private TCharBuffer outBuffer = TCharBuffer.wrap(outData);
|
||||
private boolean streamEof;
|
||||
private boolean eof;
|
||||
|
||||
public TInputStreamReader(TInputStream in, TString charsetName) {
|
||||
this(in, Charset.get(charsetName.toString()));
|
||||
this(in, TCharset.forName(charsetName.toString()));
|
||||
this.charsetName = charsetName;
|
||||
}
|
||||
|
||||
public TInputStreamReader(TInputStream in) {
|
||||
this(in, new UTF8Charset());
|
||||
this(in, new TUTF8Charset());
|
||||
charsetName = TString.wrap("UTF-8");
|
||||
}
|
||||
|
||||
private TInputStreamReader(TInputStream in, Charset charset) {
|
||||
public TInputStreamReader(TInputStream in, TCharset charset) {
|
||||
this.stream = in;
|
||||
this.charset = charset;
|
||||
outBuffer.skip(outBuffer.available());
|
||||
inBuffer.skip(inBuffer.available());
|
||||
outBuffer.position(outBuffer.limit());
|
||||
inBuffer.position(inBuffer.limit());
|
||||
}
|
||||
|
||||
public TString getEncoding() {
|
||||
@ -64,10 +66,10 @@ public class TInputStreamReader extends TReader {
|
||||
|
||||
@Override
|
||||
public int read() throws TIOException {
|
||||
if (eof && outBuffer.end()) {
|
||||
if (eof && !outBuffer.hasRemaining()) {
|
||||
return -1;
|
||||
}
|
||||
if (!outBuffer.end()) {
|
||||
if (outBuffer.hasRemaining()) {
|
||||
return outBuffer.get();
|
||||
}
|
||||
return fillBuffer() ? outBuffer.get() : -1;
|
||||
@ -75,37 +77,40 @@ public class TInputStreamReader extends TReader {
|
||||
|
||||
@Override
|
||||
public int read(char[] cbuf, int off, int len) throws TIOException {
|
||||
if (eof && outBuffer.end()) {
|
||||
if (eof && !outBuffer.hasRemaining()) {
|
||||
return -1;
|
||||
}
|
||||
CharBuffer wrapBuffer = new CharBuffer(cbuf, off, off + len);
|
||||
while (!wrapBuffer.end()) {
|
||||
wrapBuffer.put(outBuffer);
|
||||
if (outBuffer.end() && !fillBuffer()) {
|
||||
int bytesRead = 0;
|
||||
while (len > 0) {
|
||||
int sz = Math.min(len, outBuffer.remaining());
|
||||
outBuffer.get(cbuf, off + bytesRead, sz);
|
||||
len -= sz;
|
||||
bytesRead += sz;
|
||||
if (!outBuffer.hasRemaining() && !fillBuffer()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return wrapBuffer.position() - off;
|
||||
return bytesRead;
|
||||
}
|
||||
|
||||
private boolean fillBuffer() throws TIOException {
|
||||
if (eof) {
|
||||
return false;
|
||||
}
|
||||
CharBuffer newBuffer = new CharBuffer(outData);
|
||||
newBuffer.put(outBuffer);
|
||||
outBuffer.compact();
|
||||
TCharsetDecoder decoder = charset.newDecoder()
|
||||
.onMalformedInput(TCodingErrorAction.REPLACE)
|
||||
.onUnmappableCharacter(TCodingErrorAction.IGNORE);
|
||||
while (true) {
|
||||
if (inBuffer.end() && !fillReadBuffer()) {
|
||||
if (!inBuffer.hasRemaining() && !fillReadBuffer()) {
|
||||
eof = true;
|
||||
break;
|
||||
}
|
||||
int oldAvail = newBuffer.available();
|
||||
charset.decode(inBuffer, newBuffer);
|
||||
if (oldAvail == newBuffer.available()) {
|
||||
if (decoder.decode(inBuffer, outBuffer, eof).isOverflow()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
outBuffer = new CharBuffer(outData, 0, newBuffer.position());
|
||||
outBuffer.flip();
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -113,30 +118,25 @@ public class TInputStreamReader extends TReader {
|
||||
if (streamEof) {
|
||||
return false;
|
||||
}
|
||||
int off = 0;
|
||||
while (!inBuffer.end()) {
|
||||
inData[off] = inBuffer.get();
|
||||
}
|
||||
inBuffer.rewind(0);
|
||||
while (off < inData.length) {
|
||||
int bytesRead = stream.read(inData, off, inData.length - off);
|
||||
inBuffer.compact();
|
||||
while (inBuffer.hasRemaining()) {
|
||||
int bytesRead = stream.read(inBuffer.array(), inBuffer.position(), inBuffer.remaining());
|
||||
if (bytesRead == -1) {
|
||||
streamEof = true;
|
||||
inBuffer = new ByteBuffer(inData, 0, inBuffer.position());
|
||||
break;
|
||||
} else {
|
||||
off += bytesRead;
|
||||
inBuffer.position(inBuffer.position() + bytesRead);
|
||||
if (bytesRead == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
inBuffer = new ByteBuffer(inData, 0, off);
|
||||
inBuffer.flip();
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean ready() throws TIOException {
|
||||
return !outBuffer.end() || inBuffer.end();
|
||||
return outBuffer.hasRemaining() || inBuffer.hasRemaining();
|
||||
}
|
||||
}
|
||||
|
@ -15,7 +15,6 @@
|
||||
*/
|
||||
package org.teavm.classlib.java.lang;
|
||||
|
||||
import org.teavm.classlib.impl.charset.UTF16Helper;
|
||||
import org.teavm.classlib.java.io.TSerializable;
|
||||
import org.teavm.classlib.java.util.TArrays;
|
||||
|
||||
@ -553,12 +552,12 @@ class TAbstractStringBuilder extends TObject implements TSerializable, TCharSequ
|
||||
}
|
||||
|
||||
protected TAbstractStringBuilder appendCodePoint(int codePoint) {
|
||||
if (codePoint < UTF16Helper.SUPPLEMENTARY_PLANE) {
|
||||
if (codePoint < TCharacter.MIN_SUPPLEMENTARY_CODE_POINT) {
|
||||
return append((char)codePoint);
|
||||
}
|
||||
ensureCapacity(length + 2);
|
||||
buffer[length++] = UTF16Helper.highSurrogate(codePoint);
|
||||
buffer[length++] = UTF16Helper.lowSurrogate(codePoint);
|
||||
buffer[length++] = TCharacter.highSurrogate(codePoint);
|
||||
buffer[length++] = TCharacter.lowSurrogate(codePoint);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -15,7 +15,6 @@
|
||||
*/
|
||||
package org.teavm.classlib.java.lang;
|
||||
|
||||
import org.teavm.classlib.impl.charset.UTF16Helper;
|
||||
import org.teavm.classlib.impl.unicode.UnicodeHelper;
|
||||
import org.teavm.platform.Platform;
|
||||
import org.teavm.platform.metadata.MetadataProvider;
|
||||
@ -96,6 +95,13 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||
private static UnicodeHelper.Range[] classMapping;
|
||||
private char value;
|
||||
private static TCharacter[] characterCache = new TCharacter[128];
|
||||
private static final int SURROGATE_NEUTRAL_BIT_MASK = 0xF800;
|
||||
private static final int SURROGATE_BITS = 0xD800;
|
||||
private static final int SURROGATE_BIT_MASK = 0xFC00;
|
||||
private static final int SURROGATE_BIT_INV_MASK = 0x03FF;
|
||||
private static final int HIGH_SURROGATE_BITS = 0xD800;
|
||||
private static final int LOW_SURROGATE_BITS = 0xDC00;
|
||||
private static final int MEANINGFUL_SURROGATE_BITS = 10;
|
||||
|
||||
public TCharacter(char value) {
|
||||
this.value = value;
|
||||
@ -152,11 +158,11 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||
}
|
||||
|
||||
public static boolean isHighSurrogate(char ch) {
|
||||
return UTF16Helper.isHighSurrogate(ch);
|
||||
return (ch & SURROGATE_BIT_MASK) == HIGH_SURROGATE_BITS;
|
||||
}
|
||||
|
||||
public static boolean isLowSurrogate(char ch) {
|
||||
return UTF16Helper.isLowSurrogate(ch);
|
||||
return (ch & SURROGATE_BIT_MASK) == LOW_SURROGATE_BITS;
|
||||
}
|
||||
|
||||
public static boolean isSurrogate(char ch) {
|
||||
@ -172,7 +178,8 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||
}
|
||||
|
||||
public static int toCodePoint(char high, char low) {
|
||||
return UTF16Helper.buildCodePoint(high, low);
|
||||
return (((high & SURROGATE_BIT_INV_MASK) << MEANINGFUL_SURROGATE_BITS) | (low & SURROGATE_BIT_INV_MASK)) +
|
||||
MIN_SUPPLEMENTARY_CODE_POINT;
|
||||
}
|
||||
|
||||
public static int codePointAt(TCharSequence seq, int index) {
|
||||
@ -216,11 +223,12 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||
}
|
||||
|
||||
public static char highSurrogate(int codePoint) {
|
||||
return UTF16Helper.highSurrogate(codePoint);
|
||||
codePoint -= MIN_SUPPLEMENTARY_CODE_POINT;
|
||||
return (char)(HIGH_SURROGATE_BITS | (codePoint >> MEANINGFUL_SURROGATE_BITS) & SURROGATE_BIT_INV_MASK);
|
||||
}
|
||||
|
||||
public static char lowSurrogate(int codePoint) {
|
||||
return UTF16Helper.lowSurrogate(codePoint);
|
||||
return (char)(LOW_SURROGATE_BITS | codePoint & SURROGATE_BIT_INV_MASK);
|
||||
}
|
||||
|
||||
public static char toLowerCase(char ch) {
|
||||
@ -309,9 +317,9 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||
private static native StringResource obtainClasses();
|
||||
|
||||
public static int toChars(int codePoint, char[] dst, int dstIndex) {
|
||||
if (codePoint >= UTF16Helper.SUPPLEMENTARY_PLANE) {
|
||||
dst[dstIndex] = UTF16Helper.highSurrogate(codePoint);
|
||||
dst[dstIndex + 1] = UTF16Helper.lowSurrogate(codePoint);
|
||||
if (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT) {
|
||||
dst[dstIndex] = highSurrogate(codePoint);
|
||||
dst[dstIndex + 1] = lowSurrogate(codePoint);
|
||||
return 2;
|
||||
} else {
|
||||
dst[dstIndex] = (char)codePoint;
|
||||
@ -320,8 +328,8 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||
}
|
||||
|
||||
public static char[] toChars(int codePoint) {
|
||||
if (codePoint >= UTF16Helper.SUPPLEMENTARY_PLANE) {
|
||||
return new char[] { UTF16Helper.highSurrogate(codePoint), UTF16Helper.lowSurrogate(codePoint) };
|
||||
if (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT) {
|
||||
return new char[] { highSurrogate(codePoint), lowSurrogate(codePoint) };
|
||||
} else {
|
||||
return new char[] { (char)codePoint };
|
||||
}
|
||||
@ -331,7 +339,7 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||
int count = endIndex - beginIndex;
|
||||
--endIndex;
|
||||
for (int i = beginIndex; i < endIndex; ++i) {
|
||||
if (UTF16Helper.isHighSurrogate(seq.charAt(i)) && UTF16Helper.isLowSurrogate(seq.charAt(i + 1))) {
|
||||
if (isHighSurrogate(seq.charAt(i)) && isLowSurrogate(seq.charAt(i + 1))) {
|
||||
--count;
|
||||
++i;
|
||||
}
|
||||
@ -343,7 +351,7 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||
int r = count;
|
||||
--count;
|
||||
for (int i = 0; i < count; ++i) {
|
||||
if (UTF16Helper.isHighSurrogate(a[offset]) && UTF16Helper.isLowSurrogate(a[offset + i + 1])) {
|
||||
if (isHighSurrogate(a[offset]) && isLowSurrogate(a[offset + i + 1])) {
|
||||
--r;
|
||||
++i;
|
||||
}
|
||||
@ -353,8 +361,8 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||
|
||||
public static int offsetByCodePoints(TCharSequence seq, int index, int codePointOffset) {
|
||||
for (int i = 0; i < codePointOffset; ++i) {
|
||||
if (index < seq.length() - 1 && UTF16Helper.isHighSurrogate(seq.charAt(index)) &&
|
||||
UTF16Helper.isLowSurrogate(seq.charAt(index + 1))) {
|
||||
if (index < seq.length() - 1 && isHighSurrogate(seq.charAt(index)) &&
|
||||
isLowSurrogate(seq.charAt(index + 1))) {
|
||||
index += 2;
|
||||
} else {
|
||||
index++;
|
||||
@ -365,8 +373,7 @@ public class TCharacter extends TObject implements TComparable<TCharacter> {
|
||||
|
||||
public static int offsetByCodePoints(char[] a, int start, int count, int index, int codePointOffset) {
|
||||
for (int i = 0; i < codePointOffset; ++i) {
|
||||
if (index < count - 1 && UTF16Helper.isHighSurrogate(a[index + start]) &&
|
||||
UTF16Helper.isLowSurrogate(a[index + start + 1])) {
|
||||
if (index < count - 1 && isHighSurrogate(a[index + start]) && isLowSurrogate(a[index + start + 1])) {
|
||||
index += 2;
|
||||
} else {
|
||||
index++;
|
||||
|
@ -15,9 +15,12 @@
|
||||
*/
|
||||
package org.teavm.classlib.java.lang;
|
||||
|
||||
import org.teavm.classlib.impl.charset.*;
|
||||
import org.teavm.classlib.java.io.TSerializable;
|
||||
import org.teavm.classlib.java.io.TUnsupportedEncodingException;
|
||||
import org.teavm.classlib.java.nio.TByteBuffer;
|
||||
import org.teavm.classlib.java.nio.TCharBuffer;
|
||||
import org.teavm.classlib.java.nio.charset.TCharset;
|
||||
import org.teavm.classlib.java.nio.charset.impl.TUTF8Charset;
|
||||
import org.teavm.classlib.java.util.TArrays;
|
||||
import org.teavm.classlib.java.util.TComparator;
|
||||
import org.teavm.classlib.java.util.THashMap;
|
||||
@ -61,15 +64,15 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||
}
|
||||
|
||||
public TString(byte[] bytes, int offset, int length, TString charsetName) throws TUnsupportedEncodingException {
|
||||
Charset charset = Charset.get(charsetName.toString());
|
||||
if (charset == null) {
|
||||
throw new TUnsupportedEncodingException(TString.wrap("Unknown encoding:" + charsetName));
|
||||
}
|
||||
this(bytes, offset, length, TCharset.forName(charsetName.toString()));
|
||||
}
|
||||
|
||||
public TString(byte[] bytes, int offset, int length, TCharset charset) {
|
||||
initWithBytes(bytes, offset, length, charset);
|
||||
}
|
||||
|
||||
public TString(byte[] bytes, int offset, int length) {
|
||||
initWithBytes(bytes, offset, length, new UTF8Charset());
|
||||
initWithBytes(bytes, offset, length, new TUTF8Charset());
|
||||
}
|
||||
|
||||
public TString(byte[] bytes) {
|
||||
@ -80,14 +83,18 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||
this(bytes, 0, bytes.length, charsetName);
|
||||
}
|
||||
|
||||
public TString(byte[] bytes, TCharset charset) {
|
||||
this(bytes, 0, bytes.length, charset);
|
||||
}
|
||||
|
||||
public TString(int[] codePoints, int offset, int count) {
|
||||
characters = new char[count * 2];
|
||||
int charCount = 0;
|
||||
for (int i = 0; i < count; ++i) {
|
||||
int codePoint = codePoints[offset++];
|
||||
if (codePoint >= UTF16Helper.SUPPLEMENTARY_PLANE) {
|
||||
characters[charCount++] = UTF16Helper.highSurrogate(codePoint);
|
||||
characters[charCount++] = UTF16Helper.lowSurrogate(codePoint);
|
||||
if (codePoint >= TCharacter.MIN_SUPPLEMENTARY_CODE_POINT) {
|
||||
characters[charCount++] = TCharacter.highSurrogate(codePoint);
|
||||
characters[charCount++] = TCharacter.lowSurrogate(codePoint);
|
||||
} else {
|
||||
characters[charCount++] = (char)codePoint;
|
||||
}
|
||||
@ -97,19 +104,14 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||
}
|
||||
}
|
||||
|
||||
private void initWithBytes(byte[] bytes, int offset, int length, Charset charset) {
|
||||
TStringBuilder sb = new TStringBuilder(bytes.length * 2);
|
||||
this.characters = new char[sb.length()];
|
||||
ByteBuffer source = new ByteBuffer(bytes, offset, offset + length);
|
||||
char[] destChars = new char[TMath.max(8, TMath.min(length * 2, 1024))];
|
||||
CharBuffer dest = new CharBuffer(destChars, 0, destChars.length);
|
||||
while (!source.end()) {
|
||||
charset.decode(source, dest);
|
||||
sb.append(destChars, 0, dest.position());
|
||||
dest.rewind(0);
|
||||
private void initWithBytes(byte[] bytes, int offset, int length, TCharset charset) {
|
||||
TCharBuffer buffer = charset.decode(TByteBuffer.wrap(bytes, offset, length));
|
||||
if (buffer.hasArray() && buffer.position() == 0 && buffer.limit() == buffer.capacity()) {
|
||||
characters = buffer.array();
|
||||
} else {
|
||||
characters = new char[buffer.remaining()];
|
||||
buffer.get(characters);
|
||||
}
|
||||
characters = new char[sb.length()];
|
||||
sb.getChars(0, sb.length(), characters, 0);
|
||||
}
|
||||
|
||||
public TString(TStringBuilder sb) {
|
||||
@ -283,7 +285,7 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||
}
|
||||
|
||||
public int indexOf(int ch, int fromIndex) {
|
||||
if (ch < UTF16Helper.SUPPLEMENTARY_PLANE) {
|
||||
if (ch < TCharacter.MIN_SUPPLEMENTARY_CODE_POINT) {
|
||||
char bmpChar = (char)ch;
|
||||
for (int i = fromIndex; i < characters.length; ++i) {
|
||||
if (characters[i] == bmpChar) {
|
||||
@ -292,8 +294,8 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||
}
|
||||
return -1;
|
||||
} else {
|
||||
char hi = UTF16Helper.highSurrogate(ch);
|
||||
char lo = UTF16Helper.lowSurrogate(ch);
|
||||
char hi = TCharacter.highSurrogate(ch);
|
||||
char lo = TCharacter.lowSurrogate(ch);
|
||||
for (int i = fromIndex; i < characters.length - 1; ++i) {
|
||||
if (characters[i] == hi && characters[i + 1] == lo) {
|
||||
return i;
|
||||
@ -308,7 +310,7 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||
}
|
||||
|
||||
public int lastIndexOf(int ch, int fromIndex) {
|
||||
if (ch < UTF16Helper.SUPPLEMENTARY_PLANE) {
|
||||
if (ch < TCharacter.MIN_SUPPLEMENTARY_CODE_POINT) {
|
||||
char bmpChar = (char)ch;
|
||||
for (int i = fromIndex; i >= 0; --i) {
|
||||
if (characters[i] == bmpChar) {
|
||||
@ -317,8 +319,8 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||
}
|
||||
return -1;
|
||||
} else {
|
||||
char hi = UTF16Helper.highSurrogate(ch);
|
||||
char lo = UTF16Helper.lowSurrogate(ch);
|
||||
char hi = TCharacter.highSurrogate(ch);
|
||||
char lo = TCharacter.lowSurrogate(ch);
|
||||
for (int i = fromIndex; i >= 1; --i) {
|
||||
if (characters[i] == lo && characters[i - 1] == hi) {
|
||||
return i - 1;
|
||||
@ -550,34 +552,22 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||
}
|
||||
|
||||
public byte[] getBytes(TString charsetName) throws TUnsupportedEncodingException {
|
||||
Charset charset = Charset.get(charsetName.toString());
|
||||
if (charset == null) {
|
||||
throw new TUnsupportedEncodingException(TString.wrap("Unsupported encoding: " + charsetName));
|
||||
}
|
||||
return getBytes(charset);
|
||||
return getBytes(TCharset.forName(charsetName.toString()));
|
||||
}
|
||||
|
||||
public byte[] getBytes() {
|
||||
return getBytes(new UTF8Charset());
|
||||
return getBytes(new TUTF8Charset());
|
||||
}
|
||||
|
||||
private byte[] getBytes(Charset charset) {
|
||||
byte[] result = new byte[length() * 2];
|
||||
int resultLength = 0;
|
||||
byte[] destArray = new byte[TMath.max(16, TMath.min(length() * 2, 4096))];
|
||||
ByteBuffer dest = new ByteBuffer(destArray);
|
||||
CharBuffer src = new CharBuffer(characters);
|
||||
while (!src.end()) {
|
||||
charset.encode(src, dest);
|
||||
if (resultLength + dest.position() > result.length) {
|
||||
result = TArrays.copyOf(result, result.length * 2);
|
||||
}
|
||||
for (int i = 0; i < dest.position(); ++i) {
|
||||
result[resultLength++] = destArray[i];
|
||||
}
|
||||
dest.rewind(0);
|
||||
public byte[] getBytes(TCharset charset) {
|
||||
TByteBuffer buffer = charset.encode(TCharBuffer.wrap(characters));
|
||||
if (buffer.hasArray() && buffer.position() == 0 && buffer.limit() == buffer.capacity()) {
|
||||
return buffer.array();
|
||||
} else {
|
||||
byte[] result = new byte[buffer.remaining()];
|
||||
buffer.get(result);
|
||||
return result;
|
||||
}
|
||||
return TArrays.copyOf(result, resultLength);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -601,11 +591,11 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||
int[] codePoints = new int[characters.length];
|
||||
int codePointCount = 0;
|
||||
for (int i = 0; i < characters.length; ++i) {
|
||||
if (i == characters.length - 1 || !UTF16Helper.isHighSurrogate(characters[i]) ||
|
||||
!UTF16Helper.isLowSurrogate(characters[i + 1])) {
|
||||
if (i == characters.length - 1 || !TCharacter.isHighSurrogate(characters[i]) ||
|
||||
!TCharacter.isLowSurrogate(characters[i + 1])) {
|
||||
codePoints[codePointCount++] = TCharacter.toLowerCase(characters[i]);
|
||||
} else {
|
||||
codePoints[codePointCount++] = TCharacter.toLowerCase(UTF16Helper.buildCodePoint(
|
||||
codePoints[codePointCount++] = TCharacter.toLowerCase(TCharacter.toCodePoint(
|
||||
characters[i], characters[i + 1]));
|
||||
++i;
|
||||
}
|
||||
@ -620,11 +610,11 @@ public class TString extends TObject implements TSerializable, TComparable<TStri
|
||||
int[] codePoints = new int[characters.length];
|
||||
int codePointCount = 0;
|
||||
for (int i = 0; i < characters.length; ++i) {
|
||||
if (i == characters.length - 1 || !UTF16Helper.isHighSurrogate(characters[i]) ||
|
||||
!UTF16Helper.isLowSurrogate(characters[i + 1])) {
|
||||
if (i == characters.length - 1 || !TCharacter.isHighSurrogate(characters[i]) ||
|
||||
!TCharacter.isLowSurrogate(characters[i + 1])) {
|
||||
codePoints[codePointCount++] = TCharacter.toUpperCase(characters[i]);
|
||||
} else {
|
||||
codePoints[codePointCount++] = TCharacter.toUpperCase(UTF16Helper.buildCodePoint(
|
||||
codePoints[codePointCount++] = TCharacter.toUpperCase(TCharacter.toCodePoint(
|
||||
characters[i], characters[i + 1]));
|
||||
++i;
|
||||
}
|
||||
|
@ -31,7 +31,7 @@ public abstract class TCharsetDecoder {
|
||||
private TCharset charset;
|
||||
private float averageCharsPerByte;
|
||||
private float maxCharsPerByte;
|
||||
private String replacement = "?";
|
||||
private String replacement = "\uFFFD";
|
||||
private TCodingErrorAction malformedAction = TCodingErrorAction.REPORT;
|
||||
private TCodingErrorAction unmappableAction = TCodingErrorAction.REPORT;
|
||||
private int state;
|
||||
@ -126,8 +126,18 @@ public abstract class TCharsetDecoder {
|
||||
return result;
|
||||
} else if (result.isUnderflow()) {
|
||||
if (endOfInput && in.hasRemaining()) {
|
||||
state = END;
|
||||
return TCoderResult.malformedForLength(in.remaining());
|
||||
if (malformedAction == TCodingErrorAction.REPORT) {
|
||||
return TCoderResult.malformedForLength(in.remaining());
|
||||
} else {
|
||||
if (out.remaining() > replacement.length()) {
|
||||
in.position(in.position() + in.remaining());
|
||||
if (malformedAction == TCodingErrorAction.REPLACE) {
|
||||
out.put(replacement);
|
||||
}
|
||||
} else {
|
||||
return TCoderResult.OVERFLOW;
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
} else if (result.isMalformed()) {
|
||||
|
@ -15,7 +15,6 @@
|
||||
*/
|
||||
package org.teavm.classlib.java.nio.charset.impl;
|
||||
|
||||
import org.teavm.classlib.impl.charset.UTF16Helper;
|
||||
import org.teavm.classlib.java.nio.charset.TCharset;
|
||||
import org.teavm.classlib.java.nio.charset.TCoderResult;
|
||||
|
||||
@ -44,7 +43,13 @@ public class TUTF8Decoder extends TBufferedDecoder {
|
||||
}
|
||||
break;
|
||||
}
|
||||
outArray[outPos++] = (char)(((b & 0x1F) << 6) | (inArray[inPos++] & 0x3F));
|
||||
byte b2 = inArray[inPos++];
|
||||
if (!checkMidByte(b2)) {
|
||||
inPos -= 2;
|
||||
result = TCoderResult.malformedForLength(1);
|
||||
break;
|
||||
}
|
||||
outArray[outPos++] = (char)(((b & 0x1F) << 6) | (b2 & 0x3F));
|
||||
} else if ((b & 0xF0) == 0xE0) {
|
||||
if (inPos + 2 > inSize) {
|
||||
--inPos;
|
||||
@ -55,6 +60,11 @@ public class TUTF8Decoder extends TBufferedDecoder {
|
||||
}
|
||||
byte b2 = inArray[inPos++];
|
||||
byte b3 = inArray[inPos++];
|
||||
if (!checkMidByte(b2) || !checkMidByte(b3)) {
|
||||
inPos -= 3;
|
||||
result = TCoderResult.malformedForLength(1);
|
||||
break;
|
||||
}
|
||||
char c = (char)(((b & 0x0F) << 12) | ((b2 & 0x3f) << 6) | (b3 & 0x3F));
|
||||
if (Character.isSurrogate(c)) {
|
||||
inPos -= 3;
|
||||
@ -72,7 +82,7 @@ public class TUTF8Decoder extends TBufferedDecoder {
|
||||
}
|
||||
if (outPos + 2 > outSize) {
|
||||
--inPos;
|
||||
if (!controller.hasMoreOutput()) {
|
||||
if (!controller.hasMoreOutput(2)) {
|
||||
result = TCoderResult.OVERFLOW;
|
||||
}
|
||||
break;
|
||||
@ -80,9 +90,18 @@ public class TUTF8Decoder extends TBufferedDecoder {
|
||||
byte b2 = inArray[inPos++];
|
||||
byte b3 = inArray[inPos++];
|
||||
byte b4 = inArray[inPos++];
|
||||
if (!checkMidByte(b2) || !checkMidByte(b3) || !checkMidByte(b4)) {
|
||||
inPos -= 3;
|
||||
result = TCoderResult.malformedForLength(1);
|
||||
break;
|
||||
}
|
||||
int code = ((b & 0x07) << 18) | ((b2 & 0x3f) << 12) | ((b3 & 0x3F) << 6) | (b4 & 0x3F);
|
||||
outArray[outPos++] = UTF16Helper.highSurrogate(code);
|
||||
outArray[outPos++] = UTF16Helper.lowSurrogate(code);
|
||||
outArray[outPos++] = Character.highSurrogate(code);
|
||||
outArray[outPos++] = Character.lowSurrogate(code);
|
||||
} else {
|
||||
--inPos;
|
||||
result = TCoderResult.malformedForLength(1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -90,4 +109,8 @@ public class TUTF8Decoder extends TBufferedDecoder {
|
||||
controller.setOutPosition(outPos);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Tells whether a byte has the bit pattern {@code 10xxxxxx}, i.e. its two
 * top bits are {@code 1} and {@code 0}.
 *
 * @param b the byte to inspect
 * @return {@code true} if {@code (b & 0xC0) == 0x80}
 */
private boolean checkMidByte(byte b) {
|
||||
return (b & 0xC0) == 0x80;
|
||||
}
|
||||
}
|
||||
|
@ -15,7 +15,6 @@
|
||||
*/
|
||||
package org.teavm.classlib.java.nio.charset.impl;
|
||||
|
||||
import org.teavm.classlib.impl.charset.UTF16Helper;
|
||||
import org.teavm.classlib.java.nio.charset.TCharset;
|
||||
import org.teavm.classlib.java.nio.charset.TCoderResult;
|
||||
|
||||
@ -57,7 +56,7 @@ public class TUTF8Encoder extends TBufferedEncoder {
|
||||
outArray[outPos++] = (byte)(0xE0 | (ch >> 12));
|
||||
outArray[outPos++] = (byte)(0x80 | ((ch >> 6) & 0x3F));
|
||||
outArray[outPos++] = (byte)(0x80 | (ch & 0x3F));
|
||||
} else if (UTF16Helper.isHighSurrogate(ch)) {
|
||||
} else if (Character.isHighSurrogate(ch)) {
|
||||
if (inPos >= inSize) {
|
||||
if (!controller.hasMoreInput()) {
|
||||
result = TCoderResult.UNDERFLOW;
|
||||
@ -65,9 +64,9 @@ public class TUTF8Encoder extends TBufferedEncoder {
|
||||
break;
|
||||
}
|
||||
char low = inArray[inPos++];
|
||||
if (!UTF16Helper.isLowSurrogate(low)) {
|
||||
if (!Character.isLowSurrogate(low)) {
|
||||
inPos -= 2;
|
||||
result = TCoderResult.malformedForLength(2);
|
||||
result = TCoderResult.malformedForLength(1);
|
||||
break;
|
||||
}
|
||||
if (outPos + 4 > outSize) {
|
||||
@ -77,7 +76,7 @@ public class TUTF8Encoder extends TBufferedEncoder {
|
||||
}
|
||||
break;
|
||||
}
|
||||
int codePoint = UTF16Helper.buildCodePoint(ch, low);
|
||||
int codePoint = Character.toCodePoint(ch, low);
|
||||
outArray[outPos++] = (byte)(0xF0 | (codePoint >> 18));
|
||||
outArray[outPos++] = (byte)(0x80 | ((codePoint >> 12) & 0x3F));
|
||||
outArray[outPos++] = (byte)(0x80 | ((codePoint >> 6) & 0x3F));
|
||||
|
@ -1,7 +1,7 @@
|
||||
package org.teavm.classlib.java.nio.charset;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.*;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
@ -46,6 +46,76 @@ public class UTF8Test {
|
||||
runDecode(100, 600);
|
||||
}
|
||||
|
||||
/**
 * Encodes a string in which 'a' and 'b' surround two consecutive U+D800
 * chars (two high surrogates, so no valid pair) and expects the UTF-8
 * output to be 'a' (97), two '?' bytes (63) and 'b' (98).
 */
@Test
|
||||
public void replaceMalformedSurrogatePair() {
|
||||
Charset charset = Charset.forName("UTF-8");
|
||||
ByteBuffer buffer = charset.encode("a\uD800\uD800b");
|
||||
byte[] result = new byte[buffer.remaining()];
|
||||
buffer.get(result);
|
||||
assertArrayEquals(new byte[] { 97, 63, 63, 98 }, result);
|
||||
}
|
||||
|
||||
/**
 * Encodes a string in which 'a' and 'b' surround the surrogate pair
 * U+D800 U+DC00 and expects the pair to come out as the four bytes
 * F0 90 80 80 (written below as signed values -16, -112, -128, -128).
 */
@Test
|
||||
public void encodeSurrogate() {
|
||||
Charset charset = Charset.forName("UTF-8");
|
||||
ByteBuffer buffer = charset.encode("a\uD800\uDC00b");
|
||||
byte[] result = new byte[buffer.remaining()];
|
||||
buffer.get(result);
|
||||
assertArrayEquals(new byte[] { 97, -16, -112, -128, -128, 98 }, result);
|
||||
}
|
||||
|
||||
/**
 * Decodes the bytes 'a', 0xFF, 'b' and expects the 0xFF byte to be turned
 * into a single U+FFFD char while 'a' and 'b' are kept.
 */
@Test
|
||||
public void replaceMalformedFirstByte() {
|
||||
Charset charset = Charset.forName("UTF-8");
|
||||
CharBuffer buffer = charset.decode(ByteBuffer.wrap(new byte[] { 97, (byte)0xFF, 98 }));
|
||||
char[] result = new char[buffer.remaining()];
|
||||
buffer.get(result);
|
||||
assertEquals("a\uFFFDb", new String(result));
|
||||
}
|
||||
|
||||
/**
 * Decodes the bytes 'a', 0xC0, 'b', 'b', where 0xC0 is followed by 'b'
 * (98) instead of a byte of the form 10xxxxxx. Expects one U+FFFD char
 * for 0xC0 and both 'b' bytes to be decoded as ordinary chars.
 */
@Test
|
||||
public void replaceMalformedMidByte() {
|
||||
Charset charset = Charset.forName("UTF-8");
|
||||
CharBuffer buffer = charset.decode(ByteBuffer.wrap(new byte[] { 97, (byte)0xC0, 98, 98 }));
|
||||
char[] result = new char[buffer.remaining()];
|
||||
buffer.get(result);
|
||||
assertEquals("a\uFFFDbb", new String(result));
|
||||
}
|
||||
|
||||
/**
 * Decodes 'a', the three bytes ED A0 80 (whose payload bits spell the
 * surrogate value D800), and 'b'. Expects the three-byte group to yield
 * exactly one U+FFFD char.
 */
@Test
|
||||
public void replaceDecodedSurrogate() {
|
||||
Charset charset = Charset.forName("UTF-8");
|
||||
CharBuffer buffer = charset.decode(ByteBuffer.wrap(new byte[] { 97, (byte)0xED, (byte)0xA0, (byte)0x80, 98 }));
|
||||
char[] result = new char[buffer.remaining()];
|
||||
buffer.get(result);
|
||||
assertEquals("a\uFFFDb", new String(result));
|
||||
}
|
||||
|
||||
/**
 * Decodes 'a', two three-byte groups ED A0 80 and ED BF BF (whose payload
 * bits spell the surrogate values D800 and DFFF), and 'b'. Expects each
 * group to yield its own U+FFFD char, i.e. two replacement chars in total.
 */
@Test
|
||||
public void replaceDecodedSurrogatePair() {
|
||||
Charset charset = Charset.forName("UTF-8");
|
||||
CharBuffer buffer = charset.decode(ByteBuffer.wrap(new byte[] { 97, (byte)0xED, (byte)0xA0, (byte)0x80,
|
||||
(byte)0xED, (byte)0xBF, (byte)0xBF, 98 }));
|
||||
char[] result = new char[buffer.remaining()];
|
||||
buffer.get(result);
|
||||
assertEquals("a\uFFFD\uFFFDb", new String(result));
|
||||
}
|
||||
|
||||
/**
 * Fills a 16384-byte array with the repeating four-byte sequence
 * F0 BE B2 BB (written below as signed values -16, -66, -78, -69), decodes
 * it, and checks the chars at indices 8190 and 8191 of the result.
 *
 * NOTE(review): nothing in the visible body appears to throw the declared
 * UnsupportedEncodingException - confirm whether the throws clause is needed.
 */
@Test
|
||||
public void decodeLongUTF8ByteArray() throws UnsupportedEncodingException {
|
||||
byte[] bytes = new byte[16384];
|
||||
// The index is advanced inside the body, four bytes per iteration.
for (int i = 0; i < bytes.length;) {
|
||||
bytes[i++] = -16;
|
||||
bytes[i++] = -66;
|
||||
bytes[i++] = -78;
|
||||
bytes[i++] = -69;
|
||||
}
|
||||
Charset charset = Charset.forName("UTF-8");
|
||||
CharBuffer buffer = charset.decode(ByteBuffer.wrap(bytes));
|
||||
// 4096 four-byte sequences give 8192 chars, so these are the last surrogate pair.
assertEquals('\uD8BB', buffer.get(8190));
|
||||
assertEquals('\uDCBB', buffer.get(8191));
|
||||
}
|
||||
|
||||
private void runEncode(int inSize, int outSize) {
|
||||
char[] input = text.toCharArray();
|
||||
byte[] output = new byte[16384];
|
||||
|
Loading…
Reference in New Issue
Block a user