mirror of
https://github.com/HMCL-dev/HMCL.git
synced 2025-01-24 14:34:15 +08:00
fix #1368: Guess Zip file encoding using Commons Compress
This commit is contained in:
parent
07f9b30ed5
commit
0f22c9e31a
@ -17,22 +17,18 @@
|
||||
*/
|
||||
package org.jackhuang.hmcl.util.io;
|
||||
|
||||
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
|
||||
import org.apache.commons.compress.archivers.zip.ZipFile;
|
||||
import org.jackhuang.hmcl.util.platform.OperatingSystem;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.*;
|
||||
import java.nio.file.*;
|
||||
import java.nio.file.attribute.BasicFileAttributes;
|
||||
import java.nio.file.spi.FileSystemProvider;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.*;
|
||||
import java.util.zip.ZipError;
|
||||
import java.util.zip.ZipException;
|
||||
|
||||
@ -51,38 +47,42 @@ public final class CompressingUtils {
|
||||
private CompressingUtils() {
|
||||
}
|
||||
|
||||
@NotNull
|
||||
private static FileVisitResult testZipPath(Path file, Path root, AtomicBoolean result) {
|
||||
try {
|
||||
root.relativize(file).toString(); // throw IllegalArgumentException for wrong encoding.
|
||||
return FileVisitResult.CONTINUE;
|
||||
} catch (Exception e) {
|
||||
result.set(false);
|
||||
return FileVisitResult.TERMINATE;
|
||||
}
|
||||
private static CharsetDecoder newCharsetDecoder(Charset charset) {
|
||||
return charset.newDecoder().onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(CodingErrorAction.REPORT);
|
||||
}
|
||||
|
||||
public static boolean testEncoding(Path zipFile, Charset encoding) throws IOException {
|
||||
AtomicBoolean result = new AtomicBoolean(true);
|
||||
try (FileSystem fs = CompressingUtils.createReadOnlyZipFileSystem(zipFile, encoding)) {
|
||||
Path root = fs.getPath("/");
|
||||
Files.walkFileTree(root, new SimpleFileVisitor<Path>() {
|
||||
@Override
|
||||
public FileVisitResult visitFile(Path file,
|
||||
BasicFileAttributes attrs) {
|
||||
return testZipPath(file, root, result);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FileVisitResult preVisitDirectory(Path dir,
|
||||
BasicFileAttributes attrs) {
|
||||
return testZipPath(dir, root, result);
|
||||
}
|
||||
});
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new IOException(e);
|
||||
try (ZipFile zf = openZipFile(zipFile, encoding)) {
|
||||
return testEncoding(zf, encoding);
|
||||
}
|
||||
return result.get();
|
||||
}
|
||||
|
||||
public static boolean testEncoding(ZipFile zipFile, Charset encoding) throws IOException {
|
||||
Enumeration<ZipArchiveEntry> entries = zipFile.getEntries();
|
||||
CharsetDecoder cd = newCharsetDecoder(encoding);
|
||||
CharBuffer cb = CharBuffer.allocate(32);
|
||||
|
||||
while (entries.hasMoreElements()) {
|
||||
ZipArchiveEntry entry = entries.nextElement();
|
||||
|
||||
if (entry.getGeneralPurposeBit().usesUTF8ForNames()) continue;
|
||||
|
||||
cd.reset();
|
||||
byte[] ba = entry.getRawName();
|
||||
int clen = (int)(ba.length * cd.maxCharsPerByte());
|
||||
if (clen == 0) continue;
|
||||
if (clen <= cb.capacity())
|
||||
cb.clear();
|
||||
else
|
||||
cb = CharBuffer.allocate(clen);
|
||||
|
||||
ByteBuffer bb = ByteBuffer.wrap(ba, 0, ba.length);
|
||||
CoderResult cr = cd.decode(bb, cb, true);
|
||||
if (!cr.isUnderflow()) return false;
|
||||
cr = cd.flush(cb);
|
||||
if (!cr.isUnderflow()) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public static Charset findSuitableEncoding(Path zipFile) throws IOException {
|
||||
@ -90,6 +90,16 @@ public final class CompressingUtils {
|
||||
}
|
||||
|
||||
public static Charset findSuitableEncoding(Path zipFile, Collection<Charset> candidates) throws IOException {
|
||||
try (ZipFile zf = openZipFile(zipFile, StandardCharsets.UTF_8)) {
|
||||
return findSuitableEncoding(zf, candidates);
|
||||
}
|
||||
}
|
||||
|
||||
public static Charset findSuitableEncoding(ZipFile zipFile) throws IOException {
|
||||
return findSuitableEncoding(zipFile, Charset.availableCharsets().values());
|
||||
}
|
||||
|
||||
public static Charset findSuitableEncoding(ZipFile zipFile, Collection<Charset> candidates) throws IOException {
|
||||
if (testEncoding(zipFile, StandardCharsets.UTF_8)) return StandardCharsets.UTF_8;
|
||||
if (OperatingSystem.NATIVE_CHARSET != StandardCharsets.UTF_8 && testEncoding(zipFile, OperatingSystem.NATIVE_CHARSET))
|
||||
return OperatingSystem.NATIVE_CHARSET;
|
||||
@ -100,6 +110,14 @@ public final class CompressingUtils {
|
||||
throw new IOException("Cannot find suitable encoding for the zip.");
|
||||
}
|
||||
|
||||
public static ZipFile openZipFile(Path zipFile) throws IOException {
|
||||
return new ZipFile(Files.newByteChannel(zipFile));
|
||||
}
|
||||
|
||||
public static ZipFile openZipFile(Path zipFile, Charset charset) throws IOException {
|
||||
return new ZipFile(Files.newByteChannel(zipFile), charset.name());
|
||||
}
|
||||
|
||||
public static final class Builder {
|
||||
private boolean autoDetectEncoding = false;
|
||||
private Collection<Charset> charsetCandidates;
|
||||
@ -212,7 +230,7 @@ public final class CompressingUtils {
|
||||
* @return the plain text content of given file.
|
||||
*/
|
||||
public static String readTextZipEntry(Path zipFile, String name, Charset encoding) throws IOException {
|
||||
try (ZipFile s = new ZipFile(zipFile.toFile(), encoding.name())) {
|
||||
try (ZipFile s = openZipFile(zipFile, encoding)) {
|
||||
return IOUtils.readFullyAsString(s.getInputStream(s.getEntry(name)), StandardCharsets.UTF_8);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user