From 5bbe04befe4b6ce7d9539f71a3c9981b3268a5a2 Mon Sep 17 00:00:00 2001 From: Nico Mexis Date: Wed, 12 Jan 2022 00:44:23 +0100 Subject: [PATCH 1/2] Better binary file detection --- .../gui/resourceviewer/viewer/FileViewer.java | 5 ++--- .../club/bytecodeviewer/util/MiscUtils.java | 17 +++++++++++++---- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/main/java/the/bytecode/club/bytecodeviewer/gui/resourceviewer/viewer/FileViewer.java b/src/main/java/the/bytecode/club/bytecodeviewer/gui/resourceviewer/viewer/FileViewer.java index 1571232d..25e5d5df 100644 --- a/src/main/java/the/bytecode/club/bytecodeviewer/gui/resourceviewer/viewer/FileViewer.java +++ b/src/main/java/the/bytecode/club/bytecodeviewer/gui/resourceviewer/viewer/FileViewer.java @@ -73,13 +73,12 @@ public class FileViewer extends ResourceViewer final byte[] contents = resource.getResourceBytes(); final String nameLowerCase = this.resource.name.toLowerCase(); final String onlyName = FilenameUtils.getName(nameLowerCase); - final String contentsAsString = new String(contents); final boolean hexViewerOnly = BytecodeViewer.viewer.viewPane1.getSelectedDecompiler() == Decompiler.HEXCODE_VIEWER && BytecodeViewer.viewer.viewPane2.getSelectedDecompiler() == Decompiler.NONE && BytecodeViewer.viewer.viewPane3.getSelectedDecompiler() == Decompiler.NONE; //image viewer - if (!MiscUtils.isPureAscii(contentsAsString) || hexViewerOnly) + if (MiscUtils.guessIfBinary(contents) || hexViewerOnly) { //TODO: // + Add file header checks @@ -144,7 +143,7 @@ public class FileViewer extends ResourceViewer textArea.setCodeFoldingEnabled(true); SyntaxLanguage.setLanguage(textArea, nameLowerCase); - textArea.setText(contentsAsString); + textArea.setText(new String(contents)); textArea.setFont(new Font(Font.MONOSPACED, Font.PLAIN, (int) BytecodeViewer.viewer.fontSpinner.getValue())); textArea.setCaretPosition(0); diff --git a/src/main/java/the/bytecode/club/bytecodeviewer/util/MiscUtils.java b/src/main/java/the/bytecode/club/bytecodeviewer/util/MiscUtils.java index f586702c..71011aaa 100644 --- a/src/main/java/the/bytecode/club/bytecodeviewer/util/MiscUtils.java +++ b/src/main/java/the/bytecode/club/bytecodeviewer/util/MiscUtils.java @@ -54,7 +54,6 @@ import static the.bytecode.club.bytecodeviewer.BytecodeViewer.gson; public class MiscUtils { - private static final CharsetEncoder asciiEncoder = StandardCharsets.US_ASCII.newEncoder(); // or "ISO-8859-1" for ISO Latin 1 private static final String AB = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; private static final String AN = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; private static final Random rnd = new Random(); @@ -247,9 +246,19 @@ public class MiscUtils while(list.size() > maxLength) list.remove(list.size() - 1); } - - public static boolean isPureAscii(String v) { - return asciiEncoder.canEncode(v); + + /** + * Returns whether the bytes most likely represent binary data. + * Based on https://stackoverflow.com/a/13533390/5894824 + */ + public static boolean guessIfBinary(byte[] data) { + double ascii = 0; + double other = 0; + for (byte b : data) { + if (b == 0x09 || b == 0x0A || b == 0x0C || b == 0x0D || (b >= 0x20 && b <= 0x7E)) ascii++; + else other++; + } + return other != 0 && other / (ascii + other) > 0.25; } public static Language guessLanguage() From a0955ba68a8d01189e68f060b534e87ae8089b01 Mon Sep 17 00:00:00 2001 From: Nico Mexis Date: Thu, 27 Jan 2022 17:51:45 +0100 Subject: [PATCH 2/2] Update dependencies --- pom.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 2edfd5eb..4b687a9f 100644 --- a/pom.xml +++ b/pom.xml @@ -32,7 +32,7 @@ 0.3.4 5.2.1.Final v42 - eda981d + 5f33b55 2.8.9 31.0.1-jre 4.2 @@ -48,7 +48,7 @@ 10b32a4 3.1.6 2.1.1 - 1.7.32 + 1.7.35 2.5.2 1.30 1.0.3 @@ -401,7 +401,7 @@ org.apache.maven.plugins maven-compiler-plugin - 3.8.1 + 3.9.0 ${maven.compiler.source} ${maven.compiler.target}