Better binary file detection

This commit is contained in:
Nico Mexis 2022-01-12 00:44:23 +01:00
parent f85ee82202
commit 5bbe04befe
No known key found for this signature in database
GPG key ID: 27D6E17CE092AB78
2 changed files with 15 additions and 7 deletions

View file

@ -73,13 +73,12 @@ public class FileViewer extends ResourceViewer
final byte[] contents = resource.getResourceBytes(); final byte[] contents = resource.getResourceBytes();
final String nameLowerCase = this.resource.name.toLowerCase(); final String nameLowerCase = this.resource.name.toLowerCase();
final String onlyName = FilenameUtils.getName(nameLowerCase); final String onlyName = FilenameUtils.getName(nameLowerCase);
final String contentsAsString = new String(contents);
final boolean hexViewerOnly = BytecodeViewer.viewer.viewPane1.getSelectedDecompiler() == Decompiler.HEXCODE_VIEWER && final boolean hexViewerOnly = BytecodeViewer.viewer.viewPane1.getSelectedDecompiler() == Decompiler.HEXCODE_VIEWER &&
BytecodeViewer.viewer.viewPane2.getSelectedDecompiler() == Decompiler.NONE && BytecodeViewer.viewer.viewPane2.getSelectedDecompiler() == Decompiler.NONE &&
BytecodeViewer.viewer.viewPane3.getSelectedDecompiler() == Decompiler.NONE; BytecodeViewer.viewer.viewPane3.getSelectedDecompiler() == Decompiler.NONE;
//image viewer //image viewer
if (!MiscUtils.isPureAscii(contentsAsString) || hexViewerOnly) if (MiscUtils.guessIfBinary(contents) || hexViewerOnly)
{ {
//TODO: //TODO:
// + Add file header checks // + Add file header checks
@ -144,7 +143,7 @@ public class FileViewer extends ResourceViewer
textArea.setCodeFoldingEnabled(true); textArea.setCodeFoldingEnabled(true);
SyntaxLanguage.setLanguage(textArea, nameLowerCase); SyntaxLanguage.setLanguage(textArea, nameLowerCase);
textArea.setText(contentsAsString); textArea.setText(new String(contents));
textArea.setFont(new Font(Font.MONOSPACED, Font.PLAIN, (int) BytecodeViewer.viewer.fontSpinner.getValue())); textArea.setFont(new Font(Font.MONOSPACED, Font.PLAIN, (int) BytecodeViewer.viewer.fontSpinner.getValue()));
textArea.setCaretPosition(0); textArea.setCaretPosition(0);

View file

@ -54,7 +54,6 @@ import static the.bytecode.club.bytecodeviewer.BytecodeViewer.gson;
public class MiscUtils public class MiscUtils
{ {
private static final CharsetEncoder asciiEncoder = StandardCharsets.US_ASCII.newEncoder(); // or "ISO-8859-1" for ISO Latin 1
private static final String AB = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; private static final String AB = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
private static final String AN = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; private static final String AN = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
private static final Random rnd = new Random(); private static final Random rnd = new Random();
@ -247,9 +246,19 @@ public class MiscUtils
while(list.size() > maxLength) while(list.size() > maxLength)
list.remove(list.size() - 1); list.remove(list.size() - 1);
} }
public static boolean isPureAscii(String v) { /**
return asciiEncoder.canEncode(v); * Returns whether the bytes most likely represent binary data.
* Based on https://stackoverflow.com/a/13533390/5894824
*/
public static boolean guessIfBinary(byte[] data) {
double ascii = 0;
double other = 0;
for (byte b : data) {
if (b == 0x09 || b == 0x0A || b == 0x0C || b == 0x0D || (b >= 0x20 && b <= 0x7E)) ascii++;
else other++;
}
return other != 0 && other / (ascii + other) > 0.25;
} }
public static Language guessLanguage() public static Language guessLanguage()