From 967be362dc3d2c71046d5edb2e03efcec790d038 Mon Sep 17 00:00:00 2001 From: Dan Church Date: Tue, 17 Nov 2020 13:30:35 -0600 Subject: [PATCH] Pre-filter based on file size Only generate hashes for files that have a size duplicate. --- simplify_static_dir.pl | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/simplify_static_dir.pl b/simplify_static_dir.pl index 699641b..0dbdff3 100755 --- a/simplify_static_dir.pl +++ b/simplify_static_dir.pl @@ -168,7 +168,20 @@ MAIN: { push @files, Directory::Simplify::File->new($File::Find::name); }, @dirs_to_process); - printf STDERR "%s files found.\n", + printf STDERR "%d files found", + scalar @files + if $opts{v}; + + # Shortcut: Only generate hashes and inspect files that do not have a + # unique size. The reasoning being that if file sizes do not match, there's no + # possible way those two files can have the same contents. + my %file_sizes; + ++$file_sizes{$_->{size}} foreach @files; + @files = grep { + $file_sizes{$_->{size}} > 1 + } @files; + + printf STDERR " (%d candidates).\n", scalar @files if $opts{v};