Mirror of https://codeberg.org/h3xx/simplify_static_dir (synced 2024-08-14 23:57:24 +00:00)
Re-write script

Breaking changes:

* Remove support for symlink generation.

Bugs fixed:

* SHA-1 hash collisions no longer corrupt yer data.

Internal changes:

* Rework into a helper package.
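For context on the collision fix: a matching SHA-1 digest is now treated as a hint rather than as proof of identity, and candidate files are compared byte-for-byte before anything is unlinked or linked. A minimal sketch of that idea using only core modules (the helper name here is illustrative, not part of the script's API):

    use Digest::SHA;
    use File::Compare qw/ compare /;

    # Hypothetical helper: true only when two files are provably identical.
    sub provably_identical {
        my ($file_a, $file_b) = @_;

        # a matching digest is necessary...
        my $digest_a = Digest::SHA->new->addfile($file_a)->hexdigest;
        my $digest_b = Digest::SHA->new->addfile($file_b)->hexdigest;
        return 0 if $digest_a ne $digest_b;

        # ...but not sufficient: fall back to a byte-for-byte comparison,
        # so a hash collision can never cause the wrong files to be linked
        # (File::Compare::compare returns 0 when contents are equal)
        return 0 == compare($file_a, $file_b);
    }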
This commit is contained in:

parent c6beadda5f
commit 79e3eca2cb
2 changed files with 301 additions and 174 deletions

CHANGELOG.md (10 changes)
@@ -4,6 +4,16 @@ All notable changes to this project will be documented in this file.
 
 ## [Unreleased]
 
+## [2.0.0]
+
+### Fixed
+
+- SHA1 collisions no longer corrupt yer data
+
+### Changed
+
+- Remove support for symlink generation
+
 ## [1.2.1]
 
 - Fixed bug when processing files with \r characters in the name

@@ -3,7 +3,7 @@
 use strict;
 use warnings;
 
-our $VERSION = '1.2.3';
+our $VERSION = '2.0.0';
 
 =pod
 
@@ -60,14 +60,6 @@ Only match file paths matching I<REGEX>.
 
 Exclude file paths matching I<REGEX>.
 
-=item B<-s>
-
-Generate symlinks only.
-
-=item B<-S>
-
-Do not generate ANY symlinks.
-
 =item B<-z>
 
 Include zero-length files in search. Normally they are ignored (you don't save
@@ -83,8 +75,7 @@ Output version information and exit.
 
 =back
 
-By default, scans the current directory. Files not able to be hard-linked are
-symlinked by default.
+By default, scans the current directory.
 
 =head1 COPYRIGHT
 
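With the symlink fallback gone, the documented interface reduces to the matching, reporting, and zero-length switches above. A hypothetical invocation (the script name is a stand-in; substitute the path to your copy):

    # hard-link-dedupe HTML files under /var/www, excluding any cache/ paths,
    # and print the number of bytes freed
    perl simplify_static_dir.pl -f -v -m '\.html?$' -M 'cache/' /var/www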
@@ -107,27 +98,22 @@ require Digest::SHA;
 use Getopt::Std qw/ getopts /;
 
 sub HELP_MESSAGE {
-#   my $fh = shift;
-#   print $fh <<EOF
-#Usage: $0 [DIRS]
-#Simplify a directory by hard-linking identical files.
-#
-#  -v           Verbose output.
-#  -f           Print a sum of the number of freed bytes.
-#  -m REGEX     Only match file paths matching REGEX.
-#  -M REGEX     Exclude file paths matching REGEX.
-#  -s           Generate symlinks only.
-#  -S           Do not generate ANY symlinks.
-#  -z           Include zero-length files in search.
-#
-#By default, scans the current directory. Files not able to be hard-linked are
-#symlinked by default.
-#EOF
-#;
-    require Pod::Text;
-    my ($fh, $pod) = (shift, Pod::Text->new);
-    $pod->parse_from_file($0, $fh);
+    my $fh = shift;
+    print $fh <<EOF
+Usage: $0 [DIRS]
+Simplify a directory by hard-linking identical files.
 
+  -v            Verbose output.
+  -f            Print a sum of the number of freed bytes.
+  -m REGEX      Only match file paths matching REGEX.
+  -M REGEX      Exclude file paths matching REGEX.
+  -z            Include zero-length files in search.
+
+By default, scans the current directory.
+
+See also `perldoc $0'
+EOF
+;
     exit 0;
 }
 
@@ -136,14 +122,16 @@ my %opts = (
     f => 0,
     m => '',
     M => '',
-    s => 0,
-    S => 0,
     z => 0,
 );
 
-&getopts('vfm:M:sSz', \%opts);
+&getopts('vfm:M:z', \%opts);
 
-my %files;
+my $filehash = new Directory::Simplify::FileHash;
+
+# include zero-length files if wanted (`-z')
+$filehash->min_linkable_size(0)
+    if $opts{z};
 
 # correct relative paths
 # OR if no directories given, search the current directory
@@ -153,6 +141,10 @@ my $freed_bytes = 0;
 
 &find(\&findexec, @ARGV);
 
+printf STDERR "freed %d bytes (%0.4G %s)\n",
+    $freed_bytes, &hr_size($freed_bytes)
+        if $opts{f} or $opts{v};
+
 sub findexec {
     # outright skip directories (don't report skip)
     return if -d $File::Find::name;
@@ -180,86 +172,43 @@ sub findexec {
         return;
     }
 
-    #my $ctx = Digest::MD5->new;
-    my $ctx = Digest::SHA->new;
-    $ctx->addfile($File::Find::name);
+    my $entry = $filehash->make_entry($File::Find::name);
 
-    # save the hex digest because reading the value from
-    # Digest::* destroys it
-    my $digest = $ctx->hexdigest;
-
-    # organize results from lstat into hash
-    my $entry = {};
-    (@{$entry}{qw/ name dev ino mode nlink uid gid rdev size
-            atime mtime ctime blksize blocks /})
-        = ($File::Find::name, lstat $File::Find::name);
-
-    # skip zero-length files if wanted (`-z')
-    return unless $opts{z} or $entry->{size};
-
-    # check to see if we've come across a file with the same checksum
-    unless (exists $files{$digest}) {
-        # the file is unique (as far as we know)
-        # create a new entry in the hash table
-        $files{$digest} = $entry;
-        return;
+    my @linkable = $filehash->find_hardlinkable($entry);
+    if (@linkable) {
+        &hardlink_entries($entry, @linkable);
     }
+    $filehash->add_entry($entry);
+}
 
-    my $curr_entry = $files{$digest};
+sub hardlink_entries {
+    my ($entry, @linkable) = @_;
 
-    # don't waste my time
-    return if $curr_entry->{name} eq $entry->{name} or
-        $curr_entry->{ino} == $entry->{ino};
+    # only one of the linkable entries should suffice
+    my $linking_with = $linkable[0];
 
-    # identical files should be the same size (I'm paranoid
-    # of checksumming procedures); if it's not, there's a
-    # problem with the checksumming procedure or this
-    # script is processing WAY too many files
-    # (addendum: perhaps I should congratulate the user on
-    # finding a collision in SHA-1)
-    if ($curr_entry->{size} != $entry->{size}) {
-        die "ERROR: checksums identical for two non-identical files!!!:\n".
-        "files:\t`$curr_entry->{name}'\n".
-        "\t`$entry->{name}'\n".
-        "SHA1: ($digest)\n".
-        '(this is probably a limit of SHA1; try processing fewer files)';
-    }
-
-    # find the oldest time stamp
-    my ($atime, $mtime) = @{(sort
-        {$a->{mtime} <=> $b->{mtime}}
-        ($entry, $curr_entry)
+    # calculate the timestamp of the resulting file
+    my ($atime, $mtime) = @{(
+        $filehash->oldest_mtime($entry, $linking_with)
     )[0]}{qw/ atime mtime /};
 
     # find the file less embedded in the file system
-    my ($less_linked, $more_linked) = sort
-        {$a->{nlink} <=> $b->{nlink}}
-        ($entry, $curr_entry);
+    my ($less_linked, $more_linked) = $filehash->less_linked($entry, $linking_with);
+
+    printf STDERR "removing file `%s'\n", $less_linked->{name}
+        if $opts{v};
 
-    # hard-linkable files must exist on the same device and
-    # must not already be hard-linked
-    if ($curr_entry->{dev} == $entry->{dev} &&
-        ! $opts{s}) {
-        # attempt to unlink the file
-        printf STDERR "removing file `%s'\n",
-            $less_linked->{name} if $opts{v};
     unless (unlink $less_linked->{name}) {
-
-            # couldn't do it; try more-linked file
-
         printf STDERR "Failed to remove file `%s': %s\n(using `%s')\n",
             $less_linked->{name},
             $!,
-                $more_linked->{name}
-            if $opts{v};
+            $more_linked->{name};
 
-            # if we can't do this, there's no point
-            # in continuing
+        # if we can't do this, there's no point in continuing
         unless (unlink $more_linked->{name}) {
             printf STDERR "Failed to remove file `%s' (second failure on match): %s\nSkipping...\n",
                 $more_linked->{name},
-                    $!
-                    if $opts{v};
+                $!;
 
             return;
         }
@@ -267,7 +216,6 @@ sub findexec {
         # the ol' switcheroo
         ($more_linked, $less_linked) =
         ($less_linked, $more_linked);
-
     }
 
     # we unlinked it or failed out
@@ -282,63 +230,29 @@ sub findexec {
     link $more_linked->{name},
     $less_linked->{name};
 
-        # update link count in our hash to reflect the
-        # file system (referenced)
+    # preserve older time stamp
+    utime $atime, $mtime, $more_linked->{name};
+    $more_linked->{atime} = $atime;
+    $more_linked->{mtime} = $mtime;
+
+    # update link count in our hash to reflect the file system (referenced)
     ++$more_linked->{nlink};
 
-        # preserve older time stamp
-        utime $atime, $mtime, $less_linked->{name};
-    } elsif (! $opts{S}) {
-        # files are on different drives;
-        # most that can be done is to soft-link them
-
-        unless (unlink $less_linked->{name}) {
-
-            # couldn't do it; try more-linked file
-
-            printf STDERR "couldn't remove file `%s' (using `%s')\n",
-            $less_linked->{name},
-            $more_linked->{name} if $opts{v};
-
-            # if we can't do this, there's no point
-            # in continuing
-            unlink $more_linked->{name}
-                or return;
-
-            # the ol' switcheroo
-            ($more_linked, $less_linked) =
-            ($less_linked, $more_linked);
-
+    # update old entry with the info from the new one
+    foreach my $copy_attr (qw/
+        ino
+        nlink
+        mode
+        uid
+        gid
+        atime
+        mtime
+        ctime
+    /) {
+        $less_linked->{$copy_attr} = $more_linked->{$copy_attr};
     }
-
-        # we unlinked it or failed out
-        $freed_bytes += $less_linked->{size};
-
-        printf STDERR "soft-linking %s => %s\n",
-            $less_linked->{name}, $more_linked->{name}
-        if $opts{v};
-
-        # create a soft link (TODO: relative links)
-        symlink $more_linked->{name},
-        $less_linked->{name};
-
-        # preserve older time stamp
-        utime $atime, $mtime, $less_linked->{name};
-    }
-    #} elsif (-l $File::Find::name) {
-    #   # do something to simplify symlinks
-    #   printf STDERR "FIXME: simplifying symlink `%s'\n",
-    #   $File::Find::name
-    #   if $opts{v};
-
-    #   printf STDERR "symlink `%s' points to `%s'\n",
-    #   $File::Find::name, readlink $File::Find::name;
 }
 
-printf STDERR "freed %d bytes (%0.4G %s)\n",
-    $freed_bytes, &hr_size($freed_bytes)
-        if $opts{f} or $opts{v};
-
 sub hr_size {
     my $sz = shift;
     my @sizes = qw/ B KB MB GB TB PB EB ZB YB /;
@@ -357,3 +271,206 @@ sub hr_size {
 
     wantarray ? @ret : "@ret"
 }
+
+package Directory::Simplify::FileHash;
+use strict;
+use warnings;
+
+=head1 DESCRIPTION
+
+Object for abstracting management of a hashed filesystem
+
+=cut
+
+sub new {
+    my ($class, $self) = (shift, {});
+
+    $self->{_files} = {};
+
+    require Digest::SHA;
+    $self->{_ctx} = Digest::SHA->new;
+
+    # default options
+    $self->{_min_linkable_size} = 1;
+
+    bless $self, $class
+}
+
+=head2 min_linkable_size($bytes)
+
+Set or get the minimum size of files to be considered hard-linkable. Default is 1.
+
+=cut
+
+sub min_linkable_size {
+    my $self = shift;
+    my $in = shift;
+    if (defined $in) {
+        $self->{_min_linkable_size} = $in;
+    }
+    $self->{_min_linkable_size}
+}
+
+=head2 make_entry($filename)
+
+=cut
+
+sub make_entry {
+    my $self = shift;
+    my ($filename) = @_;
+    # organize results from lstat into hash
+    my $entry = {};
+    (@{$entry}{qw/ name dev ino mode nlink uid gid rdev size
+            atime mtime ctime blksize blocks /})
+        = ($filename, lstat $filename);
+
+    $entry->{hash} = $self->_hash($filename);
+
+    $entry
+}
+
+=head2 add_entry($entry)
+
+=cut
+
+sub add_entry {
+    my $self = shift;
+    my ($entry) = @_;
+
+    my $hash = $entry->{hash};
+
+    unless (defined $self->{_files}->{$hash}) {
+        $self->{_files}->{$hash} = [];
+    }
+    push @{$self->{_files}->{$hash}}, $entry;
+}
+
+sub find_hardlinkable {
+    my $self = shift;
+    my ($entry) = @_;
+
+    my $hash = $entry->{hash};
+
+    # no matching entries
+    unless (defined $self->{_files}->{$hash}) {
+        return ();
+    }
+
+    my @matches;
+    foreach my $ent (@{$self->{_files}->{$hash}}) {
+        if ($self->_entries_are_hard_linkable($entry, $ent)) {
+            push @matches, $ent;
+        }
+    }
+
+    return @matches;
+
+}
+
+=head2 less_linked($entry_a, $entry_b, ...)
+
+Find the file less embedded in the file system.
+
+=cut
+
+sub less_linked {
+    my $self = shift;
+    return sort
+        {$a->{nlink} <=> $b->{nlink}}
+        @_;
+}
+
+=head2 oldest_mtime($entry_a, $entry_b, ...)
+
+Find the entry with the oldest time stamp.
+
+=cut
+
+sub oldest_mtime {
+    my $self = shift;
+
+    return sort
+        {$a->{mtime} <=> $b->{mtime}}
+        @_;
+}
+
+sub _hash {
+    my $self = shift;
+    my ($filename) = @_;
+    $self->{_ctx}->addfile($filename);
+    return $self->{_ctx}->hexdigest;
+}
+
+sub _entries_are_hard_linkable {
+    my $self = shift;
+    my ($entry_a, $entry_b) = @_;
+
+    # obviously, if the sizes aren't the same, they're not the same file
+    unless (&_entries_sizes_match($entry_a, $entry_b)) {
+        return 0;
+    }
+
+    # too small to be hard-linked
+    if ($entry_a->{size} < $self->min_linkable_size) {
+        return 0;
+    }
+
+    # they're the same file, don't try it
+    if (&_entries_are_same_filename($entry_a, $entry_b)) {
+        return 0;
+    }
+    if (&_entries_are_already_hard_linked($entry_a, $entry_b)) {
+        return 0;
+    }
+
+    if (&_entries_contents_match($entry_a, $entry_b)) {
+        return 1;
+    }
+    return 0;
+}
+
+sub _entries_are_same_filename {
+    my ($entry_a, $entry_b) = @_;
+
+    if ($entry_a->{name} eq $entry_b->{name}) {
+        return 1;
+    }
+    return 0;
+}
+
+sub _entries_are_already_hard_linked {
+    my ($entry_a, $entry_b) = @_;
+
+    if ($entry_a->{ino} == $entry_b->{ino} and
+        $entry_a->{dev} == $entry_b->{dev}) {
+        return 1;
+    }
+
+    return 0;
+}
+
+sub _entries_sizes_match {
+    my ($entry_a, $entry_b) = @_;
+    if ($entry_a->{size} != $entry_b->{size}) {
+        return 0;
+    }
+    return 1;
+}
+
+sub _entries_contents_match {
+    my ($entry_a, $entry_b) = @_;
+
+    # also, if the hashes aren't the same, they cannot be the same file
+    if ($entry_a->{hash} ne $entry_b->{hash}) {
+        return 0;
+    }
+
+    use File::Compare   qw/ compare /;
+    my $contents_same = (0 == &compare($entry_a->{name}, $entry_b->{name}));
+
+    # warn about hash collision
+    unless ($contents_same) {
+        warn "Hash collision between files:\n* $entry_a->{name}\n* $entry_b->{name}\n  (don't panic)\n";
+    }
+    return $contents_same;
+}
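For readers skimming the new helper package: the rewritten findexec above drives it in a make_entry, find_hardlinkable, add_entry cycle. A minimal standalone sketch of that flow (the surrounding loop is illustrative; only the class and method names come from the diff):

    my $filehash = Directory::Simplify::FileHash->new;

    foreach my $filename (@candidate_files) {   # hypothetical list, e.g. gathered by File::Find
        my $entry = $filehash->make_entry($filename);

        # entries with equal size, equal contents, and a different inode
        # are safe hard-link targets
        my @linkable = $filehash->find_hardlinkable($entry);
        printf "could link %s => %s\n", $filename, $linkable[0]->{name}
            if @linkable;

        # register the entry so later files can match against it
        $filehash->add_entry($entry);
    }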