mirror of
				https://codeberg.org/h3xx/simplify_static_dir
				synced 2024-08-14 23:57:24 +00:00 
			
		
		
		
	Clean up script
This commit is contained in:
		
							parent
							
								
									801bed9fa9
								
							
						
					
					
						commit
						a10996e70f
					
				
					 1 changed files with 143 additions and 155 deletions
				
			
		|  | @ -3,7 +3,7 @@ | ||||||
| use strict; | use strict; | ||||||
| use warnings; | use warnings; | ||||||
| 
 | 
 | ||||||
| our $VERSION = '1.2.2'; | our $VERSION = '1.2.3'; | ||||||
| 
 | 
 | ||||||
| =pod | =pod | ||||||
| 
 | 
 | ||||||
|  | @ -115,7 +115,7 @@ Fixed bug when processing files with \r characters in the name. | ||||||
| 
 | 
 | ||||||
| =head1 COPYRIGHT | =head1 COPYRIGHT | ||||||
| 
 | 
 | ||||||
| Copyright (C) 2010-2013 Dan Church. | Copyright (C) 2010-2018 Dan Church. | ||||||
| 
 | 
 | ||||||
| License GPLv3+: GNU GPL version 3 or later (L<http://gnu.org/licenses/gpl.html>). | License GPLv3+: GNU GPL version 3 or later (L<http://gnu.org/licenses/gpl.html>). | ||||||
| 
 | 
 | ||||||
|  | @ -132,7 +132,6 @@ Written by Dan Church S<E<lt>amphetamachine@gmail.comE<gt>> | ||||||
| use File::Find qw/ find /; | use File::Find qw/ find /; | ||||||
| require Digest::SHA; | require Digest::SHA; | ||||||
| use Getopt::Std qw/ getopts /; | use Getopt::Std qw/ getopts /; | ||||||
| require Pod::Text; |  | ||||||
| 
 | 
 | ||||||
| sub HELP_MESSAGE { | sub HELP_MESSAGE { | ||||||
| #   my $fh = shift; | #   my $fh = shift; | ||||||
|  | @ -152,6 +151,7 @@ sub HELP_MESSAGE { | ||||||
| #symlinked by default. | #symlinked by default. | ||||||
| #EOF | #EOF | ||||||
| #; | #; | ||||||
|  |     require Pod::Text; | ||||||
|     my ($fh, $pod) = (shift, Pod::Text->new); |     my ($fh, $pod) = (shift, Pod::Text->new); | ||||||
|     $pod->parse_from_file($0, $fh); |     $pod->parse_from_file($0, $fh); | ||||||
| 
 | 
 | ||||||
|  | @ -202,167 +202,156 @@ sub findexec { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     # make sure the file exists and it's not a link |     # skip non-existent files and links | ||||||
|     if (-f $File::Find::name && ! -l $File::Find::name) { |     unless (-f $File::Find::name && ! -l $File::Find::name) { | ||||||
|         #my $ctx = Digest::MD5->new; |         return; | ||||||
|         my $ctx = Digest::SHA->new; |     } | ||||||
|         $ctx->addfile($File::Find::name); |  | ||||||
| 
 | 
 | ||||||
|         # save the hex digest because reading the value from |     #my $ctx = Digest::MD5->new; | ||||||
|         # Digest::* destroys it |     my $ctx = Digest::SHA->new; | ||||||
|         my $digest = $ctx->hexdigest; |     $ctx->addfile($File::Find::name); | ||||||
| 
 | 
 | ||||||
|         # organize results from lstat into hash |     # save the hex digest because reading the value from | ||||||
|         my $entry = {}; |     # Digest::* destroys it | ||||||
|         (@{$entry}{qw/ name dev ino mode nlink uid gid rdev size |     my $digest = $ctx->hexdigest; | ||||||
|                 atime mtime ctime blksize blocks /}) |  | ||||||
|             = ($File::Find::name, lstat $File::Find::name); |  | ||||||
| 
 | 
 | ||||||
|         # skip zero-length files if wanted (`-z') |     # organize results from lstat into hash | ||||||
|         # truth table: |     my $entry = {}; | ||||||
|         # -z | non-zero length | return? |     (@{$entry}{qw/ name dev ino mode nlink uid gid rdev size | ||||||
|         # 0  | 0               | 1 |             atime mtime ctime blksize blocks /}) | ||||||
|         # 0  | 1               | 0 |         = ($File::Find::name, lstat $File::Find::name); | ||||||
|         # 1  | 0               | 0 |  | ||||||
|         # 1  | 1               | 0 |  | ||||||
|         return unless $opts{z} or $entry->{size}; |  | ||||||
| 
 | 
 | ||||||
|         # check to see if we've come across a file with the same crc |     # skip zero-length files if wanted (`-z') | ||||||
|         if (exists $files{$digest}) { |     return unless $opts{z} or $entry->{size}; | ||||||
|             my $curr_entry = $files{$digest}; |  | ||||||
| 
 | 
 | ||||||
|             # don't waste my time |     # check to see if we've come across a file with the same checksum | ||||||
|             return if $curr_entry->{name} eq $entry->{name} or |     unless (exists $files{$digest}) { | ||||||
|                 $curr_entry->{ino} == $entry->{ino}; |         # the file is unique (as far as we know) | ||||||
|  |         # create a new entry in the hash table | ||||||
|  |         $files{$digest} = $entry; | ||||||
|  |         return; | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|             # identical files should be the same size (I'm paranoid |     my $curr_entry = $files{$digest}; | ||||||
|             # of checksumming procedures); if it's not, there's a | 
 | ||||||
|             # problem with the checksumming procedure or this |     # don't waste my time | ||||||
|             # script is processing WAY too many files |     return if $curr_entry->{name} eq $entry->{name} or | ||||||
|             # (addendum: perhaps I should congratulate the user on |         $curr_entry->{ino} == $entry->{ino}; | ||||||
|             # finding a collision in SHA-1) | 
 | ||||||
|             if ($curr_entry->{size} != $entry->{size}) { |     # identical files should be the same size (I'm paranoid | ||||||
| die "ERROR: checksums identical for two non-identical files!!!:\n". |     # of checksumming procedures); if it's not, there's a | ||||||
|     "files:\t`$curr_entry->{name}'\n". |     # problem with the checksumming procedure or this | ||||||
|           "\t`$entry->{name}'\n". |     # script is processing WAY too many files | ||||||
|     "SHA1: ($digest)\n". |     # (addendum: perhaps I should congratulate the user on | ||||||
|     '(this is probably a limit of SHA1; try processing fewer files)'; |     # finding a collision in SHA-1) | ||||||
|  |     if ($curr_entry->{size} != $entry->{size}) { | ||||||
|  |         die "ERROR: checksums identical for two non-identical files!!!:\n". | ||||||
|  |         "files:\t`$curr_entry->{name}'\n". | ||||||
|  |         "\t`$entry->{name}'\n". | ||||||
|  |         "SHA1: ($digest)\n". | ||||||
|  |         '(this is probably a limit of SHA1; try processing fewer files)'; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     # find the oldest time stamp | ||||||
|  |     my ($atime, $mtime) = @{(sort | ||||||
|  |         {$a->{mtime} <=> $b->{mtime}} | ||||||
|  |         ($entry, $curr_entry) | ||||||
|  |     )[0]}{qw/ atime mtime /}; | ||||||
|  | 
 | ||||||
|  |     # find the file less embedded in the file system | ||||||
|  |     my ($less_linked, $more_linked) = sort | ||||||
|  |         {$a->{nlink} <=> $b->{nlink}} | ||||||
|  |         ($entry, $curr_entry); | ||||||
|  | 
 | ||||||
|  |     # hard-linkable files must exist on the same device and | ||||||
|  |     # must not already be hard-linked | ||||||
|  |     if ($curr_entry->{dev} == $entry->{dev} && | ||||||
|  |         ! $opts{s}) { | ||||||
|  |         # attempt to unlink the file | ||||||
|  |         printf STDERR "removing file `%s'\n", | ||||||
|  |             $less_linked->{name} if $opts{v}; | ||||||
|  |         unless (unlink $less_linked->{name}) { | ||||||
|  | 
 | ||||||
|  |             # couldn't do it; try more-linked file | ||||||
|  | 
 | ||||||
|  |             printf STDERR "Failed to remove file `%s': %s\n(using `%s')\n", | ||||||
|  |                 $less_linked->{name}, | ||||||
|  |                 $!, | ||||||
|  |                 $more_linked->{name} | ||||||
|  |             if $opts{v}; | ||||||
|  | 
 | ||||||
|  |             # if we can't do this, there's no point | ||||||
|  |             # in continuing | ||||||
|  |             unless (unlink $more_linked->{name}) { | ||||||
|  |                 printf STDERR "Failed to remove file `%s' (second failure on match): %s\nSkipping...\n", | ||||||
|  |                     $more_linked->{name}, | ||||||
|  |                     $! | ||||||
|  |                     if $opts{v}; | ||||||
|  | 
 | ||||||
|  |                 return; | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             # find the oldest time stamp |             # the ol' switcheroo | ||||||
|             my ($atime, $mtime) = @{(sort |             ($more_linked, $less_linked) = | ||||||
|                 {$a->{mtime} <=> $b->{mtime}} |             ($less_linked, $more_linked); | ||||||
|                 ($entry, $curr_entry) |  | ||||||
|             )[0]}{qw/ atime mtime /}; |  | ||||||
| 
 | 
 | ||||||
|             # find the file less embedded in the file system |  | ||||||
|             my ($less_linked, $more_linked) = sort |  | ||||||
|                 {$a->{nlink} <=> $b->{nlink}} |  | ||||||
|                 ($entry, $curr_entry); |  | ||||||
| 
 |  | ||||||
|             # hard-linkable files must exist on the same device and |  | ||||||
|             # must not already be hard-linked |  | ||||||
|             if ($curr_entry->{dev} == $entry->{dev} && |  | ||||||
|                 ! $opts{s}) { |  | ||||||
| #               print "hard-linking $new_file\t=>$old_file\n"; |  | ||||||
|                 # attempt to unlink the file |  | ||||||
|                 printf STDERR "removing file `%s'\n", |  | ||||||
|                     $less_linked->{name} if $opts{v}; |  | ||||||
|                 unless (unlink $less_linked->{name}) { |  | ||||||
| 
 |  | ||||||
|                     # couldn't do it; try more-linked file |  | ||||||
| 
 |  | ||||||
|                     printf STDERR <<EOF |  | ||||||
| Failed to remove file `%s': %s |  | ||||||
| (using `%s') |  | ||||||
| EOF |  | ||||||
| , |  | ||||||
|                     $less_linked->{name}, |  | ||||||
|                     $!, |  | ||||||
|                     $more_linked->{name} |  | ||||||
|                         if $opts{v}; |  | ||||||
| 
 |  | ||||||
|                     # if we can't do this, there's no point |  | ||||||
|                     # in continuing |  | ||||||
|                     unless (unlink $more_linked->{name}) { |  | ||||||
| printf STDERR <<EOF |  | ||||||
| Failed to remove file `%s' (second failure on match): %s |  | ||||||
| Skipping... |  | ||||||
| EOF |  | ||||||
| , |  | ||||||
|                         $more_linked->{name}, |  | ||||||
|                         $! |  | ||||||
|                             if $opts{v}; |  | ||||||
| 
 |  | ||||||
|                         return; |  | ||||||
|                     } |  | ||||||
| 
 |  | ||||||
|                     # the ol' switcheroo |  | ||||||
|                     ($more_linked, $less_linked) = |  | ||||||
|                     ($less_linked, $more_linked); |  | ||||||
| 
 |  | ||||||
|                 } |  | ||||||
| 
 |  | ||||||
|                 # we unlinked it or failed out |  | ||||||
|                 $freed_bytes += $less_linked->{size} |  | ||||||
|                     unless $less_linked->{nlink} > 1; |  | ||||||
| 
 |  | ||||||
|                 printf STDERR "hard linking `%s' => `%s'\n", |  | ||||||
|                 $less_linked->{name}, $more_linked->{name} |  | ||||||
|                 if $opts{v}; |  | ||||||
| 
 |  | ||||||
|                 # hard link the files |  | ||||||
|                 link $more_linked->{name}, |  | ||||||
|                 $less_linked->{name}; |  | ||||||
| 
 |  | ||||||
|                 # update link count in our hash to reflect the |  | ||||||
|                 # file system (referenced) |  | ||||||
|                 ++$more_linked->{nlink}; |  | ||||||
| 
 |  | ||||||
|                 # preserve older time stamp |  | ||||||
|                 utime $atime, $mtime, $less_linked->{name}; |  | ||||||
|             } elsif (! $opts{S}) { |  | ||||||
|                 # files are on different drives; |  | ||||||
|                 # most that can be done is to soft-link them |  | ||||||
| 
 |  | ||||||
|                 unless (unlink $less_linked->{name}) { |  | ||||||
| 
 |  | ||||||
|                     # couldn't do it; try more-linked file |  | ||||||
| 
 |  | ||||||
|                     printf STDERR "couldn't remove file `%s' (using `%s')\n", |  | ||||||
|                     $less_linked->{name}, |  | ||||||
|                     $more_linked->{name} if $opts{v}; |  | ||||||
| 
 |  | ||||||
|                     # if we can't do this, there's no point |  | ||||||
|                     # in continuing |  | ||||||
|                     unlink $more_linked->{name} |  | ||||||
|                         or return; |  | ||||||
| 
 |  | ||||||
|                     # the ol' switcheroo |  | ||||||
|                     ($more_linked, $less_linked) = |  | ||||||
|                     ($less_linked, $more_linked); |  | ||||||
| 
 |  | ||||||
|                 } |  | ||||||
| 
 |  | ||||||
|                 # we unlinked it or failed out |  | ||||||
|                 $freed_bytes += $less_linked->{size}; |  | ||||||
| 
 |  | ||||||
|                 printf STDERR "soft-linking %s => %s\n", |  | ||||||
|                 $less_linked->{name}, $more_linked->{name} |  | ||||||
|                 if $opts{v}; |  | ||||||
| 
 |  | ||||||
|                 # create a soft link (TODO: relative links) |  | ||||||
|                 symlink $more_linked->{name}, |  | ||||||
|                 $less_linked->{name}; |  | ||||||
| 
 |  | ||||||
|                 # preserve older time stamp |  | ||||||
|                 utime $atime, $mtime, $less_linked->{name}; |  | ||||||
|             } |  | ||||||
|         } else { |  | ||||||
|             # the file is unique (as far as we know) |  | ||||||
|             # create a new entry in the hash table |  | ||||||
|             $files{$digest} = $entry; |  | ||||||
|         } |         } | ||||||
|  | 
 | ||||||
|  |         # we unlinked it or failed out | ||||||
|  |         $freed_bytes += $less_linked->{size} | ||||||
|  |             unless $less_linked->{nlink} > 1; | ||||||
|  | 
 | ||||||
|  |         printf STDERR "hard linking `%s' => `%s'\n", | ||||||
|  |             $less_linked->{name}, $more_linked->{name} | ||||||
|  |         if $opts{v}; | ||||||
|  | 
 | ||||||
|  |         # hard link the files | ||||||
|  |         link $more_linked->{name}, | ||||||
|  |         $less_linked->{name}; | ||||||
|  | 
 | ||||||
|  |         # update link count in our hash to reflect the | ||||||
|  |         # file system (referenced) | ||||||
|  |         ++$more_linked->{nlink}; | ||||||
|  | 
 | ||||||
|  |         # preserve older time stamp | ||||||
|  |         utime $atime, $mtime, $less_linked->{name}; | ||||||
|  |     } elsif (! $opts{S}) { | ||||||
|  |         # files are on different drives; | ||||||
|  |         # most that can be done is to soft-link them | ||||||
|  | 
 | ||||||
|  |         unless (unlink $less_linked->{name}) { | ||||||
|  | 
 | ||||||
|  |             # couldn't do it; try more-linked file | ||||||
|  | 
 | ||||||
|  |             printf STDERR "couldn't remove file `%s' (using `%s')\n", | ||||||
|  |             $less_linked->{name}, | ||||||
|  |             $more_linked->{name} if $opts{v}; | ||||||
|  | 
 | ||||||
|  |             # if we can't do this, there's no point | ||||||
|  |             # in continuing | ||||||
|  |             unlink $more_linked->{name} | ||||||
|  |                 or return; | ||||||
|  | 
 | ||||||
|  |             # the ol' switcheroo | ||||||
|  |             ($more_linked, $less_linked) = | ||||||
|  |             ($less_linked, $more_linked); | ||||||
|  | 
 | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         # we unlinked it or failed out | ||||||
|  |         $freed_bytes += $less_linked->{size}; | ||||||
|  | 
 | ||||||
|  |         printf STDERR "soft-linking %s => %s\n", | ||||||
|  |             $less_linked->{name}, $more_linked->{name} | ||||||
|  |         if $opts{v}; | ||||||
|  | 
 | ||||||
|  |         # create a soft link (TODO: relative links) | ||||||
|  |         symlink $more_linked->{name}, | ||||||
|  |         $less_linked->{name}; | ||||||
|  | 
 | ||||||
|  |         # preserve older time stamp | ||||||
|  |         utime $atime, $mtime, $less_linked->{name}; | ||||||
|  |     } | ||||||
|     #} elsif (-l $File::Find::name) { |     #} elsif (-l $File::Find::name) { | ||||||
|     #   # do something to simplify symlinks |     #   # do something to simplify symlinks | ||||||
|     #   printf STDERR "FIXME: simplifying symlink `%s'\n", |     #   printf STDERR "FIXME: simplifying symlink `%s'\n", | ||||||
|  | @ -371,7 +360,6 @@ EOF | ||||||
| 
 | 
 | ||||||
|     #   printf STDERR "symlink `%s' points to `%s'\n", |     #   printf STDERR "symlink `%s' points to `%s'\n", | ||||||
|     #   $File::Find::name, readlink $File::Find::name; |     #   $File::Find::name, readlink $File::Find::name; | ||||||
|     } |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| printf STDERR "freed %d bytes (%0.4G %s)\n", | printf STDERR "freed %d bytes (%0.4G %s)\n", | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue