mirror of
				https://codeberg.org/h3xx/simplify_static_dir
				synced 2024-08-14 23:57:24 +00:00 
			
		
		
		
	Move library code to lib/
Fixes accidental inclusion of test instrumentation into the all-in-one script.
This commit is contained in:
		
							parent
							
								
									9dc3e4578c
								
							
						
					
					
						commit
						4d4edd5e9d
					
				
					 8 changed files with 2 additions and 2 deletions
				
			
		
							
								
								
									
										31
									
								
								lib/Directory/Simplify/File.pm
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								lib/Directory/Simplify/File.pm
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,31 @@ | |||
| package Directory::Simplify::File; | ||||
| # vi: et sts=4 sw=4 ts=4 | ||||
| use strict; | ||||
| use warnings; | ||||
| require Cwd; | ||||
| 
 | ||||
| sub new { | ||||
|     my $class = shift; | ||||
|     my $rel_name = shift; | ||||
|     my $self = bless { | ||||
|         rel_name => $rel_name, | ||||
|         name => Cwd::abs_path($rel_name), | ||||
|     }, $class; | ||||
|     (@{$self}{qw/ dev ino mode nlink uid gid rdev size | ||||
|                 atime mtime ctime blksize blocks /}) | ||||
|         = lstat $self->{name}; | ||||
|     $self | ||||
| } | ||||
| 
 | ||||
| sub hash { | ||||
|     my $self = shift; | ||||
|     unless (defined $self->{_hash}) { | ||||
|         require Digest::SHA; | ||||
|         my $ctx = Digest::SHA->new; | ||||
|         $ctx->addfile($self->{name}); | ||||
|         $self->{_hash} = $ctx->hexdigest; | ||||
|     } | ||||
|     $self->{_hash} | ||||
| } | ||||
| 
 | ||||
| 1; | ||||
							
								
								
									
										57
									
								
								lib/Directory/Simplify/FileHash.pm
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								lib/Directory/Simplify/FileHash.pm
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,57 @@ | |||
| package Directory::Simplify::FileHash; | ||||
| use strict; | ||||
| use warnings; | ||||
| 
 | ||||
| =head1 DESCRIPTION | ||||
| 
 | ||||
| Object for abstracting management of a hashed filesystem | ||||
| 
 | ||||
| =cut | ||||
| 
 | ||||
| # :squash-remove-start: | ||||
| require Directory::Simplify::File; | ||||
| # :squash-remove-end: | ||||
| 
 | ||||
| sub new { | ||||
|     my $class = shift; | ||||
|     return bless { | ||||
|         _entries => {}, | ||||
|         _files_in_hash => {}, | ||||
|         @_, | ||||
|     }, $class; | ||||
| } | ||||
| 
 | ||||
| sub add { | ||||
|     my $self = shift; | ||||
|     my (@files, $callback); | ||||
|     if (ref $_[0] eq 'HASH') { | ||||
|         # Called method like { files => [] } | ||||
|         my %opts = %{$_[0]}; | ||||
|         @files = @{$opts{files}}; | ||||
|         $callback = $opts{callback}; | ||||
|     } else { | ||||
|         @files = @_; | ||||
|     } | ||||
|     foreach my $file (@files) { | ||||
|         unless (ref $file eq 'Directory::Simplify::File') { | ||||
|             $file = Directory::Simplify::File->new($file); | ||||
|         } | ||||
|         unless ($self->{_files_in_hash}->{$file->{name}}) { | ||||
|             my $hash = $file->hash; | ||||
| 
 | ||||
|             unless (defined $self->{_entries}->{$hash}) { | ||||
|                 $self->{_entries}->{$hash} = []; | ||||
|             } | ||||
|             push @{$self->{_entries}->{$hash}}, $file; | ||||
|             &{$callback}($file) if ref $callback eq 'CODE'; | ||||
|         } | ||||
|         $self->{_files_in_hash}->{$file->{name}} = 1; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| sub entries { | ||||
|     my $self = shift; | ||||
|     values %{$self->{_entries}} | ||||
| } | ||||
| 
 | ||||
| 1; | ||||
							
								
								
									
										33
									
								
								lib/Directory/Simplify/Instruction/CopyTimestamp.pm
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								lib/Directory/Simplify/Instruction/CopyTimestamp.pm
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,33 @@ | |||
| package Directory::Simplify::Instruction::CopyTimestamp; | ||||
| # vi: et sts=4 sw=4 ts=4 | ||||
| use strict; | ||||
| use warnings; | ||||
| use overload '""' => 'as_string'; | ||||
| 
 | ||||
| # :squash-remove-start: | ||||
| require Directory::Simplify::Utils; | ||||
| # :squash-remove-end: | ||||
| 
 | ||||
| sub new { | ||||
|     my $class = shift; | ||||
|     return bless { | ||||
|         @_, | ||||
|     }, $class; | ||||
| } | ||||
| 
 | ||||
| sub run { | ||||
|     my $self = shift; | ||||
|     # preserve older time stamp | ||||
|     utime $self->{source}->{atime}, $self->{source}->{mtime}, $self->{target}->{name}; | ||||
| } | ||||
| 
 | ||||
| sub bytes_freed { | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| sub as_string { | ||||
|     my $self = shift; | ||||
|     return sprintf 'touch -r %s %s', Directory::Simplify::Utils::shell_quote($self->{source}->{name}, $self->{target}->{name}); | ||||
| } | ||||
| 
 | ||||
| 1; | ||||
							
								
								
									
										180
									
								
								lib/Directory/Simplify/Instruction/Generator.pm
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										180
									
								
								lib/Directory/Simplify/Instruction/Generator.pm
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,180 @@ | |||
| package Directory::Simplify::Instruction::Generator; | ||||
| # vi: et sts=4 sw=4 ts=4 | ||||
| use strict; | ||||
| use warnings; | ||||
| use overload '""' => 'as_string'; | ||||
| use File::Basename qw/ dirname /; | ||||
| use File::Compare qw/ compare /; | ||||
| 
 | ||||
| # :squash-remove-start: | ||||
| require Directory::Simplify::Instruction::CopyTimestamp; | ||||
| require Directory::Simplify::Instruction::Hardlink; | ||||
| # :squash-remove-end: | ||||
| 
 | ||||
| sub new { | ||||
|     my $class = shift; | ||||
|     return bless { | ||||
|         filehash => undef, | ||||
|         min_size => 1, | ||||
|         @_, | ||||
|     }, $class; | ||||
| } | ||||
| 
 | ||||
| sub as_string { | ||||
|     my $self = shift; | ||||
|     join "\n", $self->instructions; | ||||
| } | ||||
| 
 | ||||
| sub buckets { | ||||
|     my $self = shift; | ||||
| 
 | ||||
|     my @candidate_lists = $self->{filehash}->entries; | ||||
| 
 | ||||
|     my @buckets; | ||||
|     foreach my $candidates (@candidate_lists) { | ||||
|         my @ca = @{$candidates}; # make a clone | ||||
|         my @these_buckets; | ||||
| 
 | ||||
|         # at least two files needed to link together | ||||
|         if (@ca > 1) { | ||||
|             ELIMINATOR: while (@ca) { | ||||
|                 my $entry = shift @ca; | ||||
| 
 | ||||
|                 next ELIMINATOR if $self->_entry_should_be_skipped($entry); | ||||
| 
 | ||||
|                 foreach my $bucket_idx (0 .. $#these_buckets) { | ||||
|                     if (&_entries_are_hard_linkable($these_buckets[$bucket_idx]->[0], $entry)) { | ||||
|                         push @{$these_buckets[$bucket_idx]}, $entry; | ||||
|                         next ELIMINATOR; | ||||
|                     } | ||||
|                 } | ||||
|                 # didn't find a bucket (even though the hash matched!) | ||||
|                 # -> new bucket | ||||
|                 push @these_buckets, [$entry]; | ||||
|             } | ||||
|         } | ||||
|         push @buckets, @these_buckets; | ||||
|     } | ||||
| 
 | ||||
|     @buckets | ||||
| } | ||||
| 
 | ||||
| sub _entry_should_be_skipped { | ||||
|     my ($self, $entry_a) = @_; | ||||
|     # too small to be hard-linked | ||||
|     if ($entry_a->{size} < $self->{min_size}) { | ||||
|         return 1; | ||||
|     } | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| sub _entries_are_hard_linkable { | ||||
|     my ($entry_a, $entry_b) = @_; | ||||
| 
 | ||||
|     # obviously, if the sizes aren't the same, they're not the same file | ||||
|     unless (&_entries_sizes_match($entry_a, $entry_b)) { | ||||
|         return 0; | ||||
|     } | ||||
| 
 | ||||
|     # they're the same file, don't try it | ||||
|     if (&_entries_are_already_hard_linked($entry_a, $entry_b)) { | ||||
|         return 0; | ||||
|     } | ||||
| 
 | ||||
|     if (&_entries_contents_match($entry_a, $entry_b)) { | ||||
|         return 1; | ||||
|     } | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| sub oldest_mtime { | ||||
|     my $self = shift; | ||||
|     return sort { | ||||
|         $a->{mtime} <=> $b->{mtime} | ||||
|     } @_; | ||||
| } | ||||
| 
 | ||||
| sub more_linked { | ||||
|     my $self = shift; | ||||
|     my %warned; | ||||
|     return sort { | ||||
|         if (! -w &dirname($a->{name})) { | ||||
|             warn "Warning: $a->{name} not able to be unlinked!" unless $warned{$a->{name}}++; | ||||
|             return 1; # favor a -> front | ||||
|         } elsif (! -w &dirname($b->{name})) { | ||||
|             warn "Warning: $b->{name} not able to be unlinked!" unless $warned{$b->{name}}++; | ||||
|             return -1; # favor b -> front | ||||
|         } | ||||
|         $b->{nlink} <=> $a->{nlink} | ||||
|     } @_; | ||||
| } | ||||
| 
 | ||||
| sub _entries_are_already_hard_linked { | ||||
|     my ($entry_a, $entry_b) = @_; | ||||
| 
 | ||||
|     if ($entry_a->{ino} == $entry_b->{ino} and | ||||
|         $entry_a->{dev} == $entry_b->{dev}) { | ||||
|         return 1; | ||||
|     } | ||||
| 
 | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| sub _entries_sizes_match { | ||||
|     my ($entry_a, $entry_b) = @_; | ||||
|     if ($entry_a->{size} != $entry_b->{size}) { | ||||
|         return 0; | ||||
|     } | ||||
|     return 1; | ||||
| } | ||||
| sub _entries_contents_match { | ||||
|     my ($entry_a, $entry_b) = @_; | ||||
| 
 | ||||
|     my $contents_same = (0 == &File::Compare::compare($entry_a->{name}, $entry_b->{name})); | ||||
| 
 | ||||
|     # warn about hash collision | ||||
|     unless ($contents_same) { | ||||
|         warn "Hash collision between files:\n* $entry_a->{name}\n* $entry_b->{name}\n  (don't panic)\n"; | ||||
|     } | ||||
|     return $contents_same; | ||||
| } | ||||
| 
 | ||||
| # generate hardlink instructions | ||||
| sub instructions { | ||||
|     require Scalar::Util; | ||||
|     my $self = shift; | ||||
| 
 | ||||
|     # start generating instructions | ||||
|     my @inst; | ||||
|     foreach my $bucket ($self->buckets) { | ||||
| 
 | ||||
|         # of the bucket, find the oldest timestamp | ||||
|         my ($oldest_entry) = $self->oldest_mtime(@{$bucket}); | ||||
| 
 | ||||
|         # of the bucket, find the file most embedded in the file system | ||||
|         my @to_link = $self->more_linked(@{$bucket}); | ||||
|         my $most_linked_entry = shift @to_link; | ||||
|         foreach my $entry (@to_link) { | ||||
|             # XXX there shouldn't be a need to update entries' link counts, | ||||
|             # since this generates all the instructions at once | ||||
|             push @inst, Directory::Simplify::Instruction::Hardlink->new( | ||||
|                 source => $most_linked_entry, | ||||
|                 target => $entry, | ||||
|             ); | ||||
|             push @inst, Directory::Simplify::Instruction::CopyTimestamp->new( | ||||
|                 source => $oldest_entry, | ||||
|                 target => $entry, | ||||
|             ); | ||||
|         } | ||||
|         if (&Scalar::Util::refaddr($most_linked_entry) != &Scalar::Util::refaddr($oldest_entry)) { | ||||
|             # most_linked_entry should get its timestamp updated | ||||
|             push @inst, Directory::Simplify::Instruction::CopyTimestamp->new( | ||||
|                 source => $oldest_entry, | ||||
|                 target => $most_linked_entry, | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|     @inst | ||||
| } | ||||
| 
 | ||||
| 1; | ||||
							
								
								
									
										46
									
								
								lib/Directory/Simplify/Instruction/Hardlink.pm
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								lib/Directory/Simplify/Instruction/Hardlink.pm
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,46 @@ | |||
| package Directory::Simplify::Instruction::Hardlink; | ||||
| # vi: et sts=4 sw=4 ts=4 | ||||
| use strict; | ||||
| use warnings; | ||||
| use overload '""' => 'as_string'; | ||||
| 
 | ||||
| # :squash-remove-start: | ||||
| require Directory::Simplify::Utils; | ||||
| # :squash-remove-end: | ||||
| 
 | ||||
| sub new { | ||||
|     my $class = shift; | ||||
|     return bless { | ||||
|         freed => 0, | ||||
|         @_, | ||||
|     }, $class; | ||||
| } | ||||
| 
 | ||||
| sub run { | ||||
|     my $self = shift; | ||||
|     # hard link the files | ||||
| 
 | ||||
|     unless (unlink $self->{target}->{name}) { | ||||
|         die "Failed to remove file `$self->{target}->{name}': $!\n"; | ||||
|     } | ||||
|     unless (link $self->{source}->{name}, $self->{target}->{name}) { | ||||
|         die "Failed to hard link `$self->{source}->{name}' => `$self->{target}->{name}': $!"; | ||||
|     } | ||||
|     # bookkeeping | ||||
|     ++$self->{source}->{nlink}; | ||||
|     if (--$self->{target}->{nlink} == 0) { | ||||
|         $self->{freed} = $self->{target}->{size}; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| sub bytes_freed { | ||||
|     my $self = shift; | ||||
|     return $self->{freed}; | ||||
| } | ||||
| 
 | ||||
| sub as_string { | ||||
|     my $self = shift; | ||||
|     return sprintf 'ln -sf %s %s', Directory::Simplify::Utils::shell_quote($self->{source}->{name}, $self->{target}->{name}); | ||||
| } | ||||
| 
 | ||||
| 1; | ||||
							
								
								
									
										47
									
								
								lib/Directory/Simplify/Utils.pm
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								lib/Directory/Simplify/Utils.pm
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,47 @@ | |||
| package Directory::Simplify::Utils; | ||||
| # vi: et sts=4 sw=4 ts=4 | ||||
| use strict; | ||||
| use warnings; | ||||
| 
 | ||||
| sub addcommas { | ||||
|     my @added; | ||||
|     foreach my $num (@_) { | ||||
|         # don't split anything after the decimal | ||||
|         my @parts = split /\./, $num; | ||||
|         while ($parts[0] =~ s/(\d)(\d{3}(?:\D|$))/$1,$2/) { | ||||
|         } | ||||
|         push @added, (join '.', @parts); | ||||
|     } | ||||
|     wantarray ? @added : $added[0] | ||||
| } | ||||
| 
 | ||||
| sub hr_size { | ||||
|     my $sz = shift; | ||||
|     my @sizes = qw/ B KB MB GB TB PB EB ZB YB /; | ||||
|     my $fact = 1024; | ||||
|     my $thresh = 0.1; | ||||
|     my @ret; | ||||
|     foreach my $exp (reverse 0 .. $#sizes) { | ||||
|         if ($sz > (1 - $thresh) * $fact ** $exp) { | ||||
|             @ret = ($sz / $fact ** $exp, $sizes[$exp]); | ||||
|             last; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     # default to ($sz, 'bytes') | ||||
|     @ret = ($sz, $sizes[0]) unless @ret; | ||||
| 
 | ||||
|     wantarray ? @ret : "@ret" | ||||
| } | ||||
| 
 | ||||
| sub shell_quote { | ||||
|     # shell-escape argument for inclusion in non-interpolated single quotes | ||||
|     my @transformed = map { | ||||
|         (my $out = $_) | ||||
|             =~ s/'/'\\''/g; | ||||
|         "'$out'"; | ||||
|     } @_; | ||||
|     wantarray ? @transformed : $transformed[0]; | ||||
| } | ||||
| 
 | ||||
| 1; | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue