mirror of
				https://codeberg.org/h3xx/simplify_static_dir
				synced 2024-08-14 23:57:24 +00:00 
			
		
		
		
	Move library code to lib/
Fixes accidental inclusion of test instrumentation into the all-in-one script.
This commit is contained in:
		
							parent
							
								
									9dc3e4578c
								
							
						
					
					
						commit
						4d4edd5e9d
					
				
					 8 changed files with 2 additions and 2 deletions
				
			
		
							
								
								
									
										33
									
								
								lib/Directory/Simplify/Instruction/CopyTimestamp.pm
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								lib/Directory/Simplify/Instruction/CopyTimestamp.pm
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,33 @@ | |||
| package Directory::Simplify::Instruction::CopyTimestamp; | ||||
| # vi: et sts=4 sw=4 ts=4 | ||||
| use strict; | ||||
| use warnings; | ||||
| use overload '""' => 'as_string'; | ||||
| 
 | ||||
| # :squash-remove-start: | ||||
| require Directory::Simplify::Utils; | ||||
| # :squash-remove-end: | ||||
| 
 | ||||
| sub new { | ||||
|     my $class = shift; | ||||
|     return bless { | ||||
|         @_, | ||||
|     }, $class; | ||||
| } | ||||
| 
 | ||||
| sub run { | ||||
|     my $self = shift; | ||||
|     # preserve older time stamp | ||||
|     utime $self->{source}->{atime}, $self->{source}->{mtime}, $self->{target}->{name}; | ||||
| } | ||||
| 
 | ||||
| sub bytes_freed { | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| sub as_string { | ||||
|     my $self = shift; | ||||
|     return sprintf 'touch -r %s %s', Directory::Simplify::Utils::shell_quote($self->{source}->{name}, $self->{target}->{name}); | ||||
| } | ||||
| 
 | ||||
| 1; | ||||
							
								
								
									
										180
									
								
								lib/Directory/Simplify/Instruction/Generator.pm
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										180
									
								
								lib/Directory/Simplify/Instruction/Generator.pm
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,180 @@ | |||
| package Directory::Simplify::Instruction::Generator; | ||||
| # vi: et sts=4 sw=4 ts=4 | ||||
| use strict; | ||||
| use warnings; | ||||
| use overload '""' => 'as_string'; | ||||
| use File::Basename qw/ dirname /; | ||||
| use File::Compare qw/ compare /; | ||||
| 
 | ||||
| # :squash-remove-start: | ||||
| require Directory::Simplify::Instruction::CopyTimestamp; | ||||
| require Directory::Simplify::Instruction::Hardlink; | ||||
| # :squash-remove-end: | ||||
| 
 | ||||
| sub new { | ||||
|     my $class = shift; | ||||
|     return bless { | ||||
|         filehash => undef, | ||||
|         min_size => 1, | ||||
|         @_, | ||||
|     }, $class; | ||||
| } | ||||
| 
 | ||||
| sub as_string { | ||||
|     my $self = shift; | ||||
|     join "\n", $self->instructions; | ||||
| } | ||||
| 
 | ||||
| sub buckets { | ||||
|     my $self = shift; | ||||
| 
 | ||||
|     my @candidate_lists = $self->{filehash}->entries; | ||||
| 
 | ||||
|     my @buckets; | ||||
|     foreach my $candidates (@candidate_lists) { | ||||
|         my @ca = @{$candidates}; # make a clone | ||||
|         my @these_buckets; | ||||
| 
 | ||||
|         # at least two files needed to link together | ||||
|         if (@ca > 1) { | ||||
|             ELIMINATOR: while (@ca) { | ||||
|                 my $entry = shift @ca; | ||||
| 
 | ||||
|                 next ELIMINATOR if $self->_entry_should_be_skipped($entry); | ||||
| 
 | ||||
|                 foreach my $bucket_idx (0 .. $#these_buckets) { | ||||
|                     if (&_entries_are_hard_linkable($these_buckets[$bucket_idx]->[0], $entry)) { | ||||
|                         push @{$these_buckets[$bucket_idx]}, $entry; | ||||
|                         next ELIMINATOR; | ||||
|                     } | ||||
|                 } | ||||
|                 # didn't find a bucket (even though the hash matched!) | ||||
|                 # -> new bucket | ||||
|                 push @these_buckets, [$entry]; | ||||
|             } | ||||
|         } | ||||
|         push @buckets, @these_buckets; | ||||
|     } | ||||
| 
 | ||||
|     @buckets | ||||
| } | ||||
| 
 | ||||
| sub _entry_should_be_skipped { | ||||
|     my ($self, $entry_a) = @_; | ||||
|     # too small to be hard-linked | ||||
|     if ($entry_a->{size} < $self->{min_size}) { | ||||
|         return 1; | ||||
|     } | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| sub _entries_are_hard_linkable { | ||||
|     my ($entry_a, $entry_b) = @_; | ||||
| 
 | ||||
|     # obviously, if the sizes aren't the same, they're not the same file | ||||
|     unless (&_entries_sizes_match($entry_a, $entry_b)) { | ||||
|         return 0; | ||||
|     } | ||||
| 
 | ||||
|     # they're the same file, don't try it | ||||
|     if (&_entries_are_already_hard_linked($entry_a, $entry_b)) { | ||||
|         return 0; | ||||
|     } | ||||
| 
 | ||||
|     if (&_entries_contents_match($entry_a, $entry_b)) { | ||||
|         return 1; | ||||
|     } | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| sub oldest_mtime { | ||||
|     my $self = shift; | ||||
|     return sort { | ||||
|         $a->{mtime} <=> $b->{mtime} | ||||
|     } @_; | ||||
| } | ||||
| 
 | ||||
| sub more_linked { | ||||
|     my $self = shift; | ||||
|     my %warned; | ||||
|     return sort { | ||||
|         if (! -w &dirname($a->{name})) { | ||||
|             warn "Warning: $a->{name} not able to be unlinked!" unless $warned{$a->{name}}++; | ||||
|             return 1; # favor a -> front | ||||
|         } elsif (! -w &dirname($b->{name})) { | ||||
|             warn "Warning: $b->{name} not able to be unlinked!" unless $warned{$b->{name}}++; | ||||
|             return -1; # favor b -> front | ||||
|         } | ||||
|         $b->{nlink} <=> $a->{nlink} | ||||
|     } @_; | ||||
| } | ||||
| 
 | ||||
| sub _entries_are_already_hard_linked { | ||||
|     my ($entry_a, $entry_b) = @_; | ||||
| 
 | ||||
|     if ($entry_a->{ino} == $entry_b->{ino} and | ||||
|         $entry_a->{dev} == $entry_b->{dev}) { | ||||
|         return 1; | ||||
|     } | ||||
| 
 | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| sub _entries_sizes_match { | ||||
|     my ($entry_a, $entry_b) = @_; | ||||
|     if ($entry_a->{size} != $entry_b->{size}) { | ||||
|         return 0; | ||||
|     } | ||||
|     return 1; | ||||
| } | ||||
| sub _entries_contents_match { | ||||
|     my ($entry_a, $entry_b) = @_; | ||||
| 
 | ||||
|     my $contents_same = (0 == &File::Compare::compare($entry_a->{name}, $entry_b->{name})); | ||||
| 
 | ||||
|     # warn about hash collision | ||||
|     unless ($contents_same) { | ||||
|         warn "Hash collision between files:\n* $entry_a->{name}\n* $entry_b->{name}\n  (don't panic)\n"; | ||||
|     } | ||||
|     return $contents_same; | ||||
| } | ||||
| 
 | ||||
| # generate hardlink instructions | ||||
| sub instructions { | ||||
|     require Scalar::Util; | ||||
|     my $self = shift; | ||||
| 
 | ||||
|     # start generating instructions | ||||
|     my @inst; | ||||
|     foreach my $bucket ($self->buckets) { | ||||
| 
 | ||||
|         # of the bucket, find the oldest timestamp | ||||
|         my ($oldest_entry) = $self->oldest_mtime(@{$bucket}); | ||||
| 
 | ||||
|         # of the bucket, find the file most embedded in the file system | ||||
|         my @to_link = $self->more_linked(@{$bucket}); | ||||
|         my $most_linked_entry = shift @to_link; | ||||
|         foreach my $entry (@to_link) { | ||||
|             # XXX there shouldn't be a need to update entries' link counts, | ||||
|             # since this generates all the instructions at once | ||||
|             push @inst, Directory::Simplify::Instruction::Hardlink->new( | ||||
|                 source => $most_linked_entry, | ||||
|                 target => $entry, | ||||
|             ); | ||||
|             push @inst, Directory::Simplify::Instruction::CopyTimestamp->new( | ||||
|                 source => $oldest_entry, | ||||
|                 target => $entry, | ||||
|             ); | ||||
|         } | ||||
|         if (&Scalar::Util::refaddr($most_linked_entry) != &Scalar::Util::refaddr($oldest_entry)) { | ||||
|             # most_linked_entry should get its timestamp updated | ||||
|             push @inst, Directory::Simplify::Instruction::CopyTimestamp->new( | ||||
|                 source => $oldest_entry, | ||||
|                 target => $most_linked_entry, | ||||
|             ); | ||||
|         } | ||||
|     } | ||||
|     @inst | ||||
| } | ||||
| 
 | ||||
| 1; | ||||
							
								
								
									
										46
									
								
								lib/Directory/Simplify/Instruction/Hardlink.pm
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								lib/Directory/Simplify/Instruction/Hardlink.pm
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,46 @@ | |||
| package Directory::Simplify::Instruction::Hardlink; | ||||
| # vi: et sts=4 sw=4 ts=4 | ||||
| use strict; | ||||
| use warnings; | ||||
| use overload '""' => 'as_string'; | ||||
| 
 | ||||
| # :squash-remove-start: | ||||
| require Directory::Simplify::Utils; | ||||
| # :squash-remove-end: | ||||
| 
 | ||||
| sub new { | ||||
|     my $class = shift; | ||||
|     return bless { | ||||
|         freed => 0, | ||||
|         @_, | ||||
|     }, $class; | ||||
| } | ||||
| 
 | ||||
| sub run { | ||||
|     my $self = shift; | ||||
|     # hard link the files | ||||
| 
 | ||||
|     unless (unlink $self->{target}->{name}) { | ||||
|         die "Failed to remove file `$self->{target}->{name}': $!\n"; | ||||
|     } | ||||
|     unless (link $self->{source}->{name}, $self->{target}->{name}) { | ||||
|         die "Failed to hard link `$self->{source}->{name}' => `$self->{target}->{name}': $!"; | ||||
|     } | ||||
|     # bookkeeping | ||||
|     ++$self->{source}->{nlink}; | ||||
|     if (--$self->{target}->{nlink} == 0) { | ||||
|         $self->{freed} = $self->{target}->{size}; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| sub bytes_freed { | ||||
|     my $self = shift; | ||||
|     return $self->{freed}; | ||||
| } | ||||
| 
 | ||||
| sub as_string { | ||||
|     my $self = shift; | ||||
|     return sprintf 'ln -sf %s %s', Directory::Simplify::Utils::shell_quote($self->{source}->{name}, $self->{target}->{name}); | ||||
| } | ||||
| 
 | ||||
| 1; | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue