mirror of
https://codeberg.org/h3xx/simplify_static_dir
synced 2024-08-14 23:57:24 +00:00
Move library code to lib/
Fixes accidental inclusion of test instrumentation into the all-in-one script.
This commit is contained in:
parent
9dc3e4578c
commit
4d4edd5e9d
8 changed files with 2 additions and 2 deletions
33
lib/Directory/Simplify/Instruction/CopyTimestamp.pm
Normal file
33
lib/Directory/Simplify/Instruction/CopyTimestamp.pm
Normal file
|
@ -0,0 +1,33 @@
|
|||
package Directory::Simplify::Instruction::CopyTimestamp;
|
||||
# vi: et sts=4 sw=4 ts=4
|
||||
use strict;
|
||||
use warnings;
|
||||
use overload '""' => 'as_string';
|
||||
|
||||
# :squash-remove-start:
|
||||
require Directory::Simplify::Utils;
|
||||
# :squash-remove-end:
|
||||
|
||||
sub new {
|
||||
my $class = shift;
|
||||
return bless {
|
||||
@_,
|
||||
}, $class;
|
||||
}
|
||||
|
||||
sub run {
|
||||
my $self = shift;
|
||||
# preserve older time stamp
|
||||
utime $self->{source}->{atime}, $self->{source}->{mtime}, $self->{target}->{name};
|
||||
}
|
||||
|
||||
sub bytes_freed {
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub as_string {
|
||||
my $self = shift;
|
||||
return sprintf 'touch -r %s %s', Directory::Simplify::Utils::shell_quote($self->{source}->{name}, $self->{target}->{name});
|
||||
}
|
||||
|
||||
1;
|
180
lib/Directory/Simplify/Instruction/Generator.pm
Normal file
180
lib/Directory/Simplify/Instruction/Generator.pm
Normal file
|
@ -0,0 +1,180 @@
|
|||
package Directory::Simplify::Instruction::Generator;
|
||||
# vi: et sts=4 sw=4 ts=4
|
||||
use strict;
|
||||
use warnings;
|
||||
use overload '""' => 'as_string';
|
||||
use File::Basename qw/ dirname /;
|
||||
use File::Compare qw/ compare /;
|
||||
|
||||
# :squash-remove-start:
|
||||
require Directory::Simplify::Instruction::CopyTimestamp;
|
||||
require Directory::Simplify::Instruction::Hardlink;
|
||||
# :squash-remove-end:
|
||||
|
||||
sub new {
|
||||
my $class = shift;
|
||||
return bless {
|
||||
filehash => undef,
|
||||
min_size => 1,
|
||||
@_,
|
||||
}, $class;
|
||||
}
|
||||
|
||||
sub as_string {
|
||||
my $self = shift;
|
||||
join "\n", $self->instructions;
|
||||
}
|
||||
|
||||
sub buckets {
|
||||
my $self = shift;
|
||||
|
||||
my @candidate_lists = $self->{filehash}->entries;
|
||||
|
||||
my @buckets;
|
||||
foreach my $candidates (@candidate_lists) {
|
||||
my @ca = @{$candidates}; # make a clone
|
||||
my @these_buckets;
|
||||
|
||||
# at least two files needed to link together
|
||||
if (@ca > 1) {
|
||||
ELIMINATOR: while (@ca) {
|
||||
my $entry = shift @ca;
|
||||
|
||||
next ELIMINATOR if $self->_entry_should_be_skipped($entry);
|
||||
|
||||
foreach my $bucket_idx (0 .. $#these_buckets) {
|
||||
if (&_entries_are_hard_linkable($these_buckets[$bucket_idx]->[0], $entry)) {
|
||||
push @{$these_buckets[$bucket_idx]}, $entry;
|
||||
next ELIMINATOR;
|
||||
}
|
||||
}
|
||||
# didn't find a bucket (even though the hash matched!)
|
||||
# -> new bucket
|
||||
push @these_buckets, [$entry];
|
||||
}
|
||||
}
|
||||
push @buckets, @these_buckets;
|
||||
}
|
||||
|
||||
@buckets
|
||||
}
|
||||
|
||||
sub _entry_should_be_skipped {
|
||||
my ($self, $entry_a) = @_;
|
||||
# too small to be hard-linked
|
||||
if ($entry_a->{size} < $self->{min_size}) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub _entries_are_hard_linkable {
|
||||
my ($entry_a, $entry_b) = @_;
|
||||
|
||||
# obviously, if the sizes aren't the same, they're not the same file
|
||||
unless (&_entries_sizes_match($entry_a, $entry_b)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
# they're the same file, don't try it
|
||||
if (&_entries_are_already_hard_linked($entry_a, $entry_b)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (&_entries_contents_match($entry_a, $entry_b)) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub oldest_mtime {
|
||||
my $self = shift;
|
||||
return sort {
|
||||
$a->{mtime} <=> $b->{mtime}
|
||||
} @_;
|
||||
}
|
||||
|
||||
sub more_linked {
|
||||
my $self = shift;
|
||||
my %warned;
|
||||
return sort {
|
||||
if (! -w &dirname($a->{name})) {
|
||||
warn "Warning: $a->{name} not able to be unlinked!" unless $warned{$a->{name}}++;
|
||||
return 1; # favor a -> front
|
||||
} elsif (! -w &dirname($b->{name})) {
|
||||
warn "Warning: $b->{name} not able to be unlinked!" unless $warned{$b->{name}}++;
|
||||
return -1; # favor b -> front
|
||||
}
|
||||
$b->{nlink} <=> $a->{nlink}
|
||||
} @_;
|
||||
}
|
||||
|
||||
sub _entries_are_already_hard_linked {
|
||||
my ($entry_a, $entry_b) = @_;
|
||||
|
||||
if ($entry_a->{ino} == $entry_b->{ino} and
|
||||
$entry_a->{dev} == $entry_b->{dev}) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub _entries_sizes_match {
|
||||
my ($entry_a, $entry_b) = @_;
|
||||
if ($entry_a->{size} != $entry_b->{size}) {
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
sub _entries_contents_match {
|
||||
my ($entry_a, $entry_b) = @_;
|
||||
|
||||
my $contents_same = (0 == &File::Compare::compare($entry_a->{name}, $entry_b->{name}));
|
||||
|
||||
# warn about hash collision
|
||||
unless ($contents_same) {
|
||||
warn "Hash collision between files:\n* $entry_a->{name}\n* $entry_b->{name}\n (don't panic)\n";
|
||||
}
|
||||
return $contents_same;
|
||||
}
|
||||
|
||||
# generate hardlink instructions
|
||||
sub instructions {
|
||||
require Scalar::Util;
|
||||
my $self = shift;
|
||||
|
||||
# start generating instructions
|
||||
my @inst;
|
||||
foreach my $bucket ($self->buckets) {
|
||||
|
||||
# of the bucket, find the oldest timestamp
|
||||
my ($oldest_entry) = $self->oldest_mtime(@{$bucket});
|
||||
|
||||
# of the bucket, find the file most embedded in the file system
|
||||
my @to_link = $self->more_linked(@{$bucket});
|
||||
my $most_linked_entry = shift @to_link;
|
||||
foreach my $entry (@to_link) {
|
||||
# XXX there shouldn't be a need to update entries' link counts,
|
||||
# since this generates all the instructions at once
|
||||
push @inst, Directory::Simplify::Instruction::Hardlink->new(
|
||||
source => $most_linked_entry,
|
||||
target => $entry,
|
||||
);
|
||||
push @inst, Directory::Simplify::Instruction::CopyTimestamp->new(
|
||||
source => $oldest_entry,
|
||||
target => $entry,
|
||||
);
|
||||
}
|
||||
if (&Scalar::Util::refaddr($most_linked_entry) != &Scalar::Util::refaddr($oldest_entry)) {
|
||||
# most_linked_entry should get its timestamp updated
|
||||
push @inst, Directory::Simplify::Instruction::CopyTimestamp->new(
|
||||
source => $oldest_entry,
|
||||
target => $most_linked_entry,
|
||||
);
|
||||
}
|
||||
}
|
||||
@inst
|
||||
}
|
||||
|
||||
1;
|
46
lib/Directory/Simplify/Instruction/Hardlink.pm
Normal file
46
lib/Directory/Simplify/Instruction/Hardlink.pm
Normal file
|
@ -0,0 +1,46 @@
|
|||
package Directory::Simplify::Instruction::Hardlink;
|
||||
# vi: et sts=4 sw=4 ts=4
|
||||
use strict;
|
||||
use warnings;
|
||||
use overload '""' => 'as_string';
|
||||
|
||||
# :squash-remove-start:
|
||||
require Directory::Simplify::Utils;
|
||||
# :squash-remove-end:
|
||||
|
||||
sub new {
|
||||
my $class = shift;
|
||||
return bless {
|
||||
freed => 0,
|
||||
@_,
|
||||
}, $class;
|
||||
}
|
||||
|
||||
sub run {
|
||||
my $self = shift;
|
||||
# hard link the files
|
||||
|
||||
unless (unlink $self->{target}->{name}) {
|
||||
die "Failed to remove file `$self->{target}->{name}': $!\n";
|
||||
}
|
||||
unless (link $self->{source}->{name}, $self->{target}->{name}) {
|
||||
die "Failed to hard link `$self->{source}->{name}' => `$self->{target}->{name}': $!";
|
||||
}
|
||||
# bookkeeping
|
||||
++$self->{source}->{nlink};
|
||||
if (--$self->{target}->{nlink} == 0) {
|
||||
$self->{freed} = $self->{target}->{size};
|
||||
}
|
||||
}
|
||||
|
||||
sub bytes_freed {
|
||||
my $self = shift;
|
||||
return $self->{freed};
|
||||
}
|
||||
|
||||
sub as_string {
|
||||
my $self = shift;
|
||||
return sprintf 'ln -sf %s %s', Directory::Simplify::Utils::shell_quote($self->{source}->{name}, $self->{target}->{name});
|
||||
}
|
||||
|
||||
1;
|
Loading…
Add table
Add a link
Reference in a new issue