Move library code to lib/

Fixes accidental inclusion of test instrumentation into the all-in-one
script.
This commit is contained in:
Dan Church 2023-06-29 12:59:27 -05:00
parent 9dc3e4578c
commit 4d4edd5e9d
Signed by: h3xx
GPG key ID: EA2BF379CD2CDBD0
8 changed files with 2 additions and 2 deletions

View file

@ -0,0 +1,31 @@
package Directory::Simplify::File;
# vi: et sts=4 sw=4 ts=4
use strict;
use warnings;
require Cwd;
sub new {
my $class = shift;
my $rel_name = shift;
my $self = bless {
rel_name => $rel_name,
name => Cwd::abs_path($rel_name),
}, $class;
(@{$self}{qw/ dev ino mode nlink uid gid rdev size
atime mtime ctime blksize blocks /})
= lstat $self->{name};
$self
}
sub hash {
my $self = shift;
unless (defined $self->{_hash}) {
require Digest::SHA;
my $ctx = Digest::SHA->new;
$ctx->addfile($self->{name});
$self->{_hash} = $ctx->hexdigest;
}
$self->{_hash}
}
1;

View file

@ -0,0 +1,57 @@
package Directory::Simplify::FileHash;
use strict;
use warnings;
=head1 DESCRIPTION
Object for abstracting management of a hashed filesystem
=cut
# :squash-remove-start:
require Directory::Simplify::File;
# :squash-remove-end:
sub new {
my $class = shift;
return bless {
_entries => {},
_files_in_hash => {},
@_,
}, $class;
}
sub add {
my $self = shift;
my (@files, $callback);
if (ref $_[0] eq 'HASH') {
# Called method like { files => [] }
my %opts = %{$_[0]};
@files = @{$opts{files}};
$callback = $opts{callback};
} else {
@files = @_;
}
foreach my $file (@files) {
unless (ref $file eq 'Directory::Simplify::File') {
$file = Directory::Simplify::File->new($file);
}
unless ($self->{_files_in_hash}->{$file->{name}}) {
my $hash = $file->hash;
unless (defined $self->{_entries}->{$hash}) {
$self->{_entries}->{$hash} = [];
}
push @{$self->{_entries}->{$hash}}, $file;
&{$callback}($file) if ref $callback eq 'CODE';
}
$self->{_files_in_hash}->{$file->{name}} = 1;
}
}
sub entries {
my $self = shift;
values %{$self->{_entries}}
}
1;

View file

@ -0,0 +1,33 @@
package Directory::Simplify::Instruction::CopyTimestamp;
# vi: et sts=4 sw=4 ts=4
use strict;
use warnings;
use overload '""' => 'as_string';
# :squash-remove-start:
require Directory::Simplify::Utils;
# :squash-remove-end:
sub new {
my $class = shift;
return bless {
@_,
}, $class;
}
sub run {
my $self = shift;
# preserve older time stamp
utime $self->{source}->{atime}, $self->{source}->{mtime}, $self->{target}->{name};
}
sub bytes_freed {
return 0;
}
sub as_string {
my $self = shift;
return sprintf 'touch -r %s %s', Directory::Simplify::Utils::shell_quote($self->{source}->{name}, $self->{target}->{name});
}
1;

View file

@ -0,0 +1,180 @@
package Directory::Simplify::Instruction::Generator;
# vi: et sts=4 sw=4 ts=4
use strict;
use warnings;
use overload '""' => 'as_string';
use File::Basename qw/ dirname /;
use File::Compare qw/ compare /;
# :squash-remove-start:
require Directory::Simplify::Instruction::CopyTimestamp;
require Directory::Simplify::Instruction::Hardlink;
# :squash-remove-end:
sub new {
my $class = shift;
return bless {
filehash => undef,
min_size => 1,
@_,
}, $class;
}
sub as_string {
my $self = shift;
join "\n", $self->instructions;
}
sub buckets {
my $self = shift;
my @candidate_lists = $self->{filehash}->entries;
my @buckets;
foreach my $candidates (@candidate_lists) {
my @ca = @{$candidates}; # make a clone
my @these_buckets;
# at least two files needed to link together
if (@ca > 1) {
ELIMINATOR: while (@ca) {
my $entry = shift @ca;
next ELIMINATOR if $self->_entry_should_be_skipped($entry);
foreach my $bucket_idx (0 .. $#these_buckets) {
if (&_entries_are_hard_linkable($these_buckets[$bucket_idx]->[0], $entry)) {
push @{$these_buckets[$bucket_idx]}, $entry;
next ELIMINATOR;
}
}
# didn't find a bucket (even though the hash matched!)
# -> new bucket
push @these_buckets, [$entry];
}
}
push @buckets, @these_buckets;
}
@buckets
}
sub _entry_should_be_skipped {
my ($self, $entry_a) = @_;
# too small to be hard-linked
if ($entry_a->{size} < $self->{min_size}) {
return 1;
}
return 0;
}
sub _entries_are_hard_linkable {
my ($entry_a, $entry_b) = @_;
# obviously, if the sizes aren't the same, they're not the same file
unless (&_entries_sizes_match($entry_a, $entry_b)) {
return 0;
}
# they're the same file, don't try it
if (&_entries_are_already_hard_linked($entry_a, $entry_b)) {
return 0;
}
if (&_entries_contents_match($entry_a, $entry_b)) {
return 1;
}
return 0;
}
sub oldest_mtime {
my $self = shift;
return sort {
$a->{mtime} <=> $b->{mtime}
} @_;
}
sub more_linked {
my $self = shift;
my %warned;
return sort {
if (! -w &dirname($a->{name})) {
warn "Warning: $a->{name} not able to be unlinked!" unless $warned{$a->{name}}++;
return 1; # favor a -> front
} elsif (! -w &dirname($b->{name})) {
warn "Warning: $b->{name} not able to be unlinked!" unless $warned{$b->{name}}++;
return -1; # favor b -> front
}
$b->{nlink} <=> $a->{nlink}
} @_;
}
sub _entries_are_already_hard_linked {
my ($entry_a, $entry_b) = @_;
if ($entry_a->{ino} == $entry_b->{ino} and
$entry_a->{dev} == $entry_b->{dev}) {
return 1;
}
return 0;
}
sub _entries_sizes_match {
my ($entry_a, $entry_b) = @_;
if ($entry_a->{size} != $entry_b->{size}) {
return 0;
}
return 1;
}
sub _entries_contents_match {
my ($entry_a, $entry_b) = @_;
my $contents_same = (0 == &File::Compare::compare($entry_a->{name}, $entry_b->{name}));
# warn about hash collision
unless ($contents_same) {
warn "Hash collision between files:\n* $entry_a->{name}\n* $entry_b->{name}\n (don't panic)\n";
}
return $contents_same;
}
# generate hardlink instructions
sub instructions {
require Scalar::Util;
my $self = shift;
# start generating instructions
my @inst;
foreach my $bucket ($self->buckets) {
# of the bucket, find the oldest timestamp
my ($oldest_entry) = $self->oldest_mtime(@{$bucket});
# of the bucket, find the file most embedded in the file system
my @to_link = $self->more_linked(@{$bucket});
my $most_linked_entry = shift @to_link;
foreach my $entry (@to_link) {
# XXX there shouldn't be a need to update entries' link counts,
# since this generates all the instructions at once
push @inst, Directory::Simplify::Instruction::Hardlink->new(
source => $most_linked_entry,
target => $entry,
);
push @inst, Directory::Simplify::Instruction::CopyTimestamp->new(
source => $oldest_entry,
target => $entry,
);
}
if (&Scalar::Util::refaddr($most_linked_entry) != &Scalar::Util::refaddr($oldest_entry)) {
# most_linked_entry should get its timestamp updated
push @inst, Directory::Simplify::Instruction::CopyTimestamp->new(
source => $oldest_entry,
target => $most_linked_entry,
);
}
}
@inst
}
1;

View file

@ -0,0 +1,46 @@
package Directory::Simplify::Instruction::Hardlink;
# vi: et sts=4 sw=4 ts=4
use strict;
use warnings;
use overload '""' => 'as_string';
# :squash-remove-start:
require Directory::Simplify::Utils;
# :squash-remove-end:
sub new {
my $class = shift;
return bless {
freed => 0,
@_,
}, $class;
}
sub run {
my $self = shift;
# hard link the files
unless (unlink $self->{target}->{name}) {
die "Failed to remove file `$self->{target}->{name}': $!\n";
}
unless (link $self->{source}->{name}, $self->{target}->{name}) {
die "Failed to hard link `$self->{source}->{name}' => `$self->{target}->{name}': $!";
}
# bookkeeping
++$self->{source}->{nlink};
if (--$self->{target}->{nlink} == 0) {
$self->{freed} = $self->{target}->{size};
}
}
sub bytes_freed {
my $self = shift;
return $self->{freed};
}
sub as_string {
my $self = shift;
return sprintf 'ln -sf %s %s', Directory::Simplify::Utils::shell_quote($self->{source}->{name}, $self->{target}->{name});
}
1;

View file

@ -0,0 +1,47 @@
package Directory::Simplify::Utils;
# vi: et sts=4 sw=4 ts=4
use strict;
use warnings;
sub addcommas {
my @added;
foreach my $num (@_) {
# don't split anything after the decimal
my @parts = split /\./, $num;
while ($parts[0] =~ s/(\d)(\d{3}(?:\D|$))/$1,$2/) {
}
push @added, (join '.', @parts);
}
wantarray ? @added : $added[0]
}
sub hr_size {
my $sz = shift;
my @sizes = qw/ B KB MB GB TB PB EB ZB YB /;
my $fact = 1024;
my $thresh = 0.1;
my @ret;
foreach my $exp (reverse 0 .. $#sizes) {
if ($sz > (1 - $thresh) * $fact ** $exp) {
@ret = ($sz / $fact ** $exp, $sizes[$exp]);
last;
}
}
# default to ($sz, 'bytes')
@ret = ($sz, $sizes[0]) unless @ret;
wantarray ? @ret : "@ret"
}
sub shell_quote {
# shell-escape argument for inclusion in non-interpolated single quotes
my @transformed = map {
(my $out = $_)
=~ s/'/'\\''/g;
"'$out'";
} @_;
wantarray ? @transformed : $transformed[0];
}
1;