Break out packages into separate files
This commit is contained in:
parent
f04cbac723
commit
b1b4e18e05
|
@ -0,0 +1 @@
|
|||
/simplify_static_dir.pl
|
|
@ -0,0 +1,31 @@
|
|||
package Directory::Simplify::File;
|
||||
# vi: et sts=4 sw=4 ts=4
|
||||
use strict;
|
||||
use warnings;
|
||||
require Cwd;
|
||||
|
||||
sub new {
|
||||
my $class = shift;
|
||||
my $rel_name = shift;
|
||||
my $self = bless {
|
||||
rel_name => $rel_name,
|
||||
name => Cwd::abs_path($rel_name),
|
||||
}, $class;
|
||||
(@{$self}{qw/ dev ino mode nlink uid gid rdev size
|
||||
atime mtime ctime blksize blocks /})
|
||||
= lstat $self->{name};
|
||||
$self
|
||||
}
|
||||
|
||||
sub hash {
|
||||
my $self = shift;
|
||||
unless (defined $self->{_hash}) {
|
||||
require Digest::SHA;
|
||||
my $ctx = Digest::SHA->new;
|
||||
$ctx->addfile($self->{name});
|
||||
$self->{_hash} = $ctx->hexdigest;
|
||||
}
|
||||
$self->{_hash}
|
||||
}
|
||||
|
||||
1;
|
|
@ -0,0 +1,57 @@
|
|||
package Directory::Simplify::FileHash;
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
Object for abstracting management of a hashed filesystem
|
||||
|
||||
=cut
|
||||
|
||||
# :squash-ignore-start:
|
||||
require Directory::Simplify::File;
|
||||
# :squash-ignore-end:
|
||||
|
||||
sub new {
|
||||
my $class = shift;
|
||||
return bless {
|
||||
_entries => {},
|
||||
_files_in_hash => {},
|
||||
@_,
|
||||
}, $class;
|
||||
}
|
||||
|
||||
sub add {
|
||||
my $self = shift;
|
||||
my (@files, $callback);
|
||||
if (ref $_[0] eq 'HASH') {
|
||||
# Called method like { files => [] }
|
||||
my %opts = %{$_[0]};
|
||||
@files = @{$opts{files}};
|
||||
$callback = $opts{callback};
|
||||
} else {
|
||||
@files = @_;
|
||||
}
|
||||
foreach my $file (@files) {
|
||||
unless (ref $file eq 'Directory::Simplify::File') {
|
||||
$file = Directory::Simplify::File->new($file);
|
||||
}
|
||||
unless ($self->{_files_in_hash}->{$file->{name}}) {
|
||||
my $hash = $file->hash;
|
||||
|
||||
unless (defined $self->{_entries}->{$hash}) {
|
||||
$self->{_entries}->{$hash} = [];
|
||||
}
|
||||
push @{$self->{_entries}->{$hash}}, $file;
|
||||
&{$callback}($file) if ref $callback eq 'CODE';
|
||||
}
|
||||
$self->{_files_in_hash}->{$file->{name}} = 1;
|
||||
}
|
||||
}
|
||||
|
||||
sub entries {
|
||||
my $self = shift;
|
||||
values %{$self->{_entries}}
|
||||
}
|
||||
|
||||
1;
|
|
@ -0,0 +1,33 @@
|
|||
package Directory::Simplify::Instruction::CopyTimestamp;
|
||||
# vi: et sts=4 sw=4 ts=4
|
||||
use strict;
|
||||
use warnings;
|
||||
use overload '""' => 'as_string';
|
||||
|
||||
# :squash-ignore-start:
|
||||
require Directory::Simplify::Utils;
|
||||
# :squash-ignore-end:
|
||||
|
||||
sub new {
|
||||
my $class = shift;
|
||||
return bless {
|
||||
@_,
|
||||
}, $class;
|
||||
}
|
||||
|
||||
sub run {
|
||||
my $self = shift;
|
||||
# preserve older time stamp
|
||||
utime $self->{source}->{atime}, $self->{source}->{mtime}, $self->{target}->{name};
|
||||
}
|
||||
|
||||
sub bytes_freed {
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub as_string {
|
||||
my $self = shift;
|
||||
return sprintf 'touch -r %s %s', Directory::Simplify::Utils::shell_quote($self->{source}->{name}, $self->{target}->{name});
|
||||
}
|
||||
|
||||
1;
|
|
@ -0,0 +1,180 @@
|
|||
package Directory::Simplify::Instruction::Generator;
|
||||
# vi: et sts=4 sw=4 ts=4
|
||||
use strict;
|
||||
use warnings;
|
||||
use overload '""' => 'as_string';
|
||||
use File::Basename qw/ dirname /;
|
||||
use File::Compare qw/ compare /;
|
||||
|
||||
# :squash-ignore-start:
|
||||
require Directory::Simplify::Instruction::CopyTimestamp;
|
||||
require Directory::Simplify::Instruction::Hardlink;
|
||||
# :squash-ignore-end:
|
||||
|
||||
sub new {
|
||||
my $class = shift;
|
||||
return bless {
|
||||
filehash => undef,
|
||||
min_size => 1,
|
||||
@_,
|
||||
}, $class;
|
||||
}
|
||||
|
||||
sub as_string {
|
||||
my $self = shift;
|
||||
join "\n", $self->instructions;
|
||||
}
|
||||
|
||||
sub buckets {
|
||||
my $self = shift;
|
||||
|
||||
my @candidate_lists = $self->{filehash}->entries;
|
||||
|
||||
my @buckets;
|
||||
foreach my $candidates (@candidate_lists) {
|
||||
my @ca = @{$candidates}; # make a clone
|
||||
my @these_buckets;
|
||||
|
||||
# at least two files needed to link together
|
||||
if (@ca > 1) {
|
||||
ELIMINATOR: while (@ca) {
|
||||
my $entry = shift @ca;
|
||||
|
||||
next ELIMINATOR if $self->_entry_should_be_skipped($entry);
|
||||
|
||||
foreach my $bucket_idx (0 .. $#these_buckets) {
|
||||
if (&_entries_are_hard_linkable($these_buckets[$bucket_idx]->[0], $entry)) {
|
||||
push @{$these_buckets[$bucket_idx]}, $entry;
|
||||
next ELIMINATOR;
|
||||
}
|
||||
}
|
||||
# didn't find a bucket (even though the hash matched!)
|
||||
# -> new bucket
|
||||
push @these_buckets, [$entry];
|
||||
}
|
||||
}
|
||||
push @buckets, @these_buckets;
|
||||
}
|
||||
|
||||
@buckets
|
||||
}
|
||||
|
||||
sub _entry_should_be_skipped {
|
||||
my ($self, $entry_a) = @_;
|
||||
# too small to be hard-linked
|
||||
if ($entry_a->{size} < $self->{min_size}) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub _entries_are_hard_linkable {
|
||||
my ($entry_a, $entry_b) = @_;
|
||||
|
||||
# obviously, if the sizes aren't the same, they're not the same file
|
||||
unless (&_entries_sizes_match($entry_a, $entry_b)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
# they're the same file, don't try it
|
||||
if (&_entries_are_already_hard_linked($entry_a, $entry_b)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (&_entries_contents_match($entry_a, $entry_b)) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub oldest_mtime {
|
||||
my $self = shift;
|
||||
return sort {
|
||||
$a->{mtime} <=> $b->{mtime}
|
||||
} @_;
|
||||
}
|
||||
|
||||
sub more_linked {
|
||||
my $self = shift;
|
||||
my %warned;
|
||||
return sort {
|
||||
if (! -w &dirname($a->{name})) {
|
||||
warn "Warning: $a->{name} not able to be unlinked!" unless $warned{$a->{name}}++;
|
||||
return 1; # favor a -> front
|
||||
} elsif (! -w &dirname($b->{name})) {
|
||||
warn "Warning: $b->{name} not able to be unlinked!" unless $warned{$b->{name}}++;
|
||||
return -1; # favor b -> front
|
||||
}
|
||||
$b->{nlink} <=> $a->{nlink}
|
||||
} @_;
|
||||
}
|
||||
|
||||
sub _entries_are_already_hard_linked {
|
||||
my ($entry_a, $entry_b) = @_;
|
||||
|
||||
if ($entry_a->{ino} == $entry_b->{ino} and
|
||||
$entry_a->{dev} == $entry_b->{dev}) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub _entries_sizes_match {
|
||||
my ($entry_a, $entry_b) = @_;
|
||||
if ($entry_a->{size} != $entry_b->{size}) {
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
sub _entries_contents_match {
|
||||
my ($entry_a, $entry_b) = @_;
|
||||
|
||||
my $contents_same = (0 == &File::Compare::compare($entry_a->{name}, $entry_b->{name}));
|
||||
|
||||
# warn about hash collision
|
||||
unless ($contents_same) {
|
||||
warn "Hash collision between files:\n* $entry_a->{name}\n* $entry_b->{name}\n (don't panic)\n";
|
||||
}
|
||||
return $contents_same;
|
||||
}
|
||||
|
||||
# generate hardlink instructions
|
||||
sub instructions {
|
||||
require Scalar::Util;
|
||||
my $self = shift;
|
||||
|
||||
# start generating instructions
|
||||
my @inst;
|
||||
foreach my $bucket ($self->buckets) {
|
||||
|
||||
# of the bucket, find the oldest timestamp
|
||||
my ($oldest_entry) = $self->oldest_mtime(@{$bucket});
|
||||
|
||||
# of the bucket, find the file most embedded in the file system
|
||||
my @to_link = $self->more_linked(@{$bucket});
|
||||
my $most_linked_entry = shift @to_link;
|
||||
foreach my $entry (@to_link) {
|
||||
# XXX there shouldn't be a need to update entries' link counts,
|
||||
# since this generates all the instructions at once
|
||||
push @inst, Directory::Simplify::Instruction::Hardlink->new(
|
||||
source => $most_linked_entry,
|
||||
target => $entry,
|
||||
);
|
||||
push @inst, Directory::Simplify::Instruction::CopyTimestamp->new(
|
||||
source => $oldest_entry,
|
||||
target => $entry,
|
||||
);
|
||||
}
|
||||
if (&Scalar::Util::refaddr($most_linked_entry) != &Scalar::Util::refaddr($oldest_entry)) {
|
||||
# most_linked_entry should get its timestamp updated
|
||||
push @inst, Directory::Simplify::Instruction::CopyTimestamp->new(
|
||||
source => $oldest_entry,
|
||||
target => $most_linked_entry,
|
||||
);
|
||||
}
|
||||
}
|
||||
@inst
|
||||
}
|
||||
|
||||
1;
|
|
@ -0,0 +1,46 @@
|
|||
package Directory::Simplify::Instruction::Hardlink;
|
||||
# vi: et sts=4 sw=4 ts=4
|
||||
use strict;
|
||||
use warnings;
|
||||
use overload '""' => 'as_string';
|
||||
|
||||
# :squash-ignore-start:
|
||||
require Directory::Simplify::Utils;
|
||||
# :squash-ignore-end:
|
||||
|
||||
sub new {
|
||||
my $class = shift;
|
||||
return bless {
|
||||
freed => 0,
|
||||
@_,
|
||||
}, $class;
|
||||
}
|
||||
|
||||
sub run {
|
||||
my $self = shift;
|
||||
# hard link the files
|
||||
|
||||
unless (unlink $self->{target}->{name}) {
|
||||
die "Failed to remove file `$self->{target}->{name}': $!\n";
|
||||
}
|
||||
unless (link $self->{source}->{name}, $self->{target}->{name}) {
|
||||
die "Failed to hard link `$self->{source}->{name}' => `$self->{target}->{name}': $!";
|
||||
}
|
||||
# bookkeeping
|
||||
++$self->{source}->{nlink};
|
||||
if (--$self->{target}->{nlink} == 0) {
|
||||
$self->{freed} = $self->{target}->{size};
|
||||
}
|
||||
}
|
||||
|
||||
sub bytes_freed {
|
||||
my $self = shift;
|
||||
return $self->{freed};
|
||||
}
|
||||
|
||||
sub as_string {
|
||||
my $self = shift;
|
||||
return sprintf 'ln -sf %s %s', Directory::Simplify::Utils::shell_quote($self->{source}->{name}, $self->{target}->{name});
|
||||
}
|
||||
|
||||
1;
|
|
@ -0,0 +1,47 @@
|
|||
package Directory::Simplify::Utils;
|
||||
# vi: et sts=4 sw=4 ts=4
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
sub addcommas {
|
||||
my @added;
|
||||
foreach my $num (@_) {
|
||||
# don't split anything after the decimal
|
||||
my @parts = split /\./, $num;
|
||||
while ($parts[0] =~ s/(\d)(\d{3}(?:\D|$))/$1,$2/) {
|
||||
}
|
||||
push @added, (join '.', @parts);
|
||||
}
|
||||
wantarray ? @added : $added[0]
|
||||
}
|
||||
|
||||
sub hr_size {
|
||||
my $sz = shift;
|
||||
my @sizes = qw/ B KB MB GB TB PB EB ZB YB /;
|
||||
my $fact = 1024;
|
||||
my $thresh = 0.1;
|
||||
my @ret;
|
||||
foreach my $exp (reverse 0 .. $#sizes) {
|
||||
if ($sz > (1 - $thresh) * $fact ** $exp) {
|
||||
@ret = ($sz / $fact ** $exp, $sizes[$exp]);
|
||||
last;
|
||||
}
|
||||
}
|
||||
|
||||
# default to ($sz, 'bytes')
|
||||
@ret = ($sz, $sizes[0]) unless @ret;
|
||||
|
||||
wantarray ? @ret : "@ret"
|
||||
}
|
||||
|
||||
sub shell_quote {
|
||||
# shell-escape argument for inclusion in non-interpolated single quotes
|
||||
my @transformed = map {
|
||||
(my $out = $_)
|
||||
=~ s/'/'\\''/g;
|
||||
"'$out'";
|
||||
} @_;
|
||||
wantarray ? @transformed : $transformed[0];
|
||||
}
|
||||
|
||||
1;
|
|
@ -0,0 +1,13 @@
|
|||
#!/bin/bash
|
||||
# vi: et sts=4 sw=4 ts=4
|
||||
WORKDIR=${0%/*}
|
||||
OUT=$WORKDIR/simplify_static_dir.pl
|
||||
|
||||
echo "Outputting to $OUT" >&2
|
||||
|
||||
shopt -s globstar
|
||||
"$WORKDIR/util/squash" \
|
||||
"$WORKDIR/simplify_static_dir-main.pl" \
|
||||
"$WORKDIR"/**/*.pm \
|
||||
> "$OUT"
|
||||
chmod +x -- "$OUT"
|
|
@ -0,0 +1,257 @@
|
|||
#!/usr/bin/perl
|
||||
# vi: et sts=4 sw=4 ts=4
|
||||
|
||||
package main;
|
||||
use strict;
|
||||
use warnings;
|
||||
use List::Util qw/ sum /;
|
||||
|
||||
our $VERSION = '3.1.0';
|
||||
|
||||
=pod
|
||||
|
||||
=head1 NAME
|
||||
|
||||
simplify_static_dir - optimize directories for size by combining identical
|
||||
files
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<simplify_static_dir> [I<OPTIONS>] [I<DIR>]...
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
The more files this script can process at once, the better. It maintains an
|
||||
internal hash of files indexed by their SHA1 checksum. When it finds a
|
||||
collision it removes the file with the least amount of file system links, then
|
||||
creates a hard link to the other in its place. The larger the number of files
|
||||
scanned, the more likely it is that there will be collisions.
|
||||
|
||||
There are failsafes in place, though. If somehow two files' SHA1 checksums are
|
||||
identical, but the file sizes do not match, this program will error out (and
|
||||
you can go ahead and report the collision; it may be worth money).
|
||||
|
||||
There are other features built in as well. Following the logic that unique
|
||||
data, once created has the attribute of being unique of that point in time,
|
||||
this script will copy the timestamp of the older file onto the newer file just
|
||||
before it makes the hard link. Therefore, many times, simplified directories
|
||||
will have the appearance of being older than they actually are.
|
||||
|
||||
From the perspective of any program reading files from the simplified
|
||||
directories, the files will lookB<*> and behave the same way as the initial
|
||||
state.
|
||||
|
||||
B<*> Except for having an increased number of hard links.
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over
|
||||
|
||||
=item B<-v>
|
||||
|
||||
Verbose output.
|
||||
|
||||
=item B<-f>
|
||||
|
||||
Print a sum of the number of freed bytes.
|
||||
|
||||
=item B<-m> I<REGEX>
|
||||
|
||||
Only match file paths matching I<REGEX>.
|
||||
|
||||
=item B<-M> I<REGEX>
|
||||
|
||||
Exclude file paths matching I<REGEX>.
|
||||
|
||||
=item B<-z>
|
||||
|
||||
Include zero-length files in search. Normally they are ignored (you don't save
|
||||
diskspace by hard-linking empty files).
|
||||
|
||||
=item B<--help>
|
||||
|
||||
Output this help message and exit.
|
||||
|
||||
=item B<--version>
|
||||
|
||||
Output version information and exit.
|
||||
|
||||
=back
|
||||
|
||||
By default, scans the current directory.
|
||||
|
||||
=head1 COPYRIGHT
|
||||
|
||||
Copyright (C) 2010-2022 Dan Church.
|
||||
|
||||
License GPLv3: GNU GPL version 3.0 (L<https://www.gnu.org/licenses/gpl-3.0.html>)
|
||||
|
||||
with Commons Clause 1.0 (L<https://commonsclause.com/>).
|
||||
|
||||
This is free software: you are free to change and redistribute it.
|
||||
|
||||
There is NO WARRANTY, to the extent permitted by law.
|
||||
|
||||
You may NOT use this software for commercial purposes.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Written by Dan Church S<E<lt>amphetamachine@gmail.comE<gt>>
|
||||
|
||||
=cut
|
||||
|
||||
use File::Find qw/ find /;
|
||||
use Getopt::Std qw/ getopts /;
|
||||
use Pod::Usage qw/ pod2usage /;
|
||||
|
||||
# :squash-ignore-start:
|
||||
# (this prepends to the load path)
|
||||
use FindBin qw//;
|
||||
use lib $FindBin::RealBin;
|
||||
|
||||
require Directory::Simplify::File;
|
||||
require Directory::Simplify::FileHash;
|
||||
require Directory::Simplify::Instruction::Generator;
|
||||
require Directory::Simplify::Utils;
|
||||
# :squash-ignore-end:
|
||||
|
||||
sub HELP_MESSAGE {
|
||||
my $fh = shift;
|
||||
&pod2usage(
|
||||
-verbose => 1,
|
||||
-exitval => 0,
|
||||
);
|
||||
}
|
||||
|
||||
MAIN: {
|
||||
&getopts('vfm:M:z', \ my %opts)
|
||||
|| &pod2usage(
|
||||
-exitval => 2,
|
||||
-msg => "Try '$0 --help' for more information",
|
||||
);
|
||||
|
||||
my $verbose = defined $opts{v};
|
||||
my $print_freed = defined $opts{f};
|
||||
my $files_match = $opts{m} || '';
|
||||
my $files_exclude = $opts{M} || '';
|
||||
my $include_zero_length_files = defined $opts{z};
|
||||
my $print_progress = $verbose;
|
||||
|
||||
# correct relative paths
|
||||
# OR if no directories given, search the current directory
|
||||
my @dirs_to_process = map { Cwd::abs_path($_) } (@ARGV ? @ARGV : ($ENV{PWD}));
|
||||
|
||||
my @files;
|
||||
print STDERR 'Finding files...'
|
||||
if $verbose;
|
||||
|
||||
&find(sub {
|
||||
# outright skip directories (don't report skip)
|
||||
return if -d $File::Find::name;
|
||||
|
||||
# skip non-existent files and links
|
||||
unless (-f $File::Find::name && ! -l $File::Find::name) {
|
||||
return;
|
||||
}
|
||||
|
||||
push @files, Directory::Simplify::File->new($File::Find::name);
|
||||
}, @dirs_to_process);
|
||||
|
||||
printf STDERR "%d files found",
|
||||
scalar @files
|
||||
if $verbose;
|
||||
|
||||
# Limit to or exclude file patterns specified by `-m' or `-M', respectively
|
||||
#
|
||||
# Truth table:
|
||||
# -m matches | -M is used & matches | !return?
|
||||
# 0 | 0 | 0
|
||||
# 0 | 1 | 0
|
||||
# 1 | 0 | 1
|
||||
# 1 | 1 | 0
|
||||
# note: m// will match everything
|
||||
my $file_ct_before_filter = scalar @files;
|
||||
@files = grep {
|
||||
$_->{rel_name} =~ $files_match
|
||||
} @files;
|
||||
if ($file_ct_before_filter != scalar @files) {
|
||||
printf STDERR " (%d files filtered by -m rule)",
|
||||
$file_ct_before_filter - scalar @files
|
||||
if $verbose;
|
||||
}
|
||||
if (length $files_exclude) {
|
||||
$file_ct_before_filter = scalar @files;
|
||||
@files = grep {
|
||||
not $_->{rel_name} =~ $files_exclude
|
||||
} @files;
|
||||
if ($file_ct_before_filter != scalar @files) {
|
||||
printf STDERR " (%d files filtered by -M rule)",
|
||||
$file_ct_before_filter - scalar @files
|
||||
if $verbose;
|
||||
}
|
||||
}
|
||||
|
||||
# Shortcut: Only generate hashes and inspect files that do not have a
|
||||
# unique size. The reasoning being that file sizes do not match, there's no
|
||||
# possible way those two files can have the same contents.
|
||||
my %file_sizes;
|
||||
++$file_sizes{$_->{size}} foreach @files;
|
||||
@files = grep {
|
||||
$file_sizes{$_->{size}} > 1
|
||||
} @files;
|
||||
|
||||
printf STDERR " (%d candidates).\n",
|
||||
scalar @files
|
||||
if $verbose;
|
||||
|
||||
unless (@files) {
|
||||
printf STDERR "Nothing to do.\n";
|
||||
exit 0;
|
||||
}
|
||||
|
||||
print STDERR "Generating hashes..." if $verbose;
|
||||
my $filehash = Directory::Simplify::FileHash->new;
|
||||
my $report_every = 1; # seconds
|
||||
my $processed_bytes = 0;
|
||||
my $last_report = time;
|
||||
my $total_size_hr = sprintf "%0.4G %s", Directory::Simplify::Utils::hr_size(&sum(map { $_->{size} } @files) or 0);
|
||||
my $cb;
|
||||
if ($print_progress) {
|
||||
printf STDERR "\e\x{37}";
|
||||
$cb = sub {
|
||||
my ($file, $now) = (shift, time);
|
||||
$processed_bytes += $file->{size};
|
||||
if ($now >= $last_report + $report_every) {
|
||||
printf STDERR "\e\x{38}%8s / %8s",
|
||||
(sprintf '%0.4G %s', Directory::Simplify::Utils::hr_size($processed_bytes)),
|
||||
$total_size_hr;
|
||||
$last_report = $now;
|
||||
}
|
||||
};
|
||||
}
|
||||
$filehash->add({
|
||||
files => \@files,
|
||||
callback => $cb,
|
||||
});
|
||||
print STDERR "done.\n"
|
||||
if $verbose;
|
||||
|
||||
my $generator = Directory::Simplify::Instruction::Generator->new(
|
||||
filehash => $filehash,
|
||||
min_size => ($include_zero_length_files ? 0 : 1),
|
||||
);
|
||||
|
||||
my $freed_bytes = 0;
|
||||
|
||||
foreach my $inst ($generator->instructions) {
|
||||
print STDERR $inst, "\n" if $verbose;
|
||||
$inst->run;
|
||||
$freed_bytes += $inst->bytes_freed;
|
||||
}
|
||||
|
||||
printf STDERR "freed %s bytes (%0.4G %s)\n",
|
||||
Directory::Simplify::Utils::addcommas($freed_bytes),
|
||||
Directory::Simplify::Utils::hr_size($freed_bytes)
|
||||
if $print_freed or $verbose;
|
||||
}
|
||||
|
|
@ -1,611 +0,0 @@
|
|||
#!/usr/bin/perl
|
||||
# vi: et sts=4 sw=4 ts=4
|
||||
|
||||
package main;
|
||||
use strict;
|
||||
use warnings;
|
||||
use List::Util qw/ sum /;
|
||||
|
||||
our $VERSION = '3.1.0';
|
||||
|
||||
=pod
|
||||
|
||||
=head1 NAME
|
||||
|
||||
simplify_static_dir - optimize directories for size by combining identical
|
||||
files
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<simplify_static_dir> [I<OPTIONS>] [I<DIR>]...
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
The more files this script can process at once, the better. It maintains an
|
||||
internal hash of files indexed by their SHA1 checksum. When it finds a
|
||||
collision it removes the file with the least amount of file system links, then
|
||||
creates a hard link to the other in its place. The larger the number of files
|
||||
scanned, the more likely it is that there will be collisions.
|
||||
|
||||
There are failsafes in place, though. If somehow two files' SHA1 checksums are
|
||||
identical, but the file sizes do not match, this program will error out (and
|
||||
you can go ahead and report the collision; it may be worth money).
|
||||
|
||||
There are other features built in as well. Following the logic that unique
|
||||
data, once created has the attribute of being unique of that point in time,
|
||||
this script will copy the timestamp of the older file onto the newer file just
|
||||
before it makes the hard link. Therefore, many times, simplified directories
|
||||
will have the appearance of being older than they actually are.
|
||||
|
||||
From the perspective of any program reading files from the simplified
|
||||
directories, the files will lookB<*> and behave the same way as the initial
|
||||
state.
|
||||
|
||||
B<*> Except for having an increased number of hard links.
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over
|
||||
|
||||
=item B<-v>
|
||||
|
||||
Verbose output.
|
||||
|
||||
=item B<-f>
|
||||
|
||||
Print a sum of the number of freed bytes.
|
||||
|
||||
=item B<-m> I<REGEX>
|
||||
|
||||
Only match file paths matching I<REGEX>.
|
||||
|
||||
=item B<-M> I<REGEX>
|
||||
|
||||
Exclude file paths matching I<REGEX>.
|
||||
|
||||
=item B<-z>
|
||||
|
||||
Include zero-length files in search. Normally they are ignored (you don't save
|
||||
diskspace by hard-linking empty files).
|
||||
|
||||
=item B<--help>
|
||||
|
||||
Output this help message and exit.
|
||||
|
||||
=item B<--version>
|
||||
|
||||
Output version information and exit.
|
||||
|
||||
=back
|
||||
|
||||
By default, scans the current directory.
|
||||
|
||||
=head1 COPYRIGHT
|
||||
|
||||
Copyright (C) 2010-2022 Dan Church.
|
||||
|
||||
License GPLv3: GNU GPL version 3.0 (L<https://www.gnu.org/licenses/gpl-3.0.html>)
|
||||
|
||||
with Commons Clause 1.0 (L<https://commonsclause.com/>).
|
||||
|
||||
This is free software: you are free to change and redistribute it.
|
||||
|
||||
There is NO WARRANTY, to the extent permitted by law.
|
||||
|
||||
You may NOT use this software for commercial purposes.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Written by Dan Church S<E<lt>amphetamachine@gmail.comE<gt>>
|
||||
|
||||
=cut
|
||||
|
||||
use File::Find qw/ find /;
|
||||
use Getopt::Std qw/ getopts /;
|
||||
use Pod::Usage qw/ pod2usage /;
|
||||
|
||||
sub HELP_MESSAGE {
|
||||
my $fh = shift;
|
||||
&pod2usage(
|
||||
-verbose => 1,
|
||||
-exitval => 0,
|
||||
);
|
||||
}
|
||||
|
||||
MAIN: {
|
||||
&getopts('vfm:M:z', \ my %opts)
|
||||
|| &pod2usage(
|
||||
-exitval => 2,
|
||||
-msg => "Try '$0 --help' for more information",
|
||||
);
|
||||
|
||||
my $verbose = defined $opts{v};
|
||||
my $print_freed = defined $opts{f};
|
||||
my $files_match = $opts{m} || '';
|
||||
my $files_exclude = $opts{M} || '';
|
||||
my $include_zero_length_files = defined $opts{z};
|
||||
my $print_progress = $verbose;
|
||||
|
||||
# correct relative paths
|
||||
# OR if no directories given, search the current directory
|
||||
my @dirs_to_process = map { Cwd::abs_path($_) } (@ARGV ? @ARGV : ($ENV{PWD}));
|
||||
|
||||
my @files;
|
||||
print STDERR 'Finding files...'
|
||||
if $verbose;
|
||||
|
||||
&find(sub {
|
||||
# outright skip directories (don't report skip)
|
||||
return if -d $File::Find::name;
|
||||
|
||||
# skip non-existent files and links
|
||||
unless (-f $File::Find::name && ! -l $File::Find::name) {
|
||||
return;
|
||||
}
|
||||
|
||||
push @files, Directory::Simplify::File->new($File::Find::name);
|
||||
}, @dirs_to_process);
|
||||
|
||||
printf STDERR "%d files found",
|
||||
scalar @files
|
||||
if $verbose;
|
||||
|
||||
# Limit to or exclude file patterns specified by `-m' or `-M', respectively
|
||||
#
|
||||
# Truth table:
|
||||
# -m matches | -M is used & matches | !return?
|
||||
# 0 | 0 | 0
|
||||
# 0 | 1 | 0
|
||||
# 1 | 0 | 1
|
||||
# 1 | 1 | 0
|
||||
# note: m// will match everything
|
||||
my $file_ct_before_filter = scalar @files;
|
||||
@files = grep {
|
||||
$_->{rel_name} =~ $files_match
|
||||
} @files;
|
||||
if ($file_ct_before_filter != scalar @files) {
|
||||
printf STDERR " (%d files filtered by -m rule)",
|
||||
$file_ct_before_filter - scalar @files
|
||||
if $verbose;
|
||||
}
|
||||
if (length $files_exclude) {
|
||||
$file_ct_before_filter = scalar @files;
|
||||
@files = grep {
|
||||
not $_->{rel_name} =~ $files_exclude
|
||||
} @files;
|
||||
if ($file_ct_before_filter != scalar @files) {
|
||||
printf STDERR " (%d files filtered by -M rule)",
|
||||
$file_ct_before_filter - scalar @files
|
||||
if $verbose;
|
||||
}
|
||||
}
|
||||
|
||||
# Shortcut: Only generate hashes and inspect files that do not have a
|
||||
# unique size. The reasoning being that file sizes do not match, there's no
|
||||
# possible way those two files can have the same contents.
|
||||
my %file_sizes;
|
||||
++$file_sizes{$_->{size}} foreach @files;
|
||||
@files = grep {
|
||||
$file_sizes{$_->{size}} > 1
|
||||
} @files;
|
||||
|
||||
printf STDERR " (%d candidates).\n",
|
||||
scalar @files
|
||||
if $verbose;
|
||||
|
||||
unless (@files) {
|
||||
printf STDERR "Nothing to do.\n";
|
||||
exit 0;
|
||||
}
|
||||
|
||||
print STDERR "Generating hashes..." if $verbose;
|
||||
my $filehash = Directory::Simplify::FileHash->new;
|
||||
my $report_every = 1; # seconds
|
||||
my $processed_bytes = 0;
|
||||
my $last_report = time;
|
||||
my $total_size_hr = sprintf "%0.4G %s", Directory::Simplify::Utils::hr_size(&sum(map { $_->{size} } @files) or 0);
|
||||
my $cb;
|
||||
if ($print_progress) {
|
||||
printf STDERR "\e\x{37}";
|
||||
$cb = sub {
|
||||
my ($file, $now) = (shift, time);
|
||||
$processed_bytes += $file->{size};
|
||||
if ($now >= $last_report + $report_every) {
|
||||
printf STDERR "\e\x{38}%8s / %8s",
|
||||
(sprintf '%0.4G %s', Directory::Simplify::Utils::hr_size($processed_bytes)),
|
||||
$total_size_hr;
|
||||
$last_report = $now;
|
||||
}
|
||||
};
|
||||
}
|
||||
$filehash->add({
|
||||
files => \@files,
|
||||
callback => $cb,
|
||||
});
|
||||
print STDERR "done.\n"
|
||||
if $verbose;
|
||||
|
||||
my $generator = Directory::Simplify::Instruction::Generator->new(
|
||||
filehash => $filehash,
|
||||
min_size => ($include_zero_length_files ? 0 : 1),
|
||||
);
|
||||
|
||||
my $freed_bytes = 0;
|
||||
|
||||
foreach my $inst ($generator->instructions) {
|
||||
print STDERR $inst, "\n" if $verbose;
|
||||
$inst->run;
|
||||
$freed_bytes += $inst->bytes_freed;
|
||||
}
|
||||
|
||||
printf STDERR "freed %s bytes (%0.4G %s)\n",
|
||||
Directory::Simplify::Utils::addcommas($freed_bytes),
|
||||
Directory::Simplify::Utils::hr_size($freed_bytes)
|
||||
if $print_freed or $verbose;
|
||||
}
|
||||
|
||||
package Directory::Simplify::Utils;
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
sub addcommas {
|
||||
my @added;
|
||||
foreach my $num (@_) {
|
||||
# don't split anything after the decimal
|
||||
my @parts = split /\./, $num;
|
||||
while ($parts[0] =~ s/(\d)(\d{3}(?:\D|$))/$1,$2/) {
|
||||
}
|
||||
push @added, (join '.', @parts);
|
||||
}
|
||||
wantarray ? @added : $added[0]
|
||||
}
|
||||
|
||||
sub hr_size {
|
||||
my $sz = shift;
|
||||
my @sizes = qw/ B KB MB GB TB PB EB ZB YB /;
|
||||
my $fact = 1024;
|
||||
my $thresh = 0.1;
|
||||
my @ret;
|
||||
foreach my $exp (reverse 0 .. $#sizes) {
|
||||
if ($sz > (1 - $thresh) * $fact ** $exp) {
|
||||
@ret = ($sz / $fact ** $exp, $sizes[$exp]);
|
||||
last;
|
||||
}
|
||||
}
|
||||
|
||||
# default to ($sz, 'bytes')
|
||||
@ret = ($sz, $sizes[0]) unless @ret;
|
||||
|
||||
wantarray ? @ret : "@ret"
|
||||
}
|
||||
|
||||
sub shell_quote {
|
||||
# shell-escape argument for inclusion in non-interpolated single quotes
|
||||
my @transformed = map {
|
||||
(my $out = $_)
|
||||
=~ s/'/'\\''/g;
|
||||
"'$out'";
|
||||
} @_;
|
||||
wantarray ? @transformed : $transformed[0];
|
||||
}
|
||||
|
||||
package Directory::Simplify::Instruction::Hardlink;
|
||||
use strict;
|
||||
use warnings;
|
||||
use overload '""' => 'as_string';
|
||||
|
||||
sub new {
|
||||
my $class = shift;
|
||||
return bless {
|
||||
freed => 0,
|
||||
@_,
|
||||
}, $class;
|
||||
}
|
||||
|
||||
sub run {
|
||||
my $self = shift;
|
||||
# hard link the files
|
||||
|
||||
unless (unlink $self->{target}->{name}) {
|
||||
die "Failed to remove file `$self->{target}->{name}': $!\n";
|
||||
}
|
||||
unless (link $self->{source}->{name}, $self->{target}->{name}) {
|
||||
die "Failed to hard link `$self->{source}->{name}' => `$self->{target}->{name}': $!";
|
||||
}
|
||||
# bookkeeping
|
||||
++$self->{source}->{nlink};
|
||||
if (--$self->{target}->{nlink} == 0) {
|
||||
$self->{freed} = $self->{target}->{size};
|
||||
}
|
||||
}
|
||||
|
||||
sub bytes_freed {
|
||||
my $self = shift;
|
||||
return $self->{freed};
|
||||
}
|
||||
|
||||
sub as_string {
|
||||
my $self = shift;
|
||||
return sprintf 'ln -sf %s %s', Directory::Simplify::Utils::shell_quote($self->{source}->{name}, $self->{target}->{name});
|
||||
}
|
||||
|
||||
package Directory::Simplify::Instruction::CopyTimestamp;
|
||||
use strict;
|
||||
use warnings;
|
||||
use overload '""' => 'as_string';
|
||||
|
||||
sub new {
|
||||
my $class = shift;
|
||||
return bless {
|
||||
@_,
|
||||
}, $class;
|
||||
}
|
||||
|
||||
sub run {
|
||||
my $self = shift;
|
||||
# preserve older time stamp
|
||||
utime $self->{source}->{atime}, $self->{source}->{mtime}, $self->{target}->{name};
|
||||
}
|
||||
|
||||
sub bytes_freed {
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub as_string {
|
||||
my $self = shift;
|
||||
return sprintf 'touch -r %s %s', Directory::Simplify::Utils::shell_quote($self->{source}->{name}, $self->{target}->{name});
|
||||
}
|
||||
|
||||
package Directory::Simplify::Instruction::Generator;
|
||||
use strict;
|
||||
use warnings;
|
||||
use overload '""' => 'as_string';
|
||||
use File::Basename qw/ dirname /;
|
||||
use File::Compare qw/ compare /;
|
||||
|
||||
sub new {
|
||||
my $class = shift;
|
||||
return bless {
|
||||
filehash => undef,
|
||||
min_size => 1,
|
||||
@_,
|
||||
}, $class;
|
||||
}
|
||||
|
||||
sub as_string {
|
||||
my $self = shift;
|
||||
join "\n", $self->instructions;
|
||||
}
|
||||
|
||||
sub buckets {
|
||||
my $self = shift;
|
||||
|
||||
my @candidate_lists = $self->{filehash}->entries;
|
||||
|
||||
my @buckets;
|
||||
foreach my $candidates (@candidate_lists) {
|
||||
my @ca = @{$candidates}; # make a clone
|
||||
my @these_buckets;
|
||||
|
||||
# at least two files needed to link together
|
||||
if (@ca > 1) {
|
||||
ELIMINATOR: while (@ca) {
|
||||
my $entry = shift @ca;
|
||||
|
||||
next ELIMINATOR if $self->_entry_should_be_skipped($entry);
|
||||
|
||||
foreach my $bucket_idx (0 .. $#these_buckets) {
|
||||
if (&_entries_are_hard_linkable($these_buckets[$bucket_idx]->[0], $entry)) {
|
||||
push @{$these_buckets[$bucket_idx]}, $entry;
|
||||
next ELIMINATOR;
|
||||
}
|
||||
}
|
||||
# didn't find a bucket (even though the hash matched!)
|
||||
# -> new bucket
|
||||
push @these_buckets, [$entry];
|
||||
}
|
||||
}
|
||||
push @buckets, @these_buckets;
|
||||
}
|
||||
|
||||
@buckets
|
||||
}
|
||||
|
||||
sub _entry_should_be_skipped {
|
||||
my ($self, $entry_a) = @_;
|
||||
# too small to be hard-linked
|
||||
if ($entry_a->{size} < $self->{min_size}) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub _entries_are_hard_linkable {
|
||||
my ($entry_a, $entry_b) = @_;
|
||||
|
||||
# obviously, if the sizes aren't the same, they're not the same file
|
||||
unless (&_entries_sizes_match($entry_a, $entry_b)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
# they're the same file, don't try it
|
||||
if (&_entries_are_already_hard_linked($entry_a, $entry_b)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (&_entries_contents_match($entry_a, $entry_b)) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub oldest_mtime {
|
||||
my $self = shift;
|
||||
return sort {
|
||||
$a->{mtime} <=> $b->{mtime}
|
||||
} @_;
|
||||
}
|
||||
|
||||
sub more_linked {
|
||||
my $self = shift;
|
||||
my %warned;
|
||||
return sort {
|
||||
if (! -w &dirname($a->{name})) {
|
||||
warn "Warning: $a->{name} not able to be unlinked!" unless $warned{$a->{name}}++;
|
||||
return 1; # favor a -> front
|
||||
} elsif (! -w &dirname($b->{name})) {
|
||||
warn "Warning: $b->{name} not able to be unlinked!" unless $warned{$b->{name}}++;
|
||||
return -1; # favor b -> front
|
||||
}
|
||||
$b->{nlink} <=> $a->{nlink}
|
||||
} @_;
|
||||
}
|
||||
|
||||
sub _entries_are_already_hard_linked {
|
||||
my ($entry_a, $entry_b) = @_;
|
||||
|
||||
if ($entry_a->{ino} == $entry_b->{ino} and
|
||||
$entry_a->{dev} == $entry_b->{dev}) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
sub _entries_sizes_match {
|
||||
my ($entry_a, $entry_b) = @_;
|
||||
if ($entry_a->{size} != $entry_b->{size}) {
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
sub _entries_contents_match {
|
||||
my ($entry_a, $entry_b) = @_;
|
||||
|
||||
my $contents_same = (0 == &File::Compare::compare($entry_a->{name}, $entry_b->{name}));
|
||||
|
||||
# warn about hash collision
|
||||
unless ($contents_same) {
|
||||
warn "Hash collision between files:\n* $entry_a->{name}\n* $entry_b->{name}\n (don't panic)\n";
|
||||
}
|
||||
return $contents_same;
|
||||
}
|
||||
|
||||
# generate hardlink instructions
|
||||
sub instructions {
|
||||
require Scalar::Util;
|
||||
my $self = shift;
|
||||
|
||||
# start generating instructions
|
||||
my @inst;
|
||||
foreach my $bucket ($self->buckets) {
|
||||
|
||||
# of the bucket, find the oldest timestamp
|
||||
my ($oldest_entry) = $self->oldest_mtime(@{$bucket});
|
||||
|
||||
# of the bucket, find the file most embedded in the file system
|
||||
my @to_link = $self->more_linked(@{$bucket});
|
||||
my $most_linked_entry = shift @to_link;
|
||||
foreach my $entry (@to_link) {
|
||||
# XXX there shouldn't be a need to update entries' link counts,
|
||||
# since this generates all the instructions at once
|
||||
push @inst, Directory::Simplify::Instruction::Hardlink->new(
|
||||
source => $most_linked_entry,
|
||||
target => $entry,
|
||||
);
|
||||
push @inst, Directory::Simplify::Instruction::CopyTimestamp->new(
|
||||
source => $oldest_entry,
|
||||
target => $entry,
|
||||
);
|
||||
}
|
||||
if (&Scalar::Util::refaddr($most_linked_entry) != &Scalar::Util::refaddr($oldest_entry)) {
|
||||
# most_linked_entry should get its timestamp updated
|
||||
push @inst, Directory::Simplify::Instruction::CopyTimestamp->new(
|
||||
source => $oldest_entry,
|
||||
target => $most_linked_entry,
|
||||
);
|
||||
}
|
||||
}
|
||||
@inst
|
||||
}
|
||||
|
||||
package Directory::Simplify::File;
|
||||
use strict;
|
||||
use warnings;
|
||||
require Cwd;
|
||||
|
||||
sub new {
|
||||
my $class = shift;
|
||||
my $rel_name = shift;
|
||||
my $self = bless {
|
||||
rel_name => $rel_name,
|
||||
name => Cwd::abs_path($rel_name),
|
||||
}, $class;
|
||||
(@{$self}{qw/ dev ino mode nlink uid gid rdev size
|
||||
atime mtime ctime blksize blocks /})
|
||||
= lstat $self->{name};
|
||||
$self
|
||||
}
|
||||
|
||||
sub hash {
|
||||
my $self = shift;
|
||||
unless (defined $self->{_hash}) {
|
||||
require Digest::SHA;
|
||||
my $ctx = Digest::SHA->new;
|
||||
$ctx->addfile($self->{name});
|
||||
$self->{_hash} = $ctx->hexdigest;
|
||||
}
|
||||
$self->{_hash}
|
||||
}
|
||||
|
||||
package Directory::Simplify::FileHash;
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
Object for abstracting management of a hashed filesystem
|
||||
|
||||
=cut
|
||||
|
||||
sub new {
|
||||
my $class = shift;
|
||||
return bless {
|
||||
_entries => {},
|
||||
_files_in_hash => {},
|
||||
@_,
|
||||
}, $class;
|
||||
}
|
||||
|
||||
sub add {
|
||||
my $self = shift;
|
||||
my (@files, $callback);
|
||||
if (ref $_[0] eq 'HASH') {
|
||||
# Called method like { files => [] }
|
||||
my %opts = %{$_[0]};
|
||||
@files = @{$opts{files}};
|
||||
$callback = $opts{callback};
|
||||
} else {
|
||||
@files = @_;
|
||||
}
|
||||
foreach my $file (@files) {
|
||||
unless (ref $file eq 'Directory::Simplify::File') {
|
||||
$file = Directory::Simplify::File->new($file);
|
||||
}
|
||||
unless ($self->{_files_in_hash}->{$file->{name}}) {
|
||||
my $hash = $file->hash;
|
||||
|
||||
unless (defined $self->{_entries}->{$hash}) {
|
||||
$self->{_entries}->{$hash} = [];
|
||||
}
|
||||
push @{$self->{_entries}->{$hash}}, $file;
|
||||
&{$callback}($file) if ref $callback eq 'CODE';
|
||||
}
|
||||
$self->{_files_in_hash}->{$file->{name}} = 1;
|
||||
}
|
||||
}
|
||||
|
||||
sub entries {
|
||||
my $self = shift;
|
||||
values %{$self->{_entries}}
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
#!/usr/bin/perl
|
||||
# vi: et sts=4 sw=4 ts=4
|
||||
|
||||
# Squashes together the parts of the app into the single script.
|
||||
# (Adapted from the script that squashes App::Ack - see https://github.com/beyondgrep/ack3)
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
my $code;
|
||||
for my $arg (@ARGV) {
|
||||
my $filename = $arg;
|
||||
if ($arg =~ /::/) {
|
||||
my $key = "$arg.pm";
|
||||
$key =~ s{::}{/}g;
|
||||
$filename = $INC{$key} or die "Can't find the file for $arg";
|
||||
}
|
||||
|
||||
warn "Reading $filename\n";
|
||||
open my $fh, '<', $filename or die "Can't open $filename: $!";
|
||||
my $in_pod = 0;
|
||||
my $in_ignore_section = 0;
|
||||
while (<$fh>) {
|
||||
if (/#.*:squash-ignore-start:$/) {
|
||||
$in_ignore_section = 1;
|
||||
next;
|
||||
} elsif (/#.*:squash-ignore-end:$/) {
|
||||
$in_ignore_section = 0;
|
||||
next;
|
||||
}
|
||||
next if $in_ignore_section;
|
||||
next if /#.*:squash-ignore-line:$/;
|
||||
|
||||
next if /^\s*1;$/;
|
||||
|
||||
if ($filename =~ /\.pm$/) {
|
||||
# See if we're in module POD blocks
|
||||
if (/^=(\w+)/) {
|
||||
$in_pod = ($1 ne 'cut');
|
||||
next;
|
||||
}
|
||||
elsif ($in_pod) {
|
||||
next;
|
||||
}
|
||||
next if /^# vi:/;
|
||||
}
|
||||
|
||||
# Remove Perl::Critic comments.
|
||||
# I'd like to remove all comments, but this is a start
|
||||
s{\s*##.+critic.*}{};
|
||||
$code .= $_;
|
||||
}
|
||||
close $fh;
|
||||
}
|
||||
|
||||
# Remove repeated newlines between paragraphs
|
||||
$code =~ s/\n\n+/\n\n/gs;
|
||||
|
||||
print $code;
|
||||
|
||||
exit 0;
|
Loading…
Reference in New Issue