2021-06-29 16:26:43 +00:00
|
|
|
#!/bin/bash
|
|
|
|
# vi: et sts=4 sw=4 ts=4
|
|
|
|
|
2021-09-29 20:18:05 +00:00
|
|
|
# Print the one-line invocation synopsis on stdout.
# The program name shown is the basename of $0.
USAGE() {
    local PROG=${0##*/}

    printf 'Usage: %s [OPTIONS] [--] FILE...\n' "$PROG"
}
|
|
|
|
|
|
|
|
# Print the full help text on stdout: the synopsis from USAGE, followed by
# the option reference and the licence notice.
HELP_MESSAGE() {
    USAGE

    # The delimiter is quoted because the text contains nothing to expand;
    # it is emitted literally.
    cat <<'EOF'
Optimize PDF files for size.

--help, -h Show this help message.
--backup=SUFFIX Keep a backup of un-optimized PDF files.
--no-backup Turn off backups (default).
--careful Stop if there are warnings in the PDF (default).
--no-careful Don't stop if there are warnings in the PDF.
--force, -f Force overwriting backups. No effect if --no-backup.
--no-force Prompt when overwriting backups (default). No effect if
--no-backup.
--lossy Let qpdf(1) try to optimize images for size. MAY LEAD TO
IMAGE QUALITY DEGRADATION. See qpdf(1) for more
information.
--no-lossy Don't optimize images (default). This is safer.
--preserve-timestamp Copy timestamp from original file.
--no-preserve-timestamp Omit timestamp from original file (default).
-- Terminate options list.

Copyright (C) 2021 Dan Church.
License GPLv3: GNU GPL version 3.0 (https://www.gnu.org/licenses/gpl-3.0.html)
with Commons Clause 1.0 (https://commonsclause.com/).
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
You may NOT use this software for commercial purposes.
EOF
}
|
|
|
|
|
2021-06-29 16:26:43 +00:00
|
|
|
# Option defaults; each one can be changed by the command-line flags parsed
# further down.
PRESERVE_TIMESTAMP=0    # non-zero: copy the original's timestamp to the result
LOSSY=0                 # non-zero: pass --optimize-images to qpdf
KEEP_BACKUP_SUFFIX=     # non-empty: originals are kept as FILE + this suffix
FORCE_OVERWRITE=0       # 0: mv(1) is run with -i when writing a backup
ENCODE_THRU_WARNINGS=0  # non-zero: pass --warning-exit-0 to qpdf

# Private scratch directory for qpdf output; removed again by cleanup().
# Nothing useful can be done without it, so give up right away when it
# cannot be created instead of continuing with an empty TEMP_DIR.
if ! TEMP_DIR=$(mktemp -d -t "${0##*/}.XXXXXX"); then
    printf '%s: cannot create temporary directory\n' "${0##*/}" >&2
    exit 1
fi
|
2021-06-29 16:26:43 +00:00
|
|
|
# Delete the scratch directory named by TEMP_DIR, together with anything
# still inside it.
cleanup() {
    rm --recursive --force -- "$TEMP_DIR"
}

# Run the clean-up when the shell exits.
trap cleanup EXIT
|
|
|
|
|
2021-06-29 19:54:58 +00:00
|
|
|
# Split the command line into option flags and FILE operands.
FILES=()
NO_MORE_FLAGS=0
for ARG in "$@"; do
    # Operands: everything after `--', plus any argument that does not begin
    # with a dash
    if [[ $NO_MORE_FLAGS -ne 0 || $ARG != -* ]]; then
        FILES+=("$ARG")
        continue
    fi

    case "$ARG" in
        --backup=*)              KEEP_BACKUP_SUFFIX=${ARG#*=} ;;
        --no-backup)             KEEP_BACKUP_SUFFIX= ;;
        --careful)               ENCODE_THRU_WARNINGS=0 ;;
        --no-careful)            ENCODE_THRU_WARNINGS=1 ;;
        --force|-f)              FORCE_OVERWRITE=1 ;;
        --no-force)              FORCE_OVERWRITE=0 ;;
        --lossy)                 LOSSY=1 ;;
        --no-lossy)              LOSSY=0 ;;
        --preserve-timestamp)    PRESERVE_TIMESTAMP=1 ;;
        --no-preserve-timestamp) PRESERVE_TIMESTAMP=0 ;;
        --help|-h)
            HELP_MESSAGE
            exit 0
            ;;
        --)
            # Every remaining argument is an operand, even if it starts
            # with a dash
            NO_MORE_FLAGS=1
            ;;
        *)
            printf 'Unrecognized flag: %s\n' "$ARG" >&2
            USAGE >&2
            exit 2
            ;;
    esac
done

# At least one FILE operand is required
if (( ${#FILES[@]} == 0 )); then
    USAGE >&2
    exit 1
fi
|
|
|
|
|
2021-06-29 16:26:43 +00:00
|
|
|
# Print the size in bytes of each file given as an argument, following
# symbolic links. Diagnostics from stat(1) are discarded; its exit status is
# the function's exit status.
file_size() {
    stat -L -c '%s' -- "$@" 2>/dev/null
}
|
|
|
|
|
2021-06-29 16:51:50 +00:00
|
|
|
# Produces a human-readable size from the byte count passed to it.
#
# Arguments:
#   $1 - byte count (integer)
#   $2 - optional: number of decimal places to compute (default 1); trailing
#        zeros are dropped from the printed value
# Outputs:
#   "<value> <unit>" on stdout, where <unit> is one of B KB MB GB TB
#
# The body is a subshell, so none of the variables set here reach the caller.
hr_size() (
    # printf's %g parses its argument according to the current locale; force
    # the C locale so that the "." put into the value below is accepted
    LC_ALL=C

    declare -i BYTES=$1
    declare -i DECIMALS=${2:-1}
    if (( DECIMALS < 0 )); then
        DECIMALS=0
    fi

    # Start from known-empty values: a subshell inherits the caller's
    # variables, and a stray HR_UNIT would otherwise defeat the fallback below
    HR_VAL=
    HR_UNIT=

    #UNITS=(B KB MB GB TB PB EB ZB YB) # shell math can only go so far...
    UNITS=(B KB MB GB TB)
    FACT=1024
    # a unit applies once the size exceeds 9/10 of it
    THRESH_NUM=9
    THRESH_DEN=10
    DECIMALS_FACTOR=$(( 10 ** DECIMALS ))

    # cycle through units from largest to smallest, exiting when it finds the
    # largest applicable unit
    for (( EXP = ${#UNITS[@]} - 1; EXP > -1; --EXP )); do
        # check if the size is close enough to the unit's size, within the
        # threshold
        if (( BYTES > FACT ** EXP * THRESH_NUM / THRESH_DEN )); then
            # we found the applicable unit

            # must multiply by a power of 10 here to not truncate
            # the given number of decimal places after the point
            HR_VAL=$(( BYTES * DECIMALS_FACTOR / FACT ** EXP ))

            # put the decimal point in; the fraction is zero-padded so that
            # e.g. 105 with two decimals becomes 1.05 rather than 1.5
            if (( DECIMALS > 0 )); then
                printf -v HR_VAL '%d.%0*d' \
                    "$(( HR_VAL / DECIMALS_FACTOR ))" \
                    "$DECIMALS" \
                    "$(( HR_VAL % DECIMALS_FACTOR ))"
            fi

            HR_UNIT=${UNITS[EXP]}
            break
        fi
    done

    # nothing matched (zero or negative count): report plain bytes
    if [[ -z $HR_UNIT ]]; then
        HR_VAL=$BYTES
        HR_UNIT=${UNITS[0]}
    fi

    printf '%g %s\n' "$HR_VAL" "$HR_UNIT"
)
|
|
|
|
|
2021-06-29 16:26:43 +00:00
|
|
|
# Copies $2 over $1 if $2 is a non-empty regular file smaller than $1.
#
# Globals (read):
#   FORCE_OVERWRITE    - 0: mv(1) is given -i when writing the backup
#   PRESERVE_TIMESTAMP - non-zero: the original's timestamp is copied over
#   KEEP_BACKUP_SUFFIX - non-empty: the original is first renamed to
#                        "$1$KEEP_BACKUP_SUFFIX"
# Arguments:
#   $1 - the original file
#   $2 - the candidate replacement; always deleted before returning
# Returns:
#   0 if the original was replaced, or left alone because the candidate is
#   not smaller; non-zero if a step of the replacement failed (a message is
#   also written to stderr in that case).
use_smaller() {
    local \
        FILE=$1 \
        TEMP=$2 \
        ORIGSIZE \
        TEMPSIZE \
        STATUS=0 \
        MV_ARGS=('-v')

    if [[ $FORCE_OVERWRITE -eq 0 ]]; then
        MV_ARGS+=('-i')
    fi

    ORIGSIZE=$(file_size "$FILE")
    TEMPSIZE=$(file_size "$TEMP")

    # if `$TEMP' isn't empty and it's of a smaller size than `$FILE',
    # replace `$FILE' with `$TEMP'
    if [[ -f $TEMP &&
        $TEMPSIZE -gt 0 &&
        $TEMPSIZE -lt $ORIGSIZE
    ]]; then

        {
            # Preserve attributes by copying them from the original file to
            # the temporary one
            chmod \
                --reference="$FILE" \
                -- \
                "$TEMP" &&

            if [[ $PRESERVE_TIMESTAMP -ne 0 ]]; then
                touch \
                    --reference="$FILE" \
                    -- \
                    "$TEMP"
            fi &&

            if [[ $UID -eq 0 ]]; then
                # We are root, so we can chown(1) things
                chown \
                    --reference="$FILE" \
                    -- \
                    "$TEMP"
            fi &&

            if [[ -n $KEEP_BACKUP_SUFFIX ]]; then
                # If the original is still in place afterwards, the move did
                # not happen (e.g. the -i prompt was declined); stop here
                # rather than overwrite a file that has no backup
                mv "${MV_ARGS[@]}" -- "$FILE" "$FILE$KEEP_BACKUP_SUFFIX" &&
                    [[ ! -e $FILE && ! -L $FILE ]]
            fi &&

            cp \
                --preserve=mode,ownership,timestamps \
                -- \
                "$TEMP" \
                "$FILE"
        } || STATUS=$?

        if [[ $STATUS -ne 0 ]]; then
            printf 'Failed to optimize "%s"!\n' \
                "$FILE" \
                >&2
        fi

    fi

    # Protect against unsuccessful following file writes to our TEMP file
    rm -f -- "$TEMP"

    # Report the failure to the caller; the status of rm(1) above would hide it
    return "$STATUS"
}
|
|
|
|
|
2021-06-29 18:30:42 +00:00
|
|
|
# Options handed to every qpdf invocation.
QPDF_ARGS=(
    --compression-level=9
    --deterministic-id
    --flatten-rotation
    --object-streams=generate
    --recompress-flate
    --stream-data=compress
)

# --lossy: additionally let qpdf optimize images
if (( LOSSY != 0 )); then
    QPDF_ARGS+=(--optimize-images)
fi

# --no-careful: additionally ask qpdf to exit 0 on warnings
if (( ENCODE_THRU_WARNINGS != 0 )); then
    QPDF_ARGS+=(--warning-exit-0)
fi
|
2021-06-29 18:30:42 +00:00
|
|
|
|
2021-06-29 16:26:43 +00:00
|
|
|
# Optimize every FILE operand in turn, keeping count of the failures and of
# the bytes saved.
ERRORS=0
FREED_TOTAL=0
for FILE in "${FILES[@]}"; do
    # Reserve a unique output name inside our scratch directory. The template
    # is spelled as a full path on purpose: with `-p DIR -t', GNU mktemp(1)
    # lets $TMPDIR take precedence over DIR, which would put the file outside
    # the directory that is cleaned up on exit.
    if ! TEMP=$(mktemp "$TEMP_DIR/file.XXXXXX"); then
        printf 'Failed to create a temporary file for "%s"!\n' \
            "$FILE" \
            >&2
        (( ++ERRORS ))
        continue
    fi
    # Only the name is needed; qpdf is left to create the output file itself
    rm -f -- "$TEMP"
    BEGIN_FILESIZE=$(file_size "$FILE")

    if
        qpdf "${QPDF_ARGS[@]}" \
            -- \
            "$FILE" "$TEMP" &&
        use_smaller "$FILE" "$TEMP"; then

        END_FILESIZE=$(file_size "$FILE")
        FREED=$(( BEGIN_FILESIZE - END_FILESIZE ))
        FREED_HR=$(hr_size "$FREED")
        (( FREED_TOTAL += FREED ))

        printf '%s: freed %d bytes (%s)\n' \
            "$FILE" \
            "$FREED" \
            "$FREED_HR"
    else
        # Don't let partial output pile up until the script exits
        rm -f -- "$TEMP"
        (( ++ERRORS ))
    fi
done

FREED_TOTAL_HR=$(hr_size "$FREED_TOTAL")
printf 'all: freed %d bytes (%s)\n' "$FREED_TOTAL" "$FREED_TOTAL_HR"

# A failure on any file makes the whole run exit non-zero
if [[ $ERRORS -gt 0 ]]; then
    exit 1
fi
|