base-data-manager/util/scrub.jq

48 lines
No EOL
1.7 KiB
Text

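# Use this to process JSON files before loading them into unit tests.
# Preview the result for a single file first, without persisting anything, to be
# sure it's what you want:
#   jq -f scrub.jq some_file.json
# Then rewrite the files in place with something like:
#   fd -t f .json -0 | xargs -0 -I % -- sh -c 'jq -f scrub.jq "$1" > "$1.scrubbed" && mv "$1.scrubbed" "$1"' _ %
# (A plain `> "%"` after the xargs command does not work: the redirection is handled by
# the calling shell rather than by xargs, and redirecting onto the file jq is reading
# would truncate it before it is read.)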
# Replace every potentially sensitive leaf of a JSON document with a fixed
# placeholder while keeping the document's overall shape.
#
# Input:  any JSON value.
# Output: exactly one JSON value of the same type as the input:
#   - string  -> a placeholder chosen by what the string looks like (scrub_string)
#   - number  -> a coarsened timestamp around 2024, or the constant 69 (scrub_number)
#   - array   -> at most its first two elements, each scrubbed
#   - object  -> same keys in the same order, every value scrubbed (keys are kept as-is)
#   - true / false / null -> unchanged
#
# The document is traversed once, top-down: arrays are truncated *before* their
# elements are visited, so each surviving value is scrubbed exactly once and
# discarded elements never produce stderr output. (Scrubbing a value twice is not
# harmless: timestamps would be shifted again and media strings would pick up a
# second "MANUAL REPAIR NEEDED: " prefix.)
def scrub:
  # Choose a placeholder for a string. The tests are ordered from most to least
  # specific; the first one that matches wins.
  def scrub_string:
    if test("^(([0-9]{1,3}\\.){3}[0-9]{1,3})$") then
      # Dotted-quad IPv4 address
      "1.1.1.1"
    elif test("^([0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}$") then
      # Colon-separated hex groups (IPv6 address)
      "2000:0000:0000:0000:0000:0000:0000:0000"
    elif test("^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$") then
      # Email address
      "not_a_real_email@example.com"
    elif test("\\.(jpg|jpeg|png|gif|bmp|webp|svg|ico|tiff|mp3|wav|flac|aac|ogg|wma|m4a|mp4|avi|mkv|mov|wmv|flv|webm)$"; "i") then
      # Media file reference: these have to be replaced with placeholders by hand.
      # The message is written to stderr, and because `stderr` passes its input
      # through, the value in the output becomes the message itself (with the
      # original text embedded), so the output can be grepped for
      # MANUAL REPAIR NEEDED.
      # TODO: jq 1.7 adds debug(msg); switch to it once jq can be upgraded.
      "MANUAL REPAIR NEEDED: \(.)" | stderr
    elif test("://") then
      # Anything with a scheme separator is treated as a URL
      "url://somewhere"
    elif test("/") then
      # Anything else containing a slash is treated as a path
      "some/path"
    else
      "xxx"
    end;

  # Choose a placeholder for a number.
  def scrub_number:
    # The bounds are 2000-01-01T05:00:00Z and 2030-01-01T05:00:00Z when read as
    # Unix seconds; numbers in that range are presumably timestamps.
    if 946702800 <= . and . <= 1893474000 then
      # Take modulo 1 year (365.25 days) to keep some variance in the output, add
      # the offset of 2024-01-01T00:00:00Z to land in ~2024, then round down to a
      # multiple of 5000 seconds.
      ((. % 31557600) + 1704067200) / 5000 | floor | . * 5000
    else
      69
    end;

  if type == "object" then
    map_values(scrub)
  elif type == "array" then
    # Keep only the first 2 elements (fewer if the array is shorter), scrubbed
    .[:2] | map(scrub)
  elif type == "string" then
    scrub_string
  elif type == "number" then
    scrub_number
  else
    .
  end;
# Main program: run scrub on every JSON value jq reads from its input.
scrub