136 lines
5.4 KiB
Text
136 lines
5.4 KiB
Text
# Use this to process json files before loading them into unit tests
|
|
# Something like:
|
|
# fd -t f .json -0 | xargs -I % -0 -- jq -f scrub.jq "%" > "%"
|
|
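# (Run it without the trailing `> "%"` first, so the result is only printed and
# nothing is persisted until you have confirmed it is what you want. Also note that
# the shell, not xargs, interprets `> "%"`, so as written the output is redirected
# to a file literally named `%` rather than back into each input file.)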
|
|
|
|
# Mask an object key.
# Input:  a string (object key).
# Output: keys consisting solely of ASCII digits are replaced by a run of "1"
#         characters of the same length; every other key is returned unchanged.
def scrub_key:
  if (test("^[0-9]+$") | not) then
    # Not purely numeric: the key name itself is kept as-is.
    .
  else
    # Purely numeric (at least one digit, so length >= 1): keep only the length.
    "1" * length
  end;
|
|
|
|
# Replace a single primitive JSON value with a placeholder of the same "shape".
# Input:  any JSON value; strings, numbers and booleans are rewritten, everything
#         else (e.g. null) is passed through unchanged.
# Output: a placeholder value of the same JSON type.
#
# For strings the branches are tried in order and the first matching pattern wins,
# so the order of the `elif` chain is significant (e.g. the all-digits check must
# come before the hexadecimal check).
def scrub_primitive:
  if type == "string" then
    if test("^(([0-9]{1,3}\\.){3}[0-9]{1,3})$") then
      # IPv4
      "1.1.1.1"
    elif test("^([0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}$") then
      # IPv6
      # NOTE(review): this pattern also matches time-of-day strings such as
      # "10:09:08" - confirm whether those occur in the exports and need their
      # own branch ahead of this one.
      "2000:0000:0000:0000:0000:0000:0000:0000"
    elif test("^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$") then
      # Email-like
      "not_a_real_email@example.com"
    elif test("\\.(jpg|jpeg|png|gif|bmp|webp|svg|ico|tiff|mp3|wav|flac|aac|ogg|wma|m4a|mp4|avi|mkv|mov|wmv|flv|webm)$"; "i") then
      # Media file names are not scrubbed automatically; you have to go through
      # them manually later and replace them with placeholders.
      # The marker string (which embeds the original value) is written to stderr
      # and, because `stderr` passes its input through, also becomes the output
      # value - so the original text is still present in the result.
      # TODO: jq 1.7 adds debug(), use this instead when I can upgrade jq, otherwise
      # you need to manually grep for MANUAL REPAIR NEEDED for now
      ("MANUAL REPAIR NEEDED: \(.)" | stderr) | .
    elif test("^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}[+\\-][0-9]{2}:[0-9]{2}$") then
      # ISO date time without fractional seconds, with timezone
      "2020-04-13T10:09:08+00:00"
    elif test("^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\\.[0-9]{1,6})?[+\\-][0-9]{2}:[0-9]{2}$") then
      # ISO date time with fractional seconds, with timezone
      # (the no-fraction form is already caught by the previous branch)
      "2020-04-13T10:09:08.000000+00:00"
    elif test("^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\\.[0-9]{1,6})?$") then
      # ISO date time with optional fractional seconds, no timezone
      "2020-04-13T10:09:08.000"
    elif test("^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$") then
      # ISO date time with Z at the end (from fitbit export)
      "2020-04-13T10:09:08Z"
    elif test("^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\\.[0-9]{1,6}Z$") then
      # ISO date time with Z at the end and fractional seconds (from fitbit export)
      "2020-04-13T10:09:08.000000Z"
    elif test("^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}$") then
      # ISO date time but no seconds (from fitbit export)
      "2020-04-13T10:09"
    elif test("^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2} - [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}$") then
      # ISO date time range, no T (from fitbit export)
      "2020-04-13 10:09:08 - 2020-04-13 10:09:08"
    elif test("^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}(\\.[0-9]{1,6})?[+\\-][0-9]{4}$") then
      # Date time with optional fractional seconds and a timezone without colon
      # (fitbit export)
      "2020-04-13 10:09:08+0000"
    elif test("^[0-9]{4}-[0-9]{2}-[0-9]{2}$") then
      # Just date
      "2020-04-13"
    elif test("^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2} UTC$") then
      # Date format from snapchat export
      "2020-04-13 10:09:08 UTC"
    elif test("^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}$") then
      # Date format from snapchat export
      "2020-04-13 10:09:08"
    elif test("^[0-9]{2}/[0-9]{2}/[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}$") then
      # Date format from fitbit export (wtf ugh)
      "04/13/20 10:09:08"
    elif test("^\\w{3} \\w{3} [0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2} UTC [0-9]{4}$") then
      # Date format from fitbit export (uughhh)
      "Mon Apr 13 10:09:08 UTC 2020"
    elif test("^\\+[0-9]{2}:[0-9]{2}$") then
      # UTC offset (from fitbit export).
      # Minutes accept any two digits so that offsets like +05:30 or +05:45 are
      # recognised instead of falling through to the generic "xxxxxx" masking.
      "+00:00"
    elif test("^-[0-9]{2}:[0-9]{2}$") then
      # negative UTC offset (from fitbit export); same minute handling as above
      "-00:00"
    elif test("^[0-9]+$") then
      # preserve length of the string
      "1" * length
    elif test("^-?[0-9]+(\\.[0-9]+)?$") then
      # decimal number in a string: every digit becomes "1", sign and "." are kept
      gsub("[0-9]"; "1")
    elif test("^[0-9a-fA-F]+$") then
      # hexadecimal string: repeat the "a1" pattern and truncate to original length
      ("a1" * length)[:length]
    elif . == "" then
      # prevents empty string from just returning null instead of empty string
      # (the final `"x" * length` branch would multiply by 0)
      ""
    elif . == "true" or . == "false" then
      "false"
    elif test("://") then
      "url://somewhere"
    elif test("/") then
      "some/path"
    elif . == "null" then
      # From fitbit export in csv
      "null"
    else
      # Preserve string length for other strings
      "x" * length
    end
  elif type == "number" then
    if 946702800 <= . and . <= 1893474000 then
      # Value lies in the range treated as an epoch-seconds timestamp.
      # Take modulo 1 year to get variance in the output, then add offset to bring
      # to ~2024, and round down to a multiple of 5000.
      ((((. % 31557600) + 1704067200) / 5000 | floor) * 5000)
    elif . == (. | floor) then
      # Integer - preserve digit count and sign. The "-" is split off first so it
      # is neither counted as a digit nor dropped from the result.
      tostring
      | (if startswith("-") then "-" else "" end) as $sign
      | ltrimstr("-")
      | ($sign + ("1" * length))
      | tonumber
    else
      # Decimal - preserve digit count, sign and a leading zero
      tostring | split(".") |
      (.[0]
        | if . == "0" or . == "-0" then .
          elif startswith("-") then "-" + ("1" * (length - 1))
          else "1" * length
          end) as $int_part |
      # If the textual form has no "." (e.g. exponent notation), there is no
      # fractional part to measure; use a single digit so that the string handed
      # to `tonumber` never ends in a bare ".".
      (.[1] | if . == null then "1" else "1" * length end) as $frac_part |
      ($int_part + "." + $frac_part) | tonumber
    end
  elif type == "boolean" then
    # Replace all booleans with false; the real value can give sensitive info
    # away based on what the key was in the data
    false
  else
    # null (and anything else that reaches here) is passed through untouched
    .
  end;
|
|
|
|
# Recursively scrub an arbitrary JSON document.
#   - arrays:  truncated to their first two elements, each of which is scrubbed
#   - objects: every key goes through scrub_key, every value through scrub
#   - other:   handed to scrub_primitive
def scrub:
  if type == "array" then
    # Only the first 2 elements survive, and those are scrubbed recursively
    [ .[:2][] | scrub ]
  elif type == "object" then
    # Rebuild the object from its entries with both key and value scrubbed
    to_entries
    | map({ key: (.key | scrub_key), value: (.value | scrub) })
    | from_entries
  else
    # Leaf value
    scrub_primitive
  end;
|
|
|
|
# Call scrub
|
|
#scrub
|