base-data-manager/util/scrub_primitive.ts

146 lines
No EOL
5.6 KiB
TypeScript

/** One ordered scrubbing rule for strings; the first rule whose pattern matches wins. */
interface StringScrubRule {
  readonly pattern: RegExp;
  /** Fixed placeholder, or a function deriving the placeholder from the input. */
  readonly replacement: string | ((value: string) => string);
}

/**
 * Ordered string rules. ORDER MATTERS: several patterns overlap (e.g. an
 * email-like `x@y.png` must be treated as an email before the media-extension
 * rule sees it, and all-digit strings must be handled before the hex rule).
 * Patterns are hoisted here so they are compiled once, not on every call.
 */
const STRING_SCRUB_RULES: readonly StringScrubRule[] = [
  // IPv4
  { pattern: /^(([0-9]{1,3}\.){3}[0-9]{1,3})$/, replacement: "1.1.1.1" },
  // IPv6
  {
    pattern: /^([0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}$/,
    replacement: "2000:0000:0000:0000:0000:0000:0000:0000",
  },
  // Email-like
  {
    pattern: /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/,
    replacement: "not_a_real_email@example.com",
  },
  // Media file names: leave these alone, you will have to manually go through
  // these later and replace with placeholders
  {
    pattern: /\.(jpg|jpeg|png|gif|bmp|webp|svg|ico|tiff|mp3|wav|flac|aac|ogg|wma|m4a|mp4|avi|mkv|mov|wmv|flv|webm)$/i,
    replacement: (value) => {
      console.error(`MANUAL REPAIR NEEDED: ${value}`);
      return value;
    },
  },
  // iso date time without millis with timezone
  {
    pattern: /^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}[+-][0-9]{2}:[0-9]{2}$/,
    replacement: "2020-04-13T10:09:08+00:00",
  },
  // iso date time with millis with timezone
  {
    pattern: /^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]{1,6})?[+-][0-9]{2}:[0-9]{2}$/,
    replacement: "2020-04-13T10:09:08.000000+00:00",
  },
  // iso date time with optional millis and NO timezone
  {
    pattern: /^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]{1,6})?$/,
    replacement: "2020-04-13T10:09:08.000",
  },
  // iso date time with Z at the end (from fitbit export)
  {
    pattern: /^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$/,
    replacement: "2020-04-13T10:09:08Z",
  },
  // iso date time with fractional seconds and Z at the end (from fitbit export)
  {
    pattern: /^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{1,6}Z$/,
    replacement: "2020-04-13T10:09:08.000000Z",
  },
  // iso date time no seconds (from fitbit export)
  {
    pattern: /^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}$/,
    replacement: "2020-04-13T10:09",
  },
  // iso date time range (from fitbit export)
  {
    pattern: /^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2} - [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}$/,
    replacement: "2020-04-13 10:09:08 - 2020-04-13 10:09:08",
  },
  // iso date time with optional millis, timezone without colon, no T (from fitbit export)
  {
    pattern: /^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]{1,6})?[+-][0-9]{4}$/,
    replacement: "2020-04-13 10:09:08+0000",
  },
  // just date
  { pattern: /^[0-9]{4}-[0-9]{2}-[0-9]{2}$/, replacement: "2020-04-13" },
  // Date format from snapchat export
  {
    pattern: /^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2} UTC$/,
    replacement: "2020-04-13 10:09:08 UTC",
  },
  // Date format from snapchat export
  {
    pattern: /^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}$/,
    replacement: "2020-04-13 10:09:08",
  },
  // Date format from fitbit export (MM/DD/YY)
  {
    pattern: /^[0-9]{2}\/[0-9]{2}\/[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}$/,
    replacement: "04/13/20 10:09:08",
  },
  // Date format from fitbit export (uughhh)
  {
    pattern: /^\w{3} \w{3} \d{2} \d{2}:\d{2}:\d{2} UTC \d{4}$/,
    replacement: "Mon Apr 13 10:09:08 UTC 2020",
  },
  // UTC offset (from fitbit export). Minutes are [0-9]{2}: the previous
  // [0-2]{2} rejected real offsets such as +05:30 and +05:45.
  { pattern: /^\+[0-9]{2}:[0-9]{2}$/, replacement: "+00:00" },
  { pattern: /^-[0-9]{2}:[0-9]{2}$/, replacement: "-00:00" },
  // All-digit string - preserve length of the string
  { pattern: /^[0-9]+$/, replacement: (value) => "1".repeat(value.length) },
  // Decimal string (must contain at least one digit) - preserve shape
  {
    pattern: /^(?=.*[0-9])-?[0-9]*(\.[0-9]*)?$/,
    replacement: (value) => value.replace(/[0-9]/g, "1"),
  },
  // Hexadecimal string - repeat the "a1" pattern and truncate to original length
  {
    pattern: /^[0-9a-fA-F]+$/,
    replacement: (value) => "a1".repeat(Math.ceil(value.length / 2)).slice(0, value.length),
  },
];

/** Inclusive bounds of numbers treated as epoch-second timestamps (roughly years 2000 to 2030). */
const EPOCH_SECONDS_MIN = 946702800;
const EPOCH_SECONDS_MAX = 1893474000;
/** Seconds in 365.25 days. */
const SECONDS_PER_YEAR = 31557600;
/** 2024-01-01T00:00:00Z in epoch seconds; scrubbed timestamps land within a year after this. */
const SCRUBBED_EPOCH_BASE = 1704067200;
/** Scrubbed timestamps are rounded down to a multiple of this many seconds. */
const TIMESTAMP_GRANULARITY = 5000;

/** Scrubs a string: first matching rule in STRING_SCRUB_RULES, then the generic fallbacks. */
function scrubString(value: string): string {
  for (const { pattern, replacement } of STRING_SCRUB_RULES) {
    if (pattern.test(value)) {
      return typeof replacement === "string" ? replacement : replacement(value);
    }
  }
  if (value === "") {
    // prevents empty string from just returning null instead of empty string
    return "";
  }
  if (value === "true" || value === "false") {
    return "false";
  }
  if (/:\/\//.test(value)) {
    return "url://somewhere";
  }
  if (/\//.test(value)) {
    return "some/path";
  }
  if (value === "null") {
    return "null";
  }
  // Preserve string length for other strings
  return "x".repeat(value.length);
}

/** Scrubs a number while preserving its sign and digit "shape". */
function scrubNumber(value: number): number {
  if (!Number.isFinite(value)) {
    // NaN / +-Infinity have no digits to scrub (previously this threw a TypeError).
    return value;
  }
  if (EPOCH_SECONDS_MIN <= value && value <= EPOCH_SECONDS_MAX) {
    // Take modulo 1 year to get variance in the output, then add offset to bring to ~2024
    return (
      Math.floor(((value % SECONDS_PER_YEAR) + SCRUBBED_EPOCH_BASE) / TIMESTAMP_GRANULARITY) *
      TIMESTAMP_GRANULARITY
    );
  }
  const sign = value < 0 ? "-" : "";
  // Very large / very small magnitudes stringify as "<mantissa>e<exponent>";
  // scrub only the mantissa digits and keep the exponent so magnitude survives.
  const [mantissa = "", exponent] = Math.abs(value).toString().split("e");
  const [intDigits = "", fracDigits] = mantissa.split(".");
  // Keep a leading "0." for decimals below 1; an integer 0 still becomes 1.
  const intPart =
    fracDigits !== undefined && intDigits === "0" ? "0" : "1".repeat(intDigits.length);
  const fracPart = fracDigits === undefined ? "" : `.${"1".repeat(fracDigits.length)}`;
  const expPart = exponent === undefined ? "" : `e${exponent}`;
  return Number(`${sign}${intPart}${fracPart}${expPart}`);
}

/**
 * Scrubs a primitive the same as scrub.jq, replacing potentially sensitive
 * content with a placeholder of the same general shape.
 *
 * - Strings: recognised formats (IPs, emails, date/time layouts, UTC offsets)
 *   map to fixed placeholders; digit/decimal/hex strings keep their length and
 *   shape; media file names are returned unchanged and reported on stderr for
 *   manual repair; anything else becomes `x` repeated to the original length.
 * - Numbers: values in the epoch-seconds window are remapped to a coarse
 *   timestamp in ~2024; other numbers have every digit replaced by `1`, keeping
 *   sign, digit count and a leading `0.`.
 * - Booleans: always `false`, since the key alone could make the value sensitive.
 * - Everything else (null, undefined, objects, ...) is returned untouched.
 *
 * @todo Keep in sync with scrub.jq, as ideally they'd just be the same code...
 * but different languages, ugh. NOTE(review): the UTC-offset minute fix and the
 * negative-integer sign handling here may need mirroring in scrub.jq - confirm.
 *
 * @param value - Any value; only primitives are transformed.
 * @returns The scrubbed value, of the same primitive type as the input.
 */
export function scrubPrimitive(value: unknown): unknown {
  if (typeof value === "string") {
    return scrubString(value);
  }
  if (typeof value === "number") {
    return scrubNumber(value);
  }
  if (typeof value === "boolean") {
    // Replace all booleans with false, this can give sensitive info away based
    // on what the key was in the data
    return false;
  }
  return value;
}