146 lines
No EOL
5.6 KiB
TypeScript
146 lines
No EOL
5.6 KiB
TypeScript
/**
 * Scrubs a single string: the first matching rule below decides the placeholder.
 *
 * Rule order is significant (e.g. the email rule runs before the media-file
 * rule, and the all-digits rule runs before the hexadecimal rule), so keep new
 * rules in the position that mirrors scrub.jq.
 */
function scrubString(value: string): string {
  // IPv4
  if (/^(([0-9]{1,3}\.){3}[0-9]{1,3})$/.test(value)) {
    return "1.1.1.1";
  }
  // IPv6
  // NOTE(review): this pattern also matches bare clock times such as
  // "10:09:08"; left unchanged to stay aligned with scrub.jq - confirm.
  if (/^([0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}$/.test(value)) {
    return "2000:0000:0000:0000:0000:0000:0000:0000";
  }
  // Email-like
  if (/^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/.test(value)) {
    return "not_a_real_email@example.com";
  }
  // Media file names/paths: leave these alone, you will have to manually go
  // through these later and replace with placeholders
  if (/\.(jpg|jpeg|png|gif|bmp|webp|svg|ico|tiff|mp3|wav|flac|aac|ogg|wma|m4a|mp4|avi|mkv|mov|wmv|flv|webm)$/i.test(value)) {
    console.error(`MANUAL REPAIR NEEDED: ${value}`);
    return value;
  }
  // ISO date time without millis, with timezone offset
  if (/^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}[+-][0-9]{2}:[0-9]{2}$/.test(value)) {
    return "2020-04-13T10:09:08+00:00";
  }
  // ISO date time with millis, with timezone offset (the no-millis form was
  // already handled by the previous rule)
  if (/^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]{1,6})?[+-][0-9]{2}:[0-9]{2}$/.test(value)) {
    return "2020-04-13T10:09:08.000000+00:00";
  }
  // ISO date time with optional millis and NO timezone
  if (/^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]{1,6})?$/.test(value)) {
    return "2020-04-13T10:09:08.000";
  }
  // ISO date time with Z at the end (from fitbit export)
  if (/^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$/.test(value)) {
    return "2020-04-13T10:09:08Z";
  }
  // ISO date time with fractional seconds and Z at the end (from fitbit export)
  if (/^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{1,6}Z$/.test(value)) {
    return "2020-04-13T10:09:08.000000Z";
  }
  // ISO date time, no seconds (from fitbit export)
  if (/^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}$/.test(value)) {
    return "2020-04-13T10:09";
  }
  // Date time range (from fitbit export)
  if (/^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2} - [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}$/.test(value)) {
    return "2020-04-13 10:09:08 - 2020-04-13 10:09:08";
  }
  // Date time with optional millis, timezone without colon, no T (from fitbit export)
  if (/^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]{1,6})?[+-][0-9]{4}$/.test(value)) {
    return "2020-04-13 10:09:08+0000";
  }
  // Just a date
  if (/^[0-9]{4}-[0-9]{2}-[0-9]{2}$/.test(value)) {
    return "2020-04-13";
  }
  // Date format from snapchat export (with UTC suffix)
  if (/^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2} UTC$/.test(value)) {
    return "2020-04-13 10:09:08 UTC";
  }
  // Date format from snapchat export (no suffix)
  if (/^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}$/.test(value)) {
    return "2020-04-13 10:09:08";
  }
  // Date format from fitbit export (MM/DD/YY)
  if (/^[0-9]{2}\/[0-9]{2}\/[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}$/.test(value)) {
    return "04/13/20 10:09:08";
  }
  // Date format from fitbit export (uughhh)
  if (/^\w{3} \w{3} \d{2} \d{2}:\d{2}:\d{2} UTC \d{4}$/.test(value)) {
    return "Mon Apr 13 10:09:08 UTC 2020";
  }
  // Positive UTC offset (from fitbit export). The minutes field accepts any
  // two digits so offsets such as +05:30 or +05:45 are recognised.
  if (/^\+[0-9]{2}:[0-9]{2}$/.test(value)) {
    return "+00:00";
  }
  // Negative UTC offset (from fitbit export), e.g. -03:30.
  if (/^-[0-9]{2}:[0-9]{2}$/.test(value)) {
    return "-00:00";
  }
  // All digits: preserve the length of the string
  if (/^[0-9]+$/.test(value)) {
    return "1".repeat(value.length);
  }
  // Decimal string: preserve shape (sign, dot position, digit counts)
  if (/^-?[0-9]*(\.[0-9]*)?$/.test(value) && /[0-9]/.test(value)) {
    return value.replace(/[0-9]/g, "1");
  }
  // Hexadecimal string: repeat the "a1" pattern and truncate to original length
  if (/^[0-9a-fA-F]+$/.test(value)) {
    return "a1".repeat(value.length).slice(0, value.length);
  }
  // Explicit empty-string rule, kept for parity with scrub.jq where it
  // prevents the empty string from turning into null.
  if (value === "") {
    return "";
  }
  // Stringified booleans collapse to "false", same as real booleans
  if (value === "true" || value === "false") {
    return "false";
  }
  // Anything containing a scheme separator is treated as a URL
  if (/:\/\//.test(value)) {
    return "url://somewhere";
  }
  // Anything else containing a slash is treated as a path
  if (/\//.test(value)) {
    return "some/path";
  }
  if (value === "null") {
    return "null";
  }
  // Preserve string length for all other strings
  return "x".repeat(value.length);
}

/**
 * Scrubs a single number while keeping its rough shape.
 *
 * - Values in [946702800, 1893474000] (roughly years 2000-2030 when read as
 *   Unix epoch seconds) are folded into a one-year window starting at
 *   1704067200 and rounded down to a multiple of 5000.
 * - Other integers become a run of 1s as long as their string form.
 * - Other finite decimals keep their sign, a leading "0" integer part, and
 *   the length of the integer and fractional parts.
 * - NaN and +/-Infinity are returned unchanged.
 */
function scrubNumber(value: number): number {
  if (946702800 <= value && value <= 1893474000) {
    // Take modulo 1 year (31557600 s = 365.25 days) to get variance in the
    // output, then add an offset to bring it to ~2024
    return Math.floor(((value % 31557600) + 1704067200) / 5000) * 5000;
  }
  if (Number.isInteger(value)) {
    // Integer - preserve the length of its string form.
    // NOTE(review): for negative integers the "-" counts toward the length and
    // the result is positive (e.g. -5 -> 11); kept as-is pending a check
    // against scrub.jq.
    return Number("1".repeat(value.toString().length));
  }
  if (!Number.isFinite(value)) {
    // NaN / Infinity have no digits to scrub and no "." to split on.
    return value;
  }
  // Decimal - preserve shape, sign, and leading zero
  const sign = value < 0 ? "-" : "";
  // Very small magnitudes stringify in exponent form without a "." (e.g.
  // "1e-7"), so the fractional part may be missing; default it to "" instead
  // of dereferencing undefined.
  const [intDigits = "", fracDigits = ""] = Math.abs(value).toString().split(".");
  const intPart = intDigits === "0" ? "0" : "1".repeat(intDigits.length);
  const fracPart = "1".repeat(fracDigits.length);
  return Number(`${sign}${intPart}.${fracPart}`);
}

/**
 * Scrubs a primitive the same way scrub.jq does, replacing potentially
 * sensitive content with a fixed or shape-preserving placeholder.
 *
 * @param value - Any value. Strings, numbers and booleans are scrubbed; every
 *   other type (null, undefined, objects, ...) is returned untouched.
 * @returns The placeholder for `value`: a string for string input, a number
 *   for number input, `false` for boolean input, otherwise `value` itself.
 *
 * Side effect: strings that look like media file names are logged to stderr
 * with a "MANUAL REPAIR NEEDED" prefix and returned unmodified.
 *
 * @todo Keep in sync with scrub.jq, as ideally they'd just be the same code...
 * but different languages, ugh
 */
export function scrubPrimitive(value: unknown): unknown {
  if (typeof value === "string") {
    return scrubString(value);
  }
  if (typeof value === "number") {
    return scrubNumber(value);
  }
  if (typeof value === "boolean") {
    // Replace all booleans with false, this can give sensitive info away based
    // on what the key was in the data
    return false;
  }
  return value;
}