* aggregateId is now metadata and it's just aggregate: boolean and uses .id instead * Use csv-parse for tests * Update test snapshots
115 lines
4.3 KiB
TypeScript
115 lines
4.3 KiB
TypeScript
import { pipe, branch, cmd, assignMeta, cd, glob, read, branchGen, type PipelineOp } from "./task.ts";
|
|
import { htmlSelectorChunkedDuplex } from "./html.ts";
|
|
|
|
/**
 * Builds the pipeline for a Google Takeout export.
 *
 * Every task is first tagged with a generic `Google - <basename>` id via
 * `assignMeta`, then `branchGen` yields one sub-pipeline per dataset that is
 * currently handled:
 *   - Access log activity CSV and devices CSV (read as-is)
 *   - Chrome browsing history (JSON converted to CSV with jq)
 *   - Google Pay transaction CSVs
 *   - Location History (JSON converted to CSV with jq)
 *
 * Paths are relative to wherever the caller has positioned the pipeline;
 * presumably that is the Takeout root folder (see the TODO below) — confirm
 * against the caller.
 *
 * @returns the value produced by `pipe(...)` from ./task.ts
 */
export function google(){
  return pipe(
    // Generic ID for everything in here
    assignMeta({ idValue: t=>`Google - ${t.basename}` }),
    branchGen(function*() {
      // TODO: There is a root takeout folder

      yield pipe(cd('Access Log Activity/Activities - A list of Google services accessed by.csv'), read());
      yield pipe(cd('Devices - A list of devices (i.e. Nest, Pixel, iPh.csv'), read());

      // Assignments - data was empty
      // Business messages - GMB messages, there's some but so far outside of what I want
      // TODO: Calendar, exports an .ics

      // a = t.fork().cd(`Chrome`)
      // TODO: Addresses and more.json
      // TODO: Bookmarks.csv
      // TODO: Device Information.json
      // TODO: Dictionary.csv
      // TODO: ...
      yield pipe(
        cd('Chrome/History.json'),
        read(),
        // TODO: "Typed Url", no data
        // TODO: "session", complex data
        // Omitted .ptoken and .client_id for now. I think ptoken is maybe for the history API? client_id is base64 something...
        // time_usec is in microseconds, but jq's todateiso8601 expects whole
        // seconds since the Unix epoch, so scale it down and floor it first.
        // The column header is kept as "time_usec" so the CSV schema is unchanged.
        cmd(["jq", "-r", `["favicon_url","page_transition","title","url","time_usec"],
(
  ."Browser History"[]
  | [.favicon_url, .page_transition, .title, .url, (.time_usec / 1000000 | floor | todateiso8601)]
)
| @csv
`])
      );

      // TODO: Contacts, exports a .vcf
      // TODO: ...

      // a = t.fork().cd(`Google Pay`)
      yield pipe(
        cd(`Google Pay/Google transactions`),
        glob(`transactions_*.csv`),
        read(),
        // .fork("a").cd(`Money sends and requests`)
        // .fork().cd(`Money sends and requests.csv`)
        // .read()
        // .cmd(t=>["sqlite-utils", "insert", "your.db", t.basename, "-", "--csv", "--detect-types"])
        // TODO: One more folder, and it only has a pdf
      );

      // TODO: Google Play Movies _ TV - no data
      // TODO: ...

      yield pipe(
        cd("Location History/Location History.json"),
        read(),
        // TODO: This is missing
        //   "altitude" : 158,
        //   "verticalAccuracy" : 68
        // and the activity models. I had no idea google tries to determine if I'm "tilting"
        //
        // The timestamp expression must be parenthesised: in jq `|` binds
        // looser than `,`, so without the parentheses the remaining fields
        // would be looked up on the timestamp instead of on the location.
        // timestampMs is in milliseconds (tonumber also accepts it when it is
        // exported as a string), while todateiso8601 expects seconds.
        cmd(["jq", "-r", `["timestamp","latitudeE7","longitudeE7","accuracy"],
(
  .locations[]
  | [((.timestampMs | tonumber) / 1000 | floor | todateiso8601), .latitudeE7, .longitudeE7, .accuracy]
)
| @csv
`])
      );
      // There's also the semantic history but that's an entire nother can of worms
      // it seems like

      // TODO: Needs no-headers!
      // a = t.fork().cd(`My Activity`)
      // a.fork().glob(`**/MyActivity.html`)
      //   .setId(t=>`Google - ${t.basenameN(2)}`)
      //   .read()
      //   .pipe(()=>{
      //     // Parses the MyActivity format, chunking it into pieces of HTML text
      //     // and then parsing out the text
      //     const dup = htmlSelectorChunkedDuplex(
      //       (tag, attrs)=>{
      //         // TODO: We also probably want to get and parse each
      //         // ".content-cell.mdl-typography--caption" as well (it
      //         // has location for websearches and sometimes a details field)
      //         // but then we have to get ".mdl-grid" and parse it
      //         return attrs.class?.includes("content-cell")
      //           && attrs.class?.includes("mdl-typography--body-1")
      //           && !attrs.class?.includes("mdl-typography--text-right")
      //       },
      //       (chunk)=>{
      //         const text = chunk.innerText;
      //         const split = text.split("\n");
      //         const timestamp = split.pop(); // TODO: need to parse this
      //         const rest = split.join("\n");
      //         // TODO: Escape instead of replace
      //         const restSafe = rest.replace(/"/g, "'").replace(/\n/g,"\\n"); // escape newlines and quotes
      //         // Return a CSV
      //         return `"${restSafe}","${timestamp}"\n`;
      //       }
      //     );
      //     return dup;
      //   })

      // TODO: News
      // TODO: Profile
      // TODO: Tasks - No data
    })
  );
}
|
|
|