base-data-manager/data-export/google.ts

107 lines
4 KiB
TypeScript

import { TaskTargetPipelineHelper } from "./task.ts";
import { htmlSelectorChunkedDuplex } from "./html.ts";
/**
 * Builds the extraction pipelines for a Google Takeout export.
 *
 * Every pipeline starts from `this` re-labelled with a `Google - <basename>` id,
 * is handed the shared `collected` set through `collect()`, and is then pointed
 * at one file (or glob) of the export. JSON inputs are flattened to CSV with `jq`.
 *
 * Timestamps are converted to ISO-8601 with jq's `todateiso8601`, which expects
 * epoch *seconds*; the Takeout fields are in micro-/milliseconds and are scaled
 * down before conversion.
 *
 * @returns The contents of the set that was passed to every `collect()` call
 *          (presumably one entry per pipeline - confirm against `collect`).
 */
export function google(this: TaskTargetPipelineHelper): TaskTargetPipelineHelper[] {
  // Generic ID for everything in here
  const root = this.setId(t => `Google - ${t.basename}`);
  const collected: Set<TaskTargetPipelineHelper> = new Set();

  // jq program: Chrome "Browser History" entries -> CSV rows.
  // `time_usec` is in microseconds, so it is divided by 1e6 (and floored to whole
  // seconds) before `todateiso8601`. The column header is kept as "time_usec" so
  // the output schema does not change.
  const chromeHistoryToCsv = [
    `["favicon_url","page_transition","title","url","time_usec"],`,
    `(`,
    `."Browser History"[]`,
    `| [.favicon_url, .page_transition, .title, .url, (.time_usec / 1000000 | floor | todateiso8601)]`,
    `)`,
    `| @csv`,
  ].join("\n");

  // jq program: location records -> CSV rows.
  // The timestamp conversion must be parenthesised: in jq `|` binds looser than
  // `,`, so without the parentheses the remaining columns would be evaluated
  // against the timestamp value instead of the location record.
  // `timestampMs` is epoch milliseconds (a string in Takeout exports as far as
  // known - `tonumber` accepts numbers as well), hence `tonumber / 1000`.
  const locationHistoryToCsv = [
    `["timestamp","latitudeE7","longitudeE7","accuracy"],`,
    `(`,
    `.locations[]`,
    `| [(.timestampMs | tonumber / 1000 | floor | todateiso8601), .latitudeE7, .longitudeE7, .accuracy]`,
    `)`,
    `| @csv`,
  ].join("\n");

  // TODO: There is a root takeout folder

  root.collect(collected)
    .cd('Access Log Activity/Activities - A list of Google services accessed by.csv')
    .read();
  root.collect(collected)
    .cd('Devices - A list of devices (i.e. Nest, Pixel, iPh.csv')
    .read();

  // Assignments - data was empty
  // Business messages - GMB messages, there's some but so far outside of what I want
  // TODO: Calendar, exports an .ics

  // a = t.fork().cd(`Chrome`)
  // TODO: Addresses and more.json
  // TODO: Bookmarks.csv
  // TODO: Device Information.json
  // TODO: Dictionary.csv
  // TODO: ...
  // TODO: "Typed Url", no data
  // TODO: "session", complex data
  // Omitted .ptoken and .client_id for now. I think ptoken is maybe for the history API? client_id is base64 something...
  // NOTE(review): unlike the Location History pipeline, this one does not end in
  // .csvSink() - confirm whether that is intentional.
  root.collect(collected)
    .cd('Chrome/History.json')
    .read()
    .cmd(["jq", "-r", chromeHistoryToCsv]);

  // TODO: Contacts, exports an .vcf
  // TODO: ...

  // a = t.fork().cd(`Google Pay`)
  root.collect(collected)
    .cd(`Google Pay/Google transactions`)
    .glob(`transactions_*.csv`)
    .read()
    .csvSink();
  // .fork("a").cd(`Money sends and requests`)
  // .fork().cd(`Money sends and requests.csv`)
  // .read()
  // .cmd(t=>["sqlite-utils", "insert", "your.db", t.basename, "-", "--csv", "--detect-types"])
  // TODO: One more folder, and it only has a pdf

  // TODO: Google Play Movies _ TV - no data
  // TODO: ...

  // TODO: This is missing
  // "altitude" : 158,
  // "verticalAccuracy" : 68
  // and the activity models. I had no idea google tries to determine if I'm "tilting"
  root.collect(collected)
    .cd("Location History/Location History.json")
    .read()
    .cmd(["jq", "-r", locationHistoryToCsv])
    .csvSink();
  // There's also the semantic history but that's an entire nother can of worms
  // it seems like

  // TODO: Needs no-headers!
  // a = t.fork().cd(`My Activity`)
  // a.fork().glob(`**/MyActivity.html`)
  // .setId(t=>`Google - ${t.basenameN(2)}`)
  // .read()
  // .pipe(()=>{
  // // Parses the MyActivity format, chunking it into pieces of HTML text
  // // and then parsing out the text
  // const dup = htmlSelectorChunkedDuplex(
  // (tag, attrs)=>{
  // // TODO: We also probably want to get and parse each
  // // ".content-cell.mdl-typography--caption" as well (it
  // // has location for websearches and sometimes a details field)
  // // but then we have to get ".mdl-grid" and parse it
  // return attrs.class?.includes("content-cell")
  // && attrs.class?.includes("mdl-typography--body-1")
  // && !attrs.class?.includes("mdl-typography--text-right")
  // },
  // (chunk)=>{
  // const text = chunk.innerText;
  // const split = text.split("\n");
  // const timestamp = split.pop(); // TODO: need to parse this
  // const rest = split.join("\n");
  // // TODO: Escape instead of replace
  // const restSafe = rest.replace(/"/g, "'").replace(/\n/g,"\\n"); // escape newlines and quotes
  // // Return a CSV
  // return `"${restSafe}","${timestamp}"\n`;
  // }
  // );
  // return dup;
  // })

  // TODO: News
  // TODO: Profile
  // TODO: Tasks - No data

  return Array.from(collected);
}