import { TaskTargetPipelineHelper } from "./task.ts";
import { htmlSelectorChunkedDuplex } from "./html.ts";

/**
 * Declares the pipelines for the files of a Google Takeout export that are
 * currently handled: access-log activity, devices, Chrome history, Google Pay
 * transactions and location history.
 *
 * Every pipeline is started from a helper whose id is `Google - <basename>`
 * and is passed through `collect(col)`; the contents of `col` are returned as
 * an array once all pipelines have been declared.
 *
 * Must be invoked with a `TaskTargetPipelineHelper` as `this`.
 *
 * @returns The contents of the collection set, in insertion order.
 */
export function google(this: TaskTargetPipelineHelper) {
  const p = this.setId(t => `Google - ${t.basename}`); // Generic ID for everything in here
  // NOTE(review): element type assumed from how `collect` is used below;
  // confirm against the signature of `collect` in ./task.ts.
  const col: Set<TaskTargetPipelineHelper> = new Set();

  // TODO: There is a root takeout folder

  p.collect(col).cd('Access Log Activity/Activities - A list of Google services accessed by.csv').read();
  p.collect(col).cd('Devices - A list of devices (i.e. Nest, Pixel, iPh.csv').read();

  // Assignments - data was empty
  // Business messages - GMB messages, there's some but so far outside of what I want
  // TODO: Calendar, exports an .ics

  // a = t.fork().cd(`Chrome`)
  // TODO: "Assersses and mode.json" (sic - presumably "Addresses and more.json"; confirm against a real export)
  // TODO: Bookmarks.csv
  // TODO: Device Information.json
  // TODO: Dictionary.csv
  // TODO: ...
  p.collect(col).cd('Chrome/History.json')
    .read()
    // TODO: "Typed Url", no data
    // TODO: "session", complex data
    // Omitted .ptoken and .client_id for now. I think ptoken is maybe for the
    // history API? client_id is base64 something...
    // time_usec is in microseconds, but jq's todateiso8601 expects seconds
    // since the Unix epoch, so scale it down (and floor it) before formatting.
    // The CSV header is left as "time_usec" so existing consumers keep the
    // same column name.
    .cmd(["jq", "-r", `["favicon_url","page_transition","title","url","time_usec"],
      (
        ."Browser History"[]
        | [.favicon_url, .page_transition, .title, .url, (.time_usec / 1000000 | floor | todateiso8601)]
      )
      | @csv`]);

  // TODO: Contacts, exports an .vcf
  // TODO: ...

  // a = t.fork().cd(`Google Pay`)
  p.collect(col).cd(`Google Pay/Google transactions`).glob(`transactions_*.csv`)
    .read()
    .csvSink();

  // .fork("a").cd(`Money sends and requests`)
  // .fork().cd(`Money sends and requests.csv`)
  //   .read()
  //   .cmd(t=>["sqlite-utils", "insert", "your.db", t.basename, "-", "--csv", "--detect-types"])
  // TODO: One more folder, and it only has a pdf

  // TODO: Google Play Movies _ TV - no data
  // TODO: ...

  p.collect(col).cd("Location History/Location History.json")
    .read()
    // TODO: This is missing
    //   "altitude" : 158,
    //   "verticalAccuracy" : 68
    // and the activity models. I had no idea google tries to determine if I'm "tilting"
    //
    // The timestamp conversion is parenthesised because jq's `|` binds looser
    // than `,`: without the parentheses the remaining array elements would be
    // evaluated against the timestamp value instead of the location object.
    // timestampMs is in milliseconds (tonumber covers it being a string), while
    // todateiso8601 expects seconds, hence the division and floor.
    .cmd(["jq", "-r", `["timestamp","latitudeE7","longitudeE7","accuracy"],
      (
        .locations[]
        | [(.timestampMs | tonumber / 1000 | floor | todateiso8601), .latitudeE7, .longitudeE7, .accuracy]
      )
      | @csv`])
    .csvSink();
  // There's also the semantic history but that's an entire nother can of worms
  // it seems like

  // TODO: Needs no-headers!
  // a = t.fork().cd(`My Activity`)
  // a.fork().glob(`**/MyActivity.html`)
  //   .setId(t=>`Google - ${t.basenameN(2)}`)
  //   .read()
  //   .pipe(()=>{
  //     // Parses the MyActivity format, chunking it into pieces of HTML text
  //     // and then parsing out the text
  //     const dup = htmlSelectorChunkedDuplex(
  //       (tag, attrs)=>{
  //         // TODO: We also probably want to get and parse each
  //         // ".content-cell.mdl-typography--caption" as well (it
  //         // has location for websearches and sometimes a details field)
  //         // but then we have to get ".mdl-grid" and parse it
  //         return attrs.class?.includes("content-cell")
  //           && attrs.class?.includes("mdl-typography--body-1")
  //           && !attrs.class?.includes("mdl-typography--text-right")
  //       },
  //       (chunk)=>{
  //         const text = chunk.innerText;
  //         const split = text.split("\n");
  //         const timestamp = split.pop(); // TODO: need to parse this
  //         const rest = split.join("\n");
  //         // TODO: Escape instead of replace
  //         const restSafe = rest.replace(/"/g, "'").replace(/\n/g,"\\n"); // escape newlines and quotes
  //         // Return a CSV
  //         return `"${restSafe}","${timestamp}"\n`;
  //       }
  //     );
  //     return dup;
  //   })

  // TODO: News
  // TODO: Profile
  // TODO: Tasks - No data

  return Array.from(col);
};