// base-data-manager/data-export/google.ts

import { pipe, branch, cmd, assignMeta, cd, glob, read, branchGen, type PipelineOp } from "./task.ts";
import { htmlSelectorChunkedDuplex } from "./html.ts";

/**
 * Builds the pipeline that processes a Google Takeout export.
 *
 * Every task flowing through the pipeline first gets an `idValue` meta entry
 * of the form `Google - <basename>`. `branchGen` then receives one
 * sub-pipeline per data source yielded below:
 *
 * - Access log activity CSV and devices CSV (read as-is)
 * - Chrome history JSON, converted to CSV with `jq`
 * - Google Pay `transactions_*.csv` files (read as-is)
 * - Location History JSON, converted to CSV with `jq`
 *
 * NOTE(review): the exact semantics of `pipe`, `branchGen`, `cd`, `glob`,
 * `read`, `cmd` and `assignMeta` live in `./task.ts` and are not visible
 * here; the descriptions in this function assume they do what their names
 * suggest (change path, match files, read contents, pipe through a command).
 *
 * @returns The composed pipeline, i.e. whatever `pipe(...)` returns.
 */
export function google() {
  return pipe(
    // Generic ID for everything in here
    assignMeta({ idValue: t => `Google - ${t.basename}` }),
    branchGen(function* () {
      // TODO: There is a root takeout folder

      yield pipe(cd('Access Log Activity/Activities - A list of Google services accessed by.csv'), read());
      yield pipe(cd('Devices - A list of devices (i.e. Nest, Pixel, iPh.csv'), read());

      // Assignments - data was empty
      // Business messages - GMB messages, there's some but so far outside of what I want
      // TODO: Calendar, exports an .ics

      // a = t.fork().cd(`Chrome`)
        // TODO: Addresses and more.json
        // TODO: Bookmarks.csv
        // TODO: Device Information.json
        // TODO: Dictionary.csv
        // TODO: ...
      yield pipe(
        cd('Chrome/History.json'),
        read(),
        // TODO: "Typed Url", no data
        // TODO: "session", complex data
        // Omitted .ptoken and .client_id for now. I think ptoken is maybe for the history API? client_id is base64 something...
        //
        // jq's todateiso8601 expects *seconds* since the Unix epoch, while
        // time_usec is (per its name) microseconds, so it is scaled down by
        // 1,000,000 and floored before formatting. `tonumber` is a no-op on
        // numbers and makes the filter tolerant of a string-encoded value.
        // The CSV column keeps the name "time_usec" for compatibility even
        // though the value is now an ISO 8601 timestamp.
        cmd(["jq", "-r", `["favicon_url","page_transition","title","url","time_usec"],
          (
            ."Browser History"[]
            | [.favicon_url, .page_transition, .title, .url, (.time_usec | tonumber / 1000000 | floor | todateiso8601)]
          )
          | @csv
        `])
      );

      // TODO: Contacts, exports a .vcf
      // TODO: ...

      // a = t.fork().cd(`Google Pay`)
      yield pipe(
        cd(`Google Pay/Google transactions`),
        glob(`transactions_*.csv`),
        read(),
        // .fork("a").cd(`Money sends and requests`)
        //   .fork().cd(`Money sends and requests.csv`)
        //     .read()
        //     .cmd(t=>["sqlite-utils", "insert", "your.db", t.basename, "-", "--csv", "--detect-types"])
        // TODO: One more folder, and it only has a pdf
      );

      // TODO: Google Play Movies _ TV - no data
      // TODO: ...

      yield pipe(
        cd("Location History/Location History.json"),
        read(),
        // TODO: This is missing
        // "altitude" : 158,
        // "verticalAccuracy" : 68
        // and the activity models. I had no idea google tries to determine if I'm "tilting"
        //
        // The timestamp expression MUST be parenthesised: in jq `,` binds
        // tighter than `|`, so `[.timestampMs | todateiso8601, .latitudeE7, ...]`
        // would evaluate `.latitudeE7` against the timestamp value instead of
        // the location object and fail.
        // timestampMs is (per its name) milliseconds and presumably arrives as
        // a string, so it is converted with `tonumber`, scaled to seconds and
        // floored before todateiso8601, which expects whole epoch seconds.
        cmd(["jq", "-r", `["timestamp","latitudeE7","longitudeE7","accuracy"],
          (
            .locations[]
            | [(.timestampMs | tonumber / 1000 | floor | todateiso8601), .latitudeE7, .longitudeE7, .accuracy]
          )
          | @csv
        `])
      );
        // There's also the semantic history but that's an entire nother can of worms
        // it seems like

      // TODO: Needs no-headers!
      // a = t.fork().cd(`My Activity`)
      //   a.fork().glob(`**/MyActivity.html`)
      //     .setId(t=>`Google - ${t.basenameN(2)}`)
      //     .read()
      //     .pipe(()=>{
      //       // Parses the MyActivity format, chunking it into pieces of HTML text
      //       // and then parsing out the text
      //       const dup = htmlSelectorChunkedDuplex(
      //         (tag, attrs)=>{
      //           // TODO: We also probably want to get and parse each
      //           // ".content-cell.mdl-typography--caption" as well (it
      //           // has location for websearches and sometimes a details field)
      //           // but then we have to get ".mdl-grid" and parse it
      //           return attrs.class?.includes("content-cell")
      //             && attrs.class?.includes("mdl-typography--body-1")
      //             && !attrs.class?.includes("mdl-typography--text-right")
      //         },
      //         (chunk)=>{
      //           const text = chunk.innerText;
      //           const split = text.split("\n");
      //           const timestamp = split.pop(); // TODO: need to parse this
      //           const rest = split.join("\n");
      //           // TODO: Escape instead of replace
      //           const restSafe = rest.replace(/"/g, "'").replace(/\n/g,"\\n");  // escape newlines and quotes
      //           // Return a CSV
      //           return `"${restSafe}","${timestamp}"\n`;
      //         }
      //       );
      //       return dup;
      //     })

      // TODO: News
      // TODO: Profile
      // TODO: Tasks - No data
    })
  );
}