package whatsapp_test

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"os"
	"testing"

	"github.com/timelinize/timelinize/datasources/whatsapp"
	"github.com/timelinize/timelinize/timeline"
)

// TestFileImport runs the WhatsApp importer over the fixtures in
// testdata/fixtures and checks, in order, every graph it sends down the
// pipeline: owner name, timestamp, text content, attachments and metadata.
//
// The assertions below rely on the fixture encoding each expected entry's
// index into its timestamp: the second equals the index, the month equals
// ((index-1) % 12) + 1, and the hour is always 12.
func TestFileImport(t *testing.T) {
	// Setup
	fixtures := os.DirFS("testdata/fixtures")
	dirEntry := timeline.DirEntry{
		FS: fixtures,
	}
	// The buffer for messages is 100 here; significantly more lines than there are in the chat log
	pipeline := make(chan *timeline.Graph, 100)
	params := timeline.ImportParams{Pipeline: pipeline}

	// Run the import
	runErr := new(whatsapp.Importer).FileImport(context.Background(), dirEntry, params)
	if runErr != nil {
		t.Errorf("unable to import file: %v", runErr)
	}
	// The import has returned, so closing lets the range loop below terminate.
	close(params.Pipeline)

	// Expectations
	expected := []struct {
		owner       string
		index       int
		text        string
		attachments []string
		metadata    map[string]any
	}{
		// Messages and calls are end-to-end encrypted
		// Person 2 changed their phone number
		{owner: "Person 2", index: 3, text: "A message"},
		{owner: "Person 1", index: 4, text: "A reply with _italic_ and *bold* and ~strikethrough~ and `monospace` and emoji ☺️"},
		{owner: "Person 3", index: 5, text: "Someone else\r\nwith a lot to say\r\nover multiple lines!"},
		{owner: "Person 1", index: 6, text: "", attachments: []string{"some-image.jpg"}},
		{owner: "Person 2", index: 7, text: "A retort"},
		// You deleted this message
		{owner: "Person 3", index: 9, text: "How rude"},
		{owner: "Person 1", index: 10, text: "", attachments: []string{"some-doc.pdf"}},
		{owner: "Person 1", index: 11, text: "A question\r\n- Option A (☑︎ 1)\r\n- Option B (☑︎ 2)", metadata: map[string]any{
			"Poll question": "A question",
			"Poll option 1": "Option A",
			"Poll votes 1":  1,
			"Poll option 2": "Option B",
			"Poll votes 2":  2,
		}},
		{owner: "Person 2", index: 12, text: "British Library (96 Euston Rd, London, Greater London NW1 2DB): https://foursquare.com/v/4ac518cef964a52019a620e3", metadata: map[string]any{
			"Pin foursquare id": "4ac518cef964a52019a620e3",
		}},
		{owner: "Person 3", index: 13, text: "Location: https://maps.google.com/?q=51.513767,-0.098266", metadata: map[string]any{
			"Pin latitude":  51.513767,
			"Pin longitude": -0.098266,
		}},
		// Missing image means message is ignored
		// Missed voice call omitted
		// Taken video call omitted
		// Deleted message omitted
		{owner: "Person 2", index: 18, text: "An edited message"},
		{owner: "Persona español", index: 19, text: "Una pregunta\r\n- Opción A (☑︎ 0)\r\n- Opción B (☑︎ 1)\r\n- Opción C (☑︎ 2)", metadata: map[string]any{
			"Poll question": "Una pregunta",
			"Poll option 1": "Opción A",
			"Poll votes 1":  0,
			"Poll option 2": "Opción B",
			"Poll votes 2":  1,
			"Poll option 3": "Opción C",
			"Poll votes 3":  2,
		}},
	}

	i := 0
	for message := range pipeline {
		// Keep counting surplus messages so the final count check reports
		// how many were actually received.
		if i >= len(expected) {
			i++
			continue
		}

		if message.Item.Owner.Name != expected[i].owner {
			t.Fatalf("incorrect owner for message %d, wanted %s but was %s", i, expected[i].owner, message.Item.Owner.Name)
		}

		// Indexes above 12 wrap around into the following year's months.
		expectedMonth := ((expected[i].index - 1) % 12) + 1
		if int(message.Item.Timestamp.Month()) != expectedMonth {
			t.Fatalf("incorrect month for message %d, wanted %d but was %d", i, expectedMonth, int(message.Item.Timestamp.Month()))
		}

		// TODO: Timestamp timezones need to all be interpreted as "local time, at the time"
		// ie. 2020-01-01 00:00:00 should be interpreted as midnight GMT, but 2020-06-01 00:00:00 should be interpreted as midnight BST
		if message.Item.Timestamp.Hour() != 12 {
			t.Fatalf("message %d should have been near midday, but was %d", i, message.Item.Timestamp.Hour())
		}

		if message.Item.Timestamp.Second() != expected[i].index {
			t.Fatalf("incorrect second for message %d, wanted %d but was %d", i, expected[i].index, message.Item.Timestamp.Second())
		}

		validateItemData(t, "", expected[i].text, message.Item.Content, "incorrect text for message %d", i)

		if len(expected[i].attachments) > 0 {
			var attachedItems []*timeline.Item
			for _, edge := range message.Edges {
				if edge.Relation == timeline.RelAttachment {
					attachedItems = append(attachedItems, edge.To.Item)
				}
			}

			if len(attachedItems) != len(expected[i].attachments) {
				t.Fatalf("incorrect number of attachments for message %d", i)
			}

			for j, filename := range expected[i].attachments {
				expFile, err := fixtures.Open(filename)
				if err != nil {
					// Fatal: carrying on with a nil file would make
					// validateItemData expect "no data" and report a
					// misleading failure.
					t.Fatalf("unable to open fixture file (%s) as test data: %v", filename, err)
				}
				validateItemData(t, filename, expFile, attachedItems[j].Content, "incorrect %dth attachment for message %d", j, i)
				if err := expFile.Close(); err != nil {
					t.Errorf("unable to close fixture file (%s): %v", filename, err)
				}
			}
		}

		// Only the listed keys are checked; extra metadata on the item is ignored.
		for key, expectedValue := range expected[i].metadata {
			actualValue, ok := message.Item.Metadata[key]
			if !ok {
				t.Fatalf("metadata value for %s was missing", key)
			}
			if actualValue != expectedValue {
				t.Fatalf("metadata value for %s is incorrect, wanted %v (%T), but was %v (%T)", key, expectedValue, expectedValue, actualValue, actualValue)
			}
		}

		i++
	}

	if i != len(expected) {
		t.Fatalf("received %d messages instead of %d", i, len(expected))
	}
}

// validateItemData fails the test unless itemData carries the expected content.
//
// expectedData may be nil or an empty string (itemData must then have no
// DataFunc), a non-empty string, a []byte, or an io.Reader that is read to
// the end. Any other type is reported as a test error. When expectedFilename
// is non-empty, itemData.Filename must match it. errorMessage and errorArgs
// are formatted with fmt.Sprintf and prefixed to every failure message.
func validateItemData(t *testing.T, expectedFilename string, expectedData any, itemData timeline.ItemData, errorMessage string, errorArgs ...any) {
	t.Helper()

	errMsg := fmt.Sprintf(errorMessage, errorArgs...)

	var expectedBytes []byte
	wantData := true
	switch exp := expectedData.(type) {
	case nil:
		wantData = false
	case string:
		wantData = exp != ""
		expectedBytes = []byte(exp)
	case []byte:
		expectedBytes = exp
	case io.Reader:
		var err error
		expectedBytes, err = io.ReadAll(exp)
		if err != nil {
			t.Errorf("%s; couldn't read expected data: %v", errMsg, err)
			return
		}
	default:
		t.Errorf("%s; unable to check content with expected data type: %T", errMsg, expectedData)
		return
	}

	if !wantData {
		if itemData.Data != nil {
			t.Fatalf("%s; should not have had a DataFunc", errMsg)
		}
		return
	}

	if itemData.Data == nil {
		t.Fatalf("%s; DataFunc should be non-nil", errMsg)
	}

	actualR, err := itemData.Data(context.Background())
	if err != nil {
		t.Fatalf("%s; unable to retrieve actual data from dataFunc: %v", errMsg, err)
	}
	// Release the underlying resource if the returned reader is closable.
	// The conversion to io.Reader keeps the assertion valid whatever the
	// declared return type of the DataFunc is.
	var actualReader io.Reader = actualR
	if closer, ok := actualReader.(io.Closer); ok {
		defer closer.Close()
	}

	actualBytes, err := io.ReadAll(actualReader)
	if err != nil {
		t.Fatalf("%s; unable to read data from dataFunc reader: %v", errMsg, err)
	}

	if expectedFilename != "" && itemData.Filename != expectedFilename {
		t.Fatalf("%s; filename incorrect, wanted %s but was %s", errMsg, expectedFilename, itemData.Filename)
	}

	if !bytes.Equal(actualBytes, expectedBytes) {
		t.Fatalf("%s; item data incorrect, wanted:\n %v\n %s\nbut got:\n %v\n %s", errMsg, expectedBytes, string(expectedBytes), actualBytes, string(actualBytes))
	}
}