Well this is embarrassing, I forgot to actually test the metadata _and_ the keys emitted weren't correct!
200 lines
6.7 KiB
Go
200 lines
6.7 KiB
Go
package whatsapp_test
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"testing"
|
|
|
|
"github.com/timelinize/timelinize/datasources/whatsapp"
|
|
"github.com/timelinize/timelinize/timeline"
|
|
)
|
|
|
|
func TestFileImport(t *testing.T) {
|
|
// Setup
|
|
fixtures := os.DirFS("testdata/fixtures")
|
|
dirEntry := timeline.DirEntry{
|
|
FS: fixtures,
|
|
}
|
|
|
|
// The buffer for messages is 100 here; significantly more lines than there are in the chat log
|
|
pipeline := make(chan *timeline.Graph, 100)
|
|
params := timeline.ImportParams{Pipeline: pipeline}
|
|
|
|
// Run the import
|
|
runErr := new(whatsapp.Importer).FileImport(context.Background(), dirEntry, params)
|
|
if runErr != nil {
|
|
t.Errorf("unable to import file: %v", runErr)
|
|
}
|
|
close(params.Pipeline)
|
|
|
|
// Expectations
|
|
expected := []struct {
|
|
owner string
|
|
index int
|
|
text string
|
|
attachments []string
|
|
metadata map[string]any
|
|
}{
|
|
// Messages and calls are end-to-end encrypted
|
|
// Person 2 changed their phone number
|
|
{owner: "Person 2", index: 3, text: "A message"},
|
|
{owner: "Person 1", index: 4, text: "A reply with _italic_ and *bold* and ~strikethrough~ and `monospace` and emoji ☺️"},
|
|
{owner: "Person 3", index: 5, text: "Someone else\r\nwith a lot to say\r\nover multiple lines!"},
|
|
{owner: "Person 1", index: 6, text: "", attachments: []string{"some-image.jpg"}},
|
|
{owner: "Person 2", index: 7, text: "A retort"},
|
|
// You deleted this message
|
|
{owner: "Person 3", index: 9, text: "How rude"},
|
|
{owner: "Person 1", index: 10, text: "", attachments: []string{"some-doc.pdf"}},
|
|
{owner: "Person 1", index: 11, text: "A question\r\n- Option A (☑︎ 1)\r\n- Option B (☑︎ 2)",
|
|
metadata: map[string]any{
|
|
"Poll question": "A question",
|
|
"Poll option 1": "Option A",
|
|
"Poll votes 1": 1,
|
|
"Poll option 2": "Option B",
|
|
"Poll votes 2": 2,
|
|
}},
|
|
{owner: "Person 2", index: 12, text: "British Library (96 Euston Rd, London, Greater London NW1 2DB): https://foursquare.com/v/4ac518cef964a52019a620e3",
|
|
metadata: map[string]any{
|
|
"Pin foursquare id": "4ac518cef964a52019a620e3",
|
|
}},
|
|
{owner: "Person 3", index: 13, text: "Location: https://maps.google.com/?q=51.513767,-0.098266",
|
|
metadata: map[string]any{
|
|
"Pin latitude": 51.513767,
|
|
"Pin longitude": -0.098266,
|
|
}},
|
|
// Missing image means message is ignored
|
|
// Missed voice call omitted
|
|
// Taken video call omitted
|
|
// Deleted message omitted
|
|
{owner: "Person 2", index: 18, text: "An edited message"},
|
|
{owner: "Persona español", index: 19, text: "Una pregunta\r\n- Opción A (☑︎ 0)\r\n- Opción B (☑︎ 1)\r\n- Opción C (☑︎ 2)",
|
|
metadata: map[string]any{
|
|
"Poll question": "Una pregunta",
|
|
"Poll option 1": "Opción A",
|
|
"Poll votes 1": 0,
|
|
"Poll option 2": "Opción B",
|
|
"Poll votes 2": 1,
|
|
"Poll option 3": "Opción C",
|
|
"Poll votes 3": 2,
|
|
}},
|
|
}
|
|
|
|
i := 0
|
|
for message := range pipeline {
|
|
if i >= len(expected) {
|
|
i++
|
|
continue
|
|
}
|
|
|
|
if message.Item.Owner.Name != expected[i].owner {
|
|
t.Fatalf("incorrect owner for message %d, wanted %s but was %s", i, expected[i].owner, message.Item.Owner.Name)
|
|
}
|
|
|
|
expectedMonth := ((expected[i].index - 1) % 12) + 1
|
|
if int(message.Item.Timestamp.Month()) != expectedMonth {
|
|
t.Fatalf("incorrect month for message %d, wanted %d but was %d", i, expected[i].index, int(message.Item.Timestamp.Month()))
|
|
}
|
|
// TODO: Timestamp timezones need to all be interpreted as "local time, at the time"
|
|
// ie. 2020-01-01 00:00:00 should be interpreted as midnight GMT, but 2020-06-01 00:00:00 should be interpreted as midnight BST
|
|
if message.Item.Timestamp.Hour() != 12 {
|
|
t.Fatalf("message %d should have been near midday, but was %d", i, message.Item.Timestamp.Hour())
|
|
}
|
|
if message.Item.Timestamp.Second() != expected[i].index {
|
|
t.Fatalf("incorrect second for message %d, wanted %d but was %d", i, expected[i].index, message.Item.Timestamp.Second())
|
|
}
|
|
|
|
validateItemData(t, "", expected[i].text, message.Item.Content, "incorrect text for message %d", i)
|
|
|
|
if len(expected[i].attachments) > 0 {
|
|
var attachedItems []*timeline.Item
|
|
for _, edge := range message.Edges {
|
|
if edge.Relation == timeline.RelAttachment {
|
|
attachedItems = append(attachedItems, edge.To.Item)
|
|
}
|
|
}
|
|
if len(attachedItems) != len(expected[i].attachments) {
|
|
t.Fatalf("incorrect number of attachments for message %d", i)
|
|
}
|
|
|
|
for j, filename := range expected[i].attachments {
|
|
expFile, err := fixtures.Open(filename)
|
|
if err != nil {
|
|
t.Errorf("unable to open fixture file (%s) as test data: %v", filename, err)
|
|
}
|
|
|
|
validateItemData(t, filename, expFile, attachedItems[j].Content, "incorrect %dth attachment for message %d", j, i)
|
|
}
|
|
}
|
|
|
|
for key, expectedValue := range expected[i].metadata {
|
|
if actualValue, ok := message.Item.Metadata[key]; ok {
|
|
if actualValue != expectedValue {
|
|
t.Fatalf("metadata value for %s is incorrect, wanted %v (%T), but was %v (%T)", key, expectedValue, expectedValue, actualValue, actualValue)
|
|
}
|
|
} else {
|
|
t.Fatalf("metadata value for %s was missing", key)
|
|
}
|
|
}
|
|
|
|
i++
|
|
}
|
|
|
|
if i != len(expected) {
|
|
t.Fatalf("received %d messages instead of %d", i, len(expected))
|
|
}
|
|
}
|
|
|
|
func validateItemData(t *testing.T, expectedFilename string, expectedData any, itemData timeline.ItemData, errorMessage string, errorArgs ...any) {
|
|
errMsg := fmt.Sprintf(errorMessage, errorArgs...)
|
|
|
|
expectedStr, isStr := expectedData.(string)
|
|
|
|
var expectedBytes []byte
|
|
if expectedData == nil || (isStr && expectedStr == "") {
|
|
if itemData.Data != nil {
|
|
t.Fatalf("%s; should not have had a DataFunc", errMsg)
|
|
}
|
|
return
|
|
} else if exp, ok := expectedData.(io.Reader); ok {
|
|
var err error
|
|
expectedBytes, err = io.ReadAll(exp)
|
|
if err != nil {
|
|
t.Errorf("%s; couldn't read expected data: %v", errMsg, err)
|
|
return
|
|
}
|
|
} else if isStr {
|
|
expectedBytes = []byte(expectedStr)
|
|
} else if exp, ok := expectedData.([]byte); ok {
|
|
expectedBytes = exp
|
|
} else {
|
|
t.Errorf("%s; unable to check content with expected data type: %T", errMsg, expectedData)
|
|
return
|
|
}
|
|
|
|
if itemData.Data == nil {
|
|
t.Fatalf("%s; DataFunc should be non-nil", errMsg)
|
|
return
|
|
}
|
|
|
|
actualR, err := itemData.Data(context.Background())
|
|
if err != nil {
|
|
t.Fatalf("%s; unable to retrieve actual data from dataFunc", errMsg)
|
|
return
|
|
}
|
|
|
|
actualBytes, err := io.ReadAll(actualR)
|
|
if err != nil {
|
|
t.Fatalf("%s; unable read data from dataFunc reader", errMsg)
|
|
return
|
|
}
|
|
|
|
if expectedFilename != "" && itemData.Filename != expectedFilename {
|
|
t.Fatalf("%s; filename incorrect, wanted %s but was %s", errMsg, expectedFilename, itemData.Filename)
|
|
}
|
|
if !bytes.Equal(actualBytes, expectedBytes) {
|
|
t.Fatalf("%s; item data incorrect, wanted:\n %v\n %s\nbut got:\n %v\n %s", errMsg, expectedBytes, string(expectedBytes), actualBytes, string(actualBytes))
|
|
}
|
|
}
|