1
0
Fork 0
timelinize/datasources/contactlist/contactlist.go
2026-01-28 21:53:14 -07:00

222 lines
6.5 KiB
Go

/*
Timelinize
Copyright (c) 2013 Matthew Holt
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Package contactlist implements a data source for contact lists.
package contactlist
import (
"context"
"encoding/base64"
"encoding/csv"
"errors"
"fmt"
"io"
"path"
"strings"
"github.com/timelinize/timelinize/datasources/vcard"
"github.com/timelinize/timelinize/timeline"
"go.uber.org/zap"
)
// init registers the contact list data source with the timeline package.
// A registration failure is reported through the package logger at fatal level.
func init() {
	dataSource := timeline.DataSource{
		Name:  "contact_list",
		Title: "Contact List",
		Icon:  "contact_list.svg",
		NewFileImporter: func() timeline.FileImporter {
			return new(FileImporter)
		},
	}

	if err := timeline.RegisterDataSource(dataSource); err != nil {
		timeline.Log.Fatal("registering data source", zap.Error(err))
	}
}
// FileImporter imports contact lists from delimited text (CSV) files.
// It has no fields; init registers it as the file importer for the
// "contact_list" data source.
type FileImporter struct{}
// FileImport reads contacts from a delimited (CSV-like) file and sends one
// entity per usable row to params.Pipeline.
//
// dirEntry may refer directly to the CSV file, or to a directory, in which
// case the CSV file is looked up inside it under the directory's base name
// plus ".csv". The first record is treated as the header row; the column
// mapping and the delimiter are obtained from bestColumnMappingAndDelim.
// Rows that yield nothing beyond a name, or nothing beyond a picture, are
// skipped.
//
// An error is returned if the column mapping cannot be determined or
// recognizes too few fields, if the file cannot be opened or parsed, or if
// ctx is canceled while reading.
func (fimp *FileImporter) FileImport(ctx context.Context, dirEntry timeline.DirEntry, params timeline.ImportParams) error {
	// start by assuming the dirEntry points directly to a CSV file
	pathInFS := "."

	// but if a directory was recognized, alter the path to refer to the CSV file within it
	if dirEntry.IsDir() {
		pathInFS = path.Base(dirEntry.Name()) + ".csv"
	}

	// the mapping has to be computed from the very same file that is read
	// below, so pass the resolved path rather than always "."
	bestColumnMapping, bestDelim, err := bestColumnMappingAndDelim(ctx, dirEntry, pathInFS)
	if err != nil {
		return err
	}

	// at least 2 fields should be required in order to be useful, right?
	// like an email by itself (or a name by itself) has no value I think...
	// especially since no items are attached from a contact list
	if len(bestColumnMapping) < recognizeAtLeastFields {
		return errors.New("insufficient header row")
	}

	file, err := dirEntry.Open(pathInFS)
	if err != nil {
		return fmt.Errorf("opening file: %w", err)
	}
	defer file.Close()

	r := csv.NewReader(file)
	r.ReuseRecord = true // with this enabled, DO NOT MODIFY THE SLICE RETURNED FROM Read()
	r.Comma = bestDelim

	// the first record is the header row; it carries no contact data
	sawHeader := false

	for {
		if err := ctx.Err(); err != nil {
			return err
		}

		row, err := r.Read()
		if errors.Is(err, io.EOF) {
			break
		}
		if err != nil {
			return fmt.Errorf("error reading next record: %w", err)
		}

		if !sawHeader {
			sawHeader = true
			continue
		}

		// convert the value of every recognized column into something about the person
		p := new(timeline.Entity)
		var firstName, midName, lastName string

		for field, colIndices := range bestColumnMapping {
			for _, colIdx := range colIndices {
				value := strings.TrimSpace(row[colIdx])
				if value == "" {
					// ignore empty values; especially if there are multiple matched columns
					// for a field (like Name, for some reason), don't overwrite a non-empty
					// first column with an empty second column
					continue
				}

				switch field {
				case "full_name":
					p.Name = value
				case "first_name":
					firstName = value
				case "middle_name":
					midName = value
				case "last_name":
					lastName = value
				case "birthdate":
					// store the parsed birthday, not the raw cell text, so the
					// parse result is actually used and unparseable values are dropped
					birthDate := vcard.ParseBirthday(value)
					if birthDate != nil {
						p.Attributes = append(p.Attributes, timeline.Attribute{
							Name:  "birth_date",
							Value: birthDate,
						})
					}
				case "picture":
					if strings.HasPrefix(value, "http") {
						p.NewPicture = timeline.DownloadData(value)
					} else {
						// accept both padded and unpadded standard base64 by
						// stripping any trailing padding before decoding;
						// undecodable values are ignored (best effort)
						picBytes, err := base64.RawStdEncoding.DecodeString(strings.TrimRight(value, "="))
						if err == nil {
							p.NewPicture = timeline.ByteData(picBytes)
						}
					}
				case timeline.AttributeGender:
					p.Attributes = append(p.Attributes, timeline.Attribute{
						Name:  field,
						Value: value,
					})
				case timeline.AttributeEmail,
					timeline.AttributePhoneNumber:
					p.Attributes = append(p.Attributes, timeline.Attribute{
						Name:        field,
						Value:       value,
						Identifying: true,
					})
				}
			}
		}

		// assemble name, if given in different fields
		if p.Name == "" {
			nameParts := make([]string, 0, 3)
			for _, part := range []string{firstName, midName, lastName} {
				if part != "" {
					nameParts = append(nameParts, part)
				}
			}
			p.Name = strings.Join(nameParts, " ")
		}

		// contact lists from Google Takeout have profile pictures as sidecar files,
		// named as a concatenation of their names, or their email address; we can read
		// those directly for much faster and more reliable imports, if this import is
		// acting on a directory rather than on a regular file
		if dirEntry.IsDir() {
			pfpPathByName := path.Join(dirEntry.Filename, p.Name) + ".jpg"
			if timeline.FileExistsFS(dirEntry.FS, pfpPathByName) {
				p.NewPicture = func(_ context.Context) (io.ReadCloser, error) {
					return dirEntry.FS.Open(pfpPathByName)
				}
			} else if attr, ok := p.Attribute(timeline.AttributeEmail); ok {
				// checked assertion: a nil or non-string value simply means
				// there is no email-named picture to look for
				if email, isString := attr.Value.(string); isString {
					pfpPathByEmail := path.Join(dirEntry.Filename, email) + ".jpg"
					if timeline.FileExistsFS(dirEntry.FS, pfpPathByEmail) {
						p.NewPicture = func(_ context.Context) (io.ReadCloser, error) {
							return dirEntry.FS.Open(pfpPathByEmail)
						}
					}
				}
			}
		}

		// I think it's pointless to process a person if there aren't at
		// least 2 data points about them because we can get single
		// data points from nearly any data source; the value of adding
		// a contact list is to get more information about a person to
		// infer more relationships automatically.
		// So, with no attributes and no metadata, skip the row if it has
		// only a name (no picture) or only a picture (no name).
		hasDetails := len(p.Attributes)+len(p.Metadata) > 0
		if !hasDetails && (p.NewPicture == nil || p.Name == "") {
			continue
		}

		// finally, send person for processing
		params.Pipeline <- &timeline.Graph{Entity: p}
	}

	return nil
}