1
0
Fork 0
timelinize/datasources/facebook/models.go
2026-01-16 23:32:10 -07:00

395 lines
13 KiB
Go

package facebook
import (
"fmt"
"path"
"path/filepath"
"strconv"
"strings"
"time"
"github.com/timelinize/timelinize/timeline"
"go.uber.org/zap"
)
// fbTaggedPlaces is the decoded form of places_you_have_been_tagged_in.json
// (year 2024+): a JSON array in which each element carries a media list, a set
// of label/value pairs, and a Facebook ID.
type fbTaggedPlaces []struct {
Media []any `json:"media"`
LabelValues []fbLabelValue `json:"label_values"`
FBID string `json:"fbid"` // Facebook ID
}
// profileInfo models the profile information JSON document, whose single
// top-level key is "profile_v2".
//
// Generated January 2023
//
// NOTE(review): the various Timestamp fields are presumably Unix seconds, like
// the timestamps elsewhere in this file — confirm against a real export.
type profileInfo struct {
ProfileV2 struct {
// Name holds the account holder's name, both whole and split into parts.
Name struct {
FullName string `json:"full_name"`
FirstName string `json:"first_name"`
MiddleName string `json:"middle_name"`
LastName string `json:"last_name"`
} `json:"name"`
// Emails groups the address lists associated with the account.
Emails struct {
Emails []string `json:"emails"`
PreviousEmails []string `json:"previous_emails"`
PendingEmails []any `json:"pending_emails"`
AdAccountEmails []any `json:"ad_account_emails"`
} `json:"emails"`
Birthday fbDate `json:"birthday"`
Gender struct {
GenderOption string `json:"gender_option"`
Pronoun string `json:"pronoun"`
} `json:"gender"`
PreviousNames []any `json:"previous_names"`
OtherNames []struct {
Name string `json:"name"`
Type string `json:"type"`
Timestamp int `json:"timestamp"`
} `json:"other_names"`
Hometown struct {
Name string `json:"name"`
Timestamp int `json:"timestamp"`
} `json:"hometown"`
Relationship struct {
Status string `json:"status"`
Partner string `json:"partner"`
Anniversary fbDate `json:"anniversary"`
Timestamp int `json:"timestamp"`
} `json:"relationship"`
FamilyMembers []struct {
Name string `json:"name"`
Relation string `json:"relation"`
Timestamp int `json:"timestamp"`
} `json:"family_members"`
EducationExperiences []struct {
Name string `json:"name"`
StartTimestamp int `json:"start_timestamp,omitempty"`
EndTimestamp int `json:"end_timestamp"`
Graduated bool `json:"graduated"`
Concentrations []string `json:"concentrations"`
Degree string `json:"degree,omitempty"`
SchoolType string `json:"school_type"`
Timestamp int `json:"timestamp"`
} `json:"education_experiences"`
// WorkExperiences is left untyped; its element shape is not modeled here.
WorkExperiences []any `json:"work_experiences"`
BloodInfo struct {
BloodDonorStatus string `json:"blood_donor_status"`
} `json:"blood_info"`
Websites []struct {
Address string `json:"address"`
} `json:"websites"`
PhoneNumbers []struct {
PhoneType string `json:"phone_type"`
PhoneNumber string `json:"phone_number"`
Verified bool `json:"verified"`
} `json:"phone_numbers"`
Username string `json:"username"`
RegistrationTimestamp int `json:"registration_timestamp"`
ProfileURI string `json:"profile_uri"`
} `json:"profile_v2"`
}
// fbDate is a calendar date expressed as separate numeric year, month, and day
// fields. profileInfo uses it for the birthday and the relationship anniversary.
type fbDate struct {
Year int `json:"year"`
Month int `json:"month"`
Day int `json:"day"`
}
// yourPosts models a JSON array of posts. Each post has a timestamp, optional
// attachments (text, external links, media, places), the post text itself under
// "data", an optional title, and optional tags.
type yourPosts []struct {
// NOTE(review): presumably Unix seconds — confirm against a real export.
Timestamp int64 `json:"timestamp"`
Attachments []struct {
// Data lists the individual pieces of one attachment.
Data []struct {
Text string `json:"text"`
ExternalContext struct {
Name string `json:"name"`
URL string `json:"url"`
} `json:"external_context"`
Media fbArchiveMedia `json:"media"`
Place struct {
Name string `json:"name"`
Coordinate struct {
Latitude float64 `json:"latitude"`
Longitude float64 `json:"longitude"`
} `json:"coordinate"`
Address string `json:"address"`
URL string `json:"url"`
} `json:"place"`
} `json:"data"`
} `json:"attachments,omitempty"`
// Data carries the text of the post.
Data []struct {
Post string `json:"post"`
} `json:"data"`
Title string `json:"title,omitempty"`
Tags []struct {
Name string `json:"name"`
} `json:"tags,omitempty"`
}
// fbYourUncategorizedPhotos models a JSON document whose "other_photos_v2" key
// holds a list of media entries.
type fbYourUncategorizedPhotos struct {
OtherPhotosV2 []fbArchiveMedia `json:"other_photos_v2"`
}
// fbYourVideos models a JSON document whose "videos_v2" key holds a list of
// media entries.
type fbYourVideos struct {
VideosV2 []fbArchiveMedia `json:"videos_v2"`
}
// fbArchiveMedia describes one media file referenced by the archive's JSON:
// its URI, creation timestamp, optional photo or video metadata, title, and
// description.
type fbArchiveMedia struct {
URI string `json:"uri"`
// CreationTimestamp is interpreted by fillItem as Unix seconds.
CreationTimestamp int64 `json:"creation_timestamp"`
MediaMetadata struct {
// PhotoMetadata and VideoMetadata are pointers; fillItem compares them
// against nil to decide which kind of metadata to read.
PhotoMetadata *struct {
EXIFData []fbEXIFData `json:"exif_data"`
} `json:"photo_metadata"`
VideoMetadata *struct {
EXIFData []struct {
UploadIP string `json:"upload_ip"`
UploadTimestamp int64 `json:"upload_timestamp"`
} `json:"exif_data"`
} `json:"video_metadata"`
} `json:"media_metadata"`
Title string `json:"title"`
Description string `json:"description"`
}
// fillItem populates item with what the archive's JSON says about media file m:
// a retrieval key, the intermediate location, a timestamp, a filename, the
// title and description, and the EXIF-like metadata Facebook reported.
//
// d is the directory entry being read; it is only used to derive the retrieval
// key. postText is the text of the enclosing post: the media description is
// stored only when it differs from that text. logger receives a warning when a
// nonsensical taken-timestamp is discarded.
func (m fbArchiveMedia) fillItem(item *timeline.Item, d timeline.DirEntry, postText string, logger *zap.Logger) {
	// the media item might not be in this archive if it's a multi-archive export, so set a retrieval key so we can
	// fill the item in later
	retKey := retrievalKey(d, m.URI)
	item.Retrieval.SetKey(retKey)
	item.Retrieval.FieldUpdatePolicies = map[string]timeline.FieldUpdatePolicy{
		"intermediate_location": timeline.UpdatePolicyPreferExisting,
		"timestamp":             timeline.UpdatePolicyPreferIncoming,
		"location":              timeline.UpdatePolicyPreferIncoming,
		"owner":                 timeline.UpdatePolicyPreferIncoming,
		"data":                  timeline.UpdatePolicyPreferExisting,
		"metadata":              timeline.UpdatePolicyPreferIncoming,
	}

	item.IntermediateLocation = m.URI

	if m.CreationTimestamp > 0 {
		item.Timestamp = time.Unix(m.CreationTimestamp, 0).UTC()
	}

	if item.Content.Data == nil {
		item.Content = timeline.ItemData{
			Filename: path.Base(m.URI),
			// don't set the Data func here, because it might not be in this archive at all;
			// we traverse media folder separately and use retrieval key to fill in the data file
		}
	}

	if item.Metadata == nil {
		item.Metadata = make(timeline.Metadata)
	}

	item.Metadata["Title"] = FixString(m.Title)
	if desc := FixString(m.Description); desc != postText {
		// TODO: this filter probably doesn't work with multiple attachment data where some are text with the same description
		item.Metadata["Description"] = desc
	}

	// TODO: use media importer functions for this as well...

	// Collect metadata... while we do, prefer TakenTimestamp over CreationTimestamp;
	// I think TakenTimestamp is when the media was captured, and CreationTimestamp is
	// when the post or attachment was created on Facebook. I think.
	// We also include all the EXIF data Facebook gives us because they strip the EXIF
	// data from the files they give us.
	if m.MediaMetadata.PhotoMetadata != nil {
		for _, exif := range m.MediaMetadata.PhotoMetadata.EXIFData {
			if exif.TakenTimestamp != 0 { // negative is valid! (pre-1970)
				item.Timestamp = time.Unix(exif.TakenTimestamp, 0).UTC()

				// I've seen this happen where the value is -62169958800 (for multiple items!)
				// which results in a Very Wrong Timestamp. I don't know what to make of this
				// value, so we have to simply throw it away.
				// NOTE(review): this also clears any timestamp set from CreationTimestamp
				// above; confirm whether falling back to it would be preferable.
				if item.Timestamp.Year() <= 0 {
					logger.Warn("data source provided TakenTimestamp that is before year 0; not using it", zap.Time("taken_timestamp", item.Timestamp))
					item.Timestamp = time.Time{}
				}
			}

			// a zero coordinate is treated as "not provided"
			if lat := exif.Latitude; lat != 0 {
				item.Location.Latitude = &lat
			}
			if lon := exif.Longitude; lon != 0 {
				item.Location.Longitude = &lon
			}

			item.Metadata["ISO"] = exif.ISO
			item.Metadata["Focal length"] = exif.FocalLength
			item.Metadata["Upload IP"] = exif.UploadIP
			if exif.ModifiedTimestamp > 0 {
				item.Metadata["Modified"] = time.Unix(exif.ModifiedTimestamp, 0).UTC()
			}
			item.Metadata["Camera make"] = exif.CameraMake
			item.Metadata["Camera model"] = exif.CameraModel
			item.Metadata["Exposure"] = exif.Exposure
			item.Metadata["F-stop"] = exif.FStop
			item.Metadata["Orientation"] = exif.Orientation
			item.Metadata["Original width"] = exif.OriginalWidth
			item.Metadata["Original height"] = exif.OriginalHeight
		}
	} else if m.MediaMetadata.VideoMetadata != nil {
		for _, exif := range m.MediaMetadata.VideoMetadata.EXIFData {
			// store the IP address under "Upload IP" (not the upload timestamp)
			item.Metadata["Upload IP"] = exif.UploadIP
			item.Metadata["Upload timestamp"] = time.Unix(exif.UploadTimestamp, 0).UTC()
		}
	}
}
// retrievalKey builds the retrieval key for a media path or URI found while
// reading d. The key has the form "facebook::<export ID>::<pathOrURI>", where
// the export ID is the base name of d's full path with everything from the last
// "-" onward removed (or the whole base name when it contains no "-").
func retrievalKey(d timeline.DirEntry, pathOrURI string) string {
	exportID := filepath.Base(d.FullPath())
	if dash := strings.LastIndex(exportID, "-"); dash >= 0 {
		exportID = exportID[:dash]
	}
	return fmt.Sprintf("facebook::%s::%s", exportID, pathOrURI)
}
// fbMessengerThread models one Messenger conversation file: its participants,
// its messages (with optional shares, reactions, and media attachments), and
// thread-level fields such as the title and thread path.
type fbMessengerThread struct {
// Participants lists everyone in the thread; sentTo derives recipients from it.
Participants []struct {
Name string `json:"name"`
} `json:"participants"`
Messages []struct {
SenderName string `json:"sender_name"`
// NOTE(review): presumably Unix milliseconds, per the field name — confirm.
TimestampMS int64 `json:"timestamp_ms"`
IsUnsent bool `json:"is_unsent,omitempty"`
Content string `json:"content,omitempty"`
Share struct {
Link string `json:"link"`
ShareText string `json:"share_text"`
} `json:"share,omitempty"`
Reactions []struct {
Reaction string `json:"reaction"`
Actor string `json:"actor"`
} `json:"reactions,omitempty"`
// Media attachments all share the fbArchiveMedia shape.
Photos []fbArchiveMedia `json:"photos,omitempty"`
Videos []fbArchiveMedia `json:"videos,omitempty"`
GIFs []fbArchiveMedia `json:"gifs,omitempty"`
AudioFiles []fbArchiveMedia `json:"audio_files,omitempty"`
Sticker fbArchiveMedia `json:"sticker,omitempty"`
} `json:"messages"`
Title string `json:"title"`
IsStillParticipant bool `json:"is_still_participant"`
ThreadPath string `json:"thread_path"`
MagicWords []any `json:"magic_words"`
}
// sentTo returns one entity per thread participant whose (fixed-up) name is not
// senderName, i.e. the recipients of a message sent by senderName. Each entity
// gets an identity attribute named dsName+"_name" holding the participant's
// name. The result is nil when there are no other participants.
func (thread fbMessengerThread) sentTo(senderName, dsName string) []*timeline.Entity {
	attrName := dsName + "_name"

	var recipients []*timeline.Entity
	for i := range thread.Participants {
		name := FixString(thread.Participants[i].Name)
		if name != senderName {
			recipient := &timeline.Entity{
				Name: name,
				Attributes: []timeline.Attribute{
					{
						Name:     attrName,
						Value:    name,
						Identity: true,
					},
				},
			}
			recipients = append(recipients, recipient)
		}
	}
	return recipients
}
// fbAlbumMeta models an album's JSON metadata: its name and description, the
// photos it contains, its cover photo, and when it was last modified.
//
// The photo and cover-photo structs repeat the fields of fbArchiveMedia, except
// that they carry only photo metadata and hold it by value rather than pointer.
type fbAlbumMeta struct {
Name string `json:"name"`
Photos []struct {
URI string `json:"uri"`
CreationTimestamp int64 `json:"creation_timestamp"`
MediaMetadata struct {
PhotoMetadata struct {
EXIFData []fbEXIFData `json:"exif_data"`
} `json:"photo_metadata"`
} `json:"media_metadata"`
Title string `json:"title"`
Description string `json:"description"`
} `json:"photos"`
CoverPhoto struct {
URI string `json:"uri"`
CreationTimestamp int64 `json:"creation_timestamp"`
MediaMetadata struct {
PhotoMetadata struct {
EXIFData []fbEXIFData `json:"exif_data"`
} `json:"photo_metadata"`
} `json:"media_metadata"`
Title string `json:"title"`
Description string `json:"description"`
} `json:"cover_photo"`
LastModifiedTimestamp int64 `json:"last_modified_timestamp"`
Description string `json:"description"`
}
// fbEXIFData is one entry of the "exif_data" list attached to a photo. The
// fillItem method copies these fields into an item's timestamp, location, and
// metadata.
type fbEXIFData struct {
ISO int `json:"iso"`
FocalLength string `json:"focal_length"`
UploadIP string `json:"upload_ip"`
// TakenTimestamp is read as Unix seconds; 0 is treated as absent and
// negative values (pre-1970) are accepted.
TakenTimestamp int64 `json:"taken_timestamp"`
// ModifiedTimestamp is read as Unix seconds and used only when positive.
ModifiedTimestamp int64 `json:"modified_timestamp"`
CameraMake string `json:"camera_make"`
CameraModel string `json:"camera_model"`
Exposure string `json:"exposure"`
FStop string `json:"f_stop"`
Orientation int `json:"orientation"`
OriginalWidth int `json:"original_width"`
OriginalHeight int `json:"original_height"`
// A Latitude or Longitude of exactly 0 is treated by fillItem as not provided.
Latitude float64 `json:"latitude"`
Longitude float64 `json:"longitude"`
}
// fbLabelValue is one labeled entry in a "label_values" list. Besides its label
// it may carry a plain string value, a timestamp value, or a nested list of
// label/value pairs under "dict".
//
// NOTE(review): the dict values presumably include the coordinate string that
// parseCoordsFromDictString expects — confirm against the caller.
type fbLabelValue struct {
Title string `json:"title,omitempty"`
Label string `json:"label"`
Value string `json:"value,omitempty"`
TimestampValue int `json:"timestamp_value,omitempty"`
Dict []struct {
Label string `json:"label"`
Value string `json:"value"`
} `json:"dict,omitempty"`
}
// fbCheckIns models a JSON array of check-in entries.
type fbCheckIns []fbCheckIn
// fbCheckIn is a single check-in entry: a timestamp, a media list, a set of
// label/value pairs, and a Facebook ID.
type fbCheckIn struct {
// NOTE(review): presumably Unix seconds — confirm against a real export.
Timestamp int `json:"timestamp"`
Media []any `json:"media"`
LabelValues []fbLabelValue `json:"label_values"`
FBID string `json:"fbid"`
}
// parseCoordsFromDictString parses coordinates from the dict label value.
// Expected format: "(38.575804323267 , -121.4804199327)" which is (lat, lon) order
//
// Surrounding whitespace and the enclosing parentheses are optional. An error
// is returned, with both coordinates zero, when the string does not contain
// exactly two comma-separated numbers, when either number fails to parse, or
// when latitude is outside [-90, 90] or longitude is outside [-180, 180].
// NaN is rejected as out of range.
//
//nolint:mnd
func parseCoordsFromDictString(coordStr string) (lat, lon float64, err error) {
	// remove fluff
	coordStr = strings.TrimSpace(coordStr)
	coordStr = strings.TrimPrefix(coordStr, "(")
	coordStr = strings.TrimSuffix(coordStr, ")")

	// split lat/lon at the first comma; any further comma ends up in the
	// longitude part and makes its parse fail
	latStr, lonStr, found := strings.Cut(coordStr, ",")
	if !found {
		return 0, 0, fmt.Errorf("invalid lat/lon string: %q", coordStr)
	}

	lat, err = strconv.ParseFloat(strings.TrimSpace(latStr), 64)
	if err != nil {
		return 0, 0, fmt.Errorf("invalid latitude float: %w", err)
	}
	lon, err = strconv.ParseFloat(strings.TrimSpace(lonStr), 64)
	if err != nil {
		return 0, 0, fmt.Errorf("invalid longitude float: %w", err)
	}

	// ParseFloat accepts "NaN", and every ordered comparison with NaN is false,
	// so the checks are phrased as "not within range" to reject NaN as well
	if !(lat >= -90 && lat <= 90) {
		return 0, 0, fmt.Errorf("invalid latitude: %f", lat)
	}
	if !(lon >= -180 && lon <= 180) {
		return 0, 0, fmt.Errorf("invalid longitude: %f", lon)
	}

	return lat, lon, nil
}