395 lines
13 KiB
Go
395 lines
13 KiB
Go
package facebook
|
|
|
|
import (
|
|
"fmt"
|
|
"path"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/timelinize/timelinize/timeline"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
// places_you_have_been_tagged_in.json (year 2024+)
|
|
type fbTaggedPlaces []struct {
|
|
Media []any `json:"media"`
|
|
LabelValues []fbLabelValue `json:"label_values"`
|
|
FBID string `json:"fbid"` // Facebook ID
|
|
}
|
|
|
|
// Generated January 2023
|
|
type profileInfo struct {
|
|
ProfileV2 struct {
|
|
Name struct {
|
|
FullName string `json:"full_name"`
|
|
FirstName string `json:"first_name"`
|
|
MiddleName string `json:"middle_name"`
|
|
LastName string `json:"last_name"`
|
|
} `json:"name"`
|
|
Emails struct {
|
|
Emails []string `json:"emails"`
|
|
PreviousEmails []string `json:"previous_emails"`
|
|
PendingEmails []any `json:"pending_emails"`
|
|
AdAccountEmails []any `json:"ad_account_emails"`
|
|
} `json:"emails"`
|
|
Birthday fbDate `json:"birthday"`
|
|
Gender struct {
|
|
GenderOption string `json:"gender_option"`
|
|
Pronoun string `json:"pronoun"`
|
|
} `json:"gender"`
|
|
PreviousNames []any `json:"previous_names"`
|
|
OtherNames []struct {
|
|
Name string `json:"name"`
|
|
Type string `json:"type"`
|
|
Timestamp int `json:"timestamp"`
|
|
} `json:"other_names"`
|
|
Hometown struct {
|
|
Name string `json:"name"`
|
|
Timestamp int `json:"timestamp"`
|
|
} `json:"hometown"`
|
|
Relationship struct {
|
|
Status string `json:"status"`
|
|
Partner string `json:"partner"`
|
|
Anniversary fbDate `json:"anniversary"`
|
|
Timestamp int `json:"timestamp"`
|
|
} `json:"relationship"`
|
|
FamilyMembers []struct {
|
|
Name string `json:"name"`
|
|
Relation string `json:"relation"`
|
|
Timestamp int `json:"timestamp"`
|
|
} `json:"family_members"`
|
|
EducationExperiences []struct {
|
|
Name string `json:"name"`
|
|
StartTimestamp int `json:"start_timestamp,omitempty"`
|
|
EndTimestamp int `json:"end_timestamp"`
|
|
Graduated bool `json:"graduated"`
|
|
Concentrations []string `json:"concentrations"`
|
|
Degree string `json:"degree,omitempty"`
|
|
SchoolType string `json:"school_type"`
|
|
Timestamp int `json:"timestamp"`
|
|
} `json:"education_experiences"`
|
|
WorkExperiences []any `json:"work_experiences"`
|
|
BloodInfo struct {
|
|
BloodDonorStatus string `json:"blood_donor_status"`
|
|
} `json:"blood_info"`
|
|
Websites []struct {
|
|
Address string `json:"address"`
|
|
} `json:"websites"`
|
|
PhoneNumbers []struct {
|
|
PhoneType string `json:"phone_type"`
|
|
PhoneNumber string `json:"phone_number"`
|
|
Verified bool `json:"verified"`
|
|
} `json:"phone_numbers"`
|
|
Username string `json:"username"`
|
|
RegistrationTimestamp int `json:"registration_timestamp"`
|
|
ProfileURI string `json:"profile_uri"`
|
|
} `json:"profile_v2"`
|
|
}
|
|
|
|
// fbDate is a calendar date expressed as separate numeric year, month and day
// components, as it appears in the export's JSON ("year", "month", "day").
type fbDate struct {
	Year  int `json:"year"`
	Month int `json:"month"`
	Day   int `json:"day"`
}
|
|
|
|
type yourPosts []struct {
|
|
Timestamp int64 `json:"timestamp"`
|
|
Attachments []struct {
|
|
Data []struct {
|
|
Text string `json:"text"`
|
|
ExternalContext struct {
|
|
Name string `json:"name"`
|
|
URL string `json:"url"`
|
|
} `json:"external_context"`
|
|
Media fbArchiveMedia `json:"media"`
|
|
Place struct {
|
|
Name string `json:"name"`
|
|
Coordinate struct {
|
|
Latitude float64 `json:"latitude"`
|
|
Longitude float64 `json:"longitude"`
|
|
} `json:"coordinate"`
|
|
Address string `json:"address"`
|
|
URL string `json:"url"`
|
|
} `json:"place"`
|
|
} `json:"data"`
|
|
} `json:"attachments,omitempty"`
|
|
Data []struct {
|
|
Post string `json:"post"`
|
|
} `json:"data"`
|
|
Title string `json:"title,omitempty"`
|
|
Tags []struct {
|
|
Name string `json:"name"`
|
|
} `json:"tags,omitempty"`
|
|
}
|
|
|
|
type fbYourUncategorizedPhotos struct {
|
|
OtherPhotosV2 []fbArchiveMedia `json:"other_photos_v2"`
|
|
}
|
|
|
|
type fbYourVideos struct {
|
|
VideosV2 []fbArchiveMedia `json:"videos_v2"`
|
|
}
|
|
|
|
type fbArchiveMedia struct {
|
|
URI string `json:"uri"`
|
|
CreationTimestamp int64 `json:"creation_timestamp"`
|
|
MediaMetadata struct {
|
|
PhotoMetadata *struct {
|
|
EXIFData []fbEXIFData `json:"exif_data"`
|
|
} `json:"photo_metadata"`
|
|
VideoMetadata *struct {
|
|
EXIFData []struct {
|
|
UploadIP string `json:"upload_ip"`
|
|
UploadTimestamp int64 `json:"upload_timestamp"`
|
|
} `json:"exif_data"`
|
|
} `json:"video_metadata"`
|
|
} `json:"media_metadata"`
|
|
Title string `json:"title"`
|
|
Description string `json:"description"`
|
|
}
|
|
|
|
func (m fbArchiveMedia) fillItem(item *timeline.Item, d timeline.DirEntry, postText string, logger *zap.Logger) {
|
|
// the media item might not be in this archive if it's a multi-archive export, so set a retrieval key so we can
|
|
// fill the item in later
|
|
retKey := retrievalKey(d, m.URI)
|
|
item.Retrieval.SetKey(retKey)
|
|
item.Retrieval.FieldUpdatePolicies = map[string]timeline.FieldUpdatePolicy{
|
|
"intermediate_location": timeline.UpdatePolicyPreferExisting,
|
|
"timestamp": timeline.UpdatePolicyPreferIncoming,
|
|
"location": timeline.UpdatePolicyPreferIncoming,
|
|
"owner": timeline.UpdatePolicyPreferIncoming,
|
|
"data": timeline.UpdatePolicyPreferExisting,
|
|
"metadata": timeline.UpdatePolicyPreferIncoming,
|
|
}
|
|
|
|
item.IntermediateLocation = m.URI
|
|
|
|
if m.CreationTimestamp > 0 {
|
|
item.Timestamp = time.Unix(m.CreationTimestamp, 0).UTC()
|
|
}
|
|
|
|
if item.Content.Data == nil {
|
|
item.Content = timeline.ItemData{
|
|
Filename: path.Base(m.URI),
|
|
// don't set the Data func here, because it might not be in this archive at all;
|
|
// we traverse media folder separately and use retrieval key to fill in the data file
|
|
}
|
|
}
|
|
|
|
if item.Metadata == nil {
|
|
item.Metadata = make(timeline.Metadata)
|
|
}
|
|
|
|
item.Metadata["Title"] = FixString(m.Title)
|
|
if desc := FixString(m.Description); desc != postText {
|
|
// TODO: this filter probably doesn't work with multiple attachment data where some are text with the same description
|
|
item.Metadata["Description"] = FixString(m.Description)
|
|
}
|
|
|
|
// TODO: use media importer functions for this as well...
|
|
// Collect metadata... while we do, prefer TakenTimestamp over CreationTimestamp;
|
|
// I think TakenTimestamp is when the media was captured, and CreationTimestamp is
|
|
// when the post or attachment was created on Facebook. I think.
|
|
// We also include all the EXIF data Facebook gives us because they strip the EXIF
|
|
// data from the files they give us.
|
|
if m.MediaMetadata.PhotoMetadata != nil {
|
|
for _, exif := range m.MediaMetadata.PhotoMetadata.EXIFData {
|
|
if exif.TakenTimestamp != 0 { // negative is valid! (pre-1970)
|
|
item.Timestamp = time.Unix(exif.TakenTimestamp, 0).UTC()
|
|
|
|
// I've seen this happen where the value is -62169958800 (for multiple items!)
|
|
// which results in a Very Wrong Timestamp. I don't know what to make of this
|
|
// value, so we have to simply throw it away.
|
|
if item.Timestamp.Year() <= 0 {
|
|
logger.Warn("data source provided TakenTimestamp that is before year 0; not using it", zap.Time("taken_timestamp", item.Timestamp))
|
|
item.Timestamp = time.Time{}
|
|
}
|
|
}
|
|
if lat := exif.Latitude; lat != 0 {
|
|
item.Location.Latitude = &lat
|
|
}
|
|
if lon := exif.Longitude; lon != 0 {
|
|
item.Location.Longitude = &lon
|
|
}
|
|
item.Metadata["ISO"] = exif.ISO
|
|
item.Metadata["Focal length"] = exif.FocalLength
|
|
item.Metadata["Upload IP"] = exif.UploadIP
|
|
if exif.ModifiedTimestamp > 0 {
|
|
item.Metadata["Modified"] = time.Unix(exif.ModifiedTimestamp, 0).UTC()
|
|
}
|
|
item.Metadata["Camera make"] = exif.CameraMake
|
|
item.Metadata["Camera model"] = exif.CameraModel
|
|
item.Metadata["Exposure"] = exif.Exposure
|
|
item.Metadata["F-stop"] = exif.FStop
|
|
item.Metadata["Orientation"] = exif.Orientation
|
|
item.Metadata["Original width"] = exif.OriginalWidth
|
|
item.Metadata["Original height"] = exif.OriginalHeight
|
|
}
|
|
} else if m.MediaMetadata.VideoMetadata != nil {
|
|
for _, exif := range m.MediaMetadata.VideoMetadata.EXIFData {
|
|
item.Metadata["Upload IP"] = exif.UploadTimestamp
|
|
item.Metadata["Upload timestamp"] = time.Unix(exif.UploadTimestamp, 0).UTC()
|
|
}
|
|
}
|
|
}
|
|
|
|
func retrievalKey(d timeline.DirEntry, pathOrURI string) string {
|
|
archiveName := filepath.Base(d.FullPath())
|
|
splitAt := strings.LastIndex(archiveName, "-")
|
|
exportID := archiveName
|
|
if splitAt > -1 {
|
|
exportID = archiveName[:splitAt]
|
|
}
|
|
return fmt.Sprintf("facebook::%s::%s", exportID, pathOrURI)
|
|
}
|
|
|
|
type fbMessengerThread struct {
|
|
Participants []struct {
|
|
Name string `json:"name"`
|
|
} `json:"participants"`
|
|
Messages []struct {
|
|
SenderName string `json:"sender_name"`
|
|
TimestampMS int64 `json:"timestamp_ms"`
|
|
IsUnsent bool `json:"is_unsent,omitempty"`
|
|
Content string `json:"content,omitempty"`
|
|
Share struct {
|
|
Link string `json:"link"`
|
|
ShareText string `json:"share_text"`
|
|
} `json:"share,omitempty"`
|
|
Reactions []struct {
|
|
Reaction string `json:"reaction"`
|
|
Actor string `json:"actor"`
|
|
} `json:"reactions,omitempty"`
|
|
Photos []fbArchiveMedia `json:"photos,omitempty"`
|
|
Videos []fbArchiveMedia `json:"videos,omitempty"`
|
|
GIFs []fbArchiveMedia `json:"gifs,omitempty"`
|
|
AudioFiles []fbArchiveMedia `json:"audio_files,omitempty"`
|
|
Sticker fbArchiveMedia `json:"sticker,omitempty"`
|
|
} `json:"messages"`
|
|
Title string `json:"title"`
|
|
IsStillParticipant bool `json:"is_still_participant"`
|
|
ThreadPath string `json:"thread_path"`
|
|
MagicWords []any `json:"magic_words"`
|
|
}
|
|
|
|
func (thread fbMessengerThread) sentTo(senderName, dsName string) []*timeline.Entity {
|
|
var sentTo []*timeline.Entity
|
|
for _, participant := range thread.Participants {
|
|
participantName := FixString(participant.Name)
|
|
if participantName == senderName {
|
|
continue
|
|
}
|
|
sentTo = append(sentTo, &timeline.Entity{
|
|
Name: participantName,
|
|
Attributes: []timeline.Attribute{
|
|
{
|
|
Name: dsName + "_name",
|
|
Value: participantName,
|
|
Identity: true,
|
|
},
|
|
},
|
|
})
|
|
}
|
|
return sentTo
|
|
}
|
|
|
|
type fbAlbumMeta struct {
|
|
Name string `json:"name"`
|
|
Photos []struct {
|
|
URI string `json:"uri"`
|
|
CreationTimestamp int64 `json:"creation_timestamp"`
|
|
MediaMetadata struct {
|
|
PhotoMetadata struct {
|
|
EXIFData []fbEXIFData `json:"exif_data"`
|
|
} `json:"photo_metadata"`
|
|
} `json:"media_metadata"`
|
|
Title string `json:"title"`
|
|
Description string `json:"description"`
|
|
} `json:"photos"`
|
|
CoverPhoto struct {
|
|
URI string `json:"uri"`
|
|
CreationTimestamp int64 `json:"creation_timestamp"`
|
|
MediaMetadata struct {
|
|
PhotoMetadata struct {
|
|
EXIFData []fbEXIFData `json:"exif_data"`
|
|
} `json:"photo_metadata"`
|
|
} `json:"media_metadata"`
|
|
Title string `json:"title"`
|
|
Description string `json:"description"`
|
|
} `json:"cover_photo"`
|
|
LastModifiedTimestamp int64 `json:"last_modified_timestamp"`
|
|
Description string `json:"description"`
|
|
}
|
|
|
|
// fbEXIFData is one entry of a photo's "exif_data" list: camera settings,
// image dimensions, capture/modification times, upload IP and coordinates as
// reported in the export's JSON.
type fbEXIFData struct {
	ISO         int    `json:"iso"`
	FocalLength string `json:"focal_length"`
	UploadIP    string `json:"upload_ip"`

	// TakenTimestamp and ModifiedTimestamp are read as Unix seconds by
	// fbArchiveMedia.fillItem; TakenTimestamp may be negative (pre-1970).
	TakenTimestamp    int64 `json:"taken_timestamp"`
	ModifiedTimestamp int64 `json:"modified_timestamp"`

	CameraMake  string `json:"camera_make"`
	CameraModel string `json:"camera_model"`
	Exposure    string `json:"exposure"`
	FStop       string `json:"f_stop"`
	Orientation int    `json:"orientation"`

	OriginalWidth  int `json:"original_width"`
	OriginalHeight int `json:"original_height"`

	Latitude  float64 `json:"latitude"`
	Longitude float64 `json:"longitude"`
}
|
|
|
|
// fbLabelValue is one labeled property in a "label_values" list. Besides its
// label, an entry may carry a plain string value, a timestamp value, and/or a
// nested list ("dict") of further label/value string pairs.
type fbLabelValue struct {
	Title string `json:"title,omitempty"`
	Label string `json:"label"`
	Value string `json:"value,omitempty"`

	TimestampValue int `json:"timestamp_value,omitempty"`

	// Dict holds nested label/value pairs.
	Dict []struct {
		Label string `json:"label"`
		Value string `json:"value"`
	} `json:"dict,omitempty"`
}
|
|
|
|
// fbCheckIns is the decoded form of a JSON array of check-in entries.
type fbCheckIns []fbCheckIn
|
|
|
|
type fbCheckIn struct {
|
|
Timestamp int `json:"timestamp"`
|
|
Media []any `json:"media"`
|
|
LabelValues []fbLabelValue `json:"label_values"`
|
|
FBID string `json:"fbid"`
|
|
}
|
|
|
|
// parseCoordsFromDictString parses coordinates from the dict label value.
// Expected format: "(38.575804323267 , -121.4804199327)" which is (lat, lon) order
//
// Surrounding whitespace and the enclosing parentheses are optional. An error
// is returned when the string does not contain a comma, when either half is
// not a valid float, or when a value lies outside the valid range (latitude
// within [-90, 90], longitude within [-180, 180]). NaN is rejected as well.
// On error, lat and lon are both 0.
//
//nolint:mnd
func parseCoordsFromDictString(coordStr string) (lat, lon float64, err error) {
	// remove fluff
	coordStr = strings.TrimSpace(coordStr)
	coordStr = strings.TrimPrefix(coordStr, "(")
	coordStr = strings.TrimSuffix(coordStr, ")")

	// split lat/lon at the first comma and parse each float
	latStr, lonStr, found := strings.Cut(coordStr, ",")
	if !found {
		return 0, 0, fmt.Errorf("invalid lat/lon string: %q", coordStr)
	}
	lat, err = strconv.ParseFloat(strings.TrimSpace(latStr), 64)
	if err != nil {
		return 0, 0, fmt.Errorf("invalid latitude float: %w", err)
	}
	lon, err = strconv.ParseFloat(strings.TrimSpace(lonStr), 64)
	if err != nil {
		return 0, 0, fmt.Errorf("invalid longitude float: %w", err)
	}

	// ParseFloat accepts "NaN", and every ordered comparison with NaN is
	// false, so the range checks are phrased as "not inside the range"
	// rather than "outside the range" to reject NaN too.
	if !(lat >= -90 && lat <= 90) {
		return 0, 0, fmt.Errorf("invalid latitude: %f", lat)
	}
	if !(lon >= -180 && lon <= 180) {
		return 0, 0, fmt.Errorf("invalid longitude: %f", lon)
	}

	return lat, lon, nil
}
|