/* Timelinize Copyright (c) 2013 Matthew Holt This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ package twitter import ( "bytes" "context" "encoding/json" "errors" "fmt" "html" "io" "io/fs" "log" "math" "net/url" "path" "strconv" "strings" "time" "github.com/timelinize/timelinize/timeline" ) // type tweetFromAPI struct { // InReplyToUserID string `json:"in_reply_to_user_id,omitempty"` // ReferencedTweets []struct { // Type string `json:"type"` // ID string `json:"id"` // } `json:"referenced_tweets,omitempty"` // Text string `json:"text"` // PublicMetrics struct { // RetweetCount int `json:"retweet_count"` // ReplyCount int `json:"reply_count"` // LikeCount int `json:"like_count"` // QuoteCount int `json:"quote_count"` // } `json:"public_metrics"` // Lang string `json:"lang"` // ConversationID string `json:"conversation_id"` // CreatedAt time.Time `json:"created_at"` // ID string `json:"id"` // Entities struct { // Mentions []struct { // Start int `json:"start"` // End int `json:"end"` // Username string `json:"username"` // ID string `json:"id"` // } `json:"mentions"` // URLs []struct { // Start int `json:"start"` // End int `json:"end"` // URL string `json:"url"` // ExpandedURL string `json:"expanded_url"` // DisplayURL string `json:"display_url"` // Images []struct { // URL string `json:"url"` // Width int `json:"width"` // Height int `json:"height"` // } `json:"images"` // Status int `json:"status"` // Title string `json:"title"` // Description string `json:"description"` // UnwoundURL string `json:"unwound_url"` // } `json:"urls"` // Annotations []struct { // Start int `json:"start"` // End int `json:"end"` // Probability float64 `json:"probability"` // Type string `json:"type"` // NormalizedText string `json:"normalized_text"` // } `json:"annotations"` // } `json:"entities,omitempty"` // AuthorID string `json:"author_id"` // ReplySettings string `json:"reply_settings"` // Source string `json:"source"` // PossiblySensitive bool `json:"possibly_sensitive"` // ContextAnnotations []struct { // Domain idNameDesc `json:"domain"` // Entity idNameDesc `json:"entity"` // } `json:"context_annotations,omitempty"` // Attachments struct { // MediaKeys []string `json:"media_keys"` // } `json:"attachments,omitempty"` // Geo struct { // Coordinates struct { // Type string `json:"type"` // "Point" // Coordinates []float64 `json:"coordinates"` // latitude, longitude pair // } `json:"coordinates"` // PlaceID string `json:"place_id,omitempty"` // } `json:"geo,omitempty"` // } // func (t tweetFromAPI) owner(page userTweetsResponsePage) timeline.Entity { // owner := timeline.Entity{ // Attributes: []timeline.Attribute{ // { // Name: identityAttribute, // Value: t.AuthorID, // Identity: true, // }, // }, // } // for _, u := range page.Includes.Users { // if u.Data.ID == t.AuthorID { // owner.Name = u.Data.Name // owner.Attributes = append(owner.Attributes, timeline.Attribute{ // Name: "twitter_username", // Value: u.Data.Username, // }) // break // } // } // return owner // } // type idNameDesc struct { // ID string `json:"id"` // Name string `json:"name"` // Description string `json:"description"` // } // type userTweetsResponsePage struct { // Data []tweetFromAPI `json:"data"` // Includes struct { // Tweets []tweetFromAPI `json:"tweets"` // Users []twitterAccount `json:"users"` // Media []struct { // MediaKey string `json:"media_key"` // Height int `json:"height"` // URL string `json:"url,omitempty"` // Type string `json:"type"` // Width int `json:"width"` // DurationMs int `json:"duration_ms,omitempty"` // PreviewImageURL string `json:"preview_image_url,omitempty"` // PublicMetrics struct { // ViewCount int `json:"view_count"` // } `json:"public_metrics,omitempty"` // } `json:"media"` // Places []struct { // Geo struct { // GeoJSON format (look it up) // Type string `json:"type"` // BBox []float64 `json:"bbox"` // bounding box is the rectangle (usually 4 points) that contain the object // Properties struct { // } `json:"properties"` // } `json:"geo"` // CountryCode string `json:"country_code"` // Name string `json:"name"` // ID string `json:"id"` // PlaceType string `json:"place_type"` // Country string `json:"country"` // FullName string `json:"full_name"` // } `json:"places"` // } `json:"includes"` // Meta struct { // NextToken string `json:"next_token"` // ResultCount int `json:"result_count"` // NewestID string `json:"newest_id"` // OldestID string `json:"oldest_id"` // } `json:"meta"` // Errors []struct { // ResourceType string `json:"resource_type"` // Field string `json:"field"` // Title string `json:"title"` // Section string `json:"section"` // Detail string `json:"detail"` // Type string `json:"type"` // } `json:"errors"` // } // func (tweet tweetFromAPI) toItemGraph(page userTweetsResponsePage) *timeline.Graph { // owner := tweet.owner(page) // // get location info; prefer user's precise location if available, otherwise use place's geo info // var geo timeline.Location // if len(tweet.Geo.Coordinates.Coordinates) == 2 { // geo.Latitude, geo.Longitude = &tweet.Geo.Coordinates.Coordinates[0], &tweet.Geo.Coordinates.Coordinates[1] // } else if tweet.Geo.PlaceID != "" { // for _, pl := range page.Includes.Places { // if len(pl.Geo.BBox) == 4 { // // TODO: we only support a single point, so find center of bounding box... supposedly they should go from SW to NE (counterclockwise) // } // } // } // it := &timeline.Item{ // ID: tweet.ID, // Timestamp: tweet.CreatedAt, // Location: geo, // Owner: owner, // Metadata: timeline.Metadata{ // "Retweets": tweet.PublicMetrics.RetweetCount, // "Quotes": tweet.PublicMetrics.QuoteCount, // "Likes": tweet.PublicMetrics.LikeCount, // "Source": tweet.Source, // "Language": tweet.Lang, // }, // } // if tweet.Text != "" { // expandedText := tweet.Text // // replace any shortened URLs with their fully-expanded (and unwound) form // // (according to Twitter API docs, "unwound" means after following redirects // // from URL shorteners like bitly, etc.) // for _, urlEnt := range tweet.Entities.URLs { // textToReplace := tweet.Text[urlEnt.Start:urlEnt.End] // expandedText = strings.Replace(expandedText, textToReplace, urlEnt.UnwoundURL, 1) // } // it.Content = timeline.ItemData{ // Data: timeline.StringData(expandedText), // } // } // ig := &timeline.Graph{Item: it} // // attach media elements to the main tweet's item graph // for _, mediaKey := range tweet.Attachments.MediaKeys { // // find this media item in the attachments list // for _, attachment := range page.Includes.Media { // // skip attachments that aren't the one we're looking for, // // or which have an empty URL (sigh) // if attachment.MediaKey != mediaKey || attachment.URL == "" { // continue // } // mediaItem := &timeline.Item{ // ID: attachment.MediaKey, // Timestamp: tweet.CreatedAt, // Owner: it.Owner, // Content: timeline.ItemData{ // Filename: path.Base(attachment.URL), // Data: func(context.Context) (io.ReadCloser, error) { // resp, err := http.Get(attachment.URL) // if err != nil { // return nil, err // } // return resp.Body, nil // }, // }, // Metadata: timeline.Metadata{ // "Width": attachment.Width, // "Height": attachment.Height, // "Duration (milliseconds)": attachment.DurationMs, // "Views": attachment.PublicMetrics.ViewCount, // }, // } // ig.ToItem(timeline.RelAttachment, mediaItem) // break // } // } // return ig // } // func (page userTweetsResponsePage) process(itemChan chan<- *timeline.Graph, opt Options) error { // nextTweet: // for _, tweet := range page.Data { // // skip retweets unless configured // if !opt.Retweets { // for _, ref := range tweet.ReferencedTweets { // if ref.Type == "retweeted" || ref.Type == "quoted" { // continue nextTweet // } // } // } // ig := tweet.toItemGraph(page) // // if this tweet is in reply to another tweet, we add that // // other tweet to the graph; but since our unidirectional // // relation ReplyTo goes FROM the first message TO the reply, // // we need to actually create a graph for the first message, // // then connect the original tweet which is the reply; this // // is a little awkward since we're starting with the reply // // and getting its "parent", which is backwards from how it // // was designed (start with parent, get replies). // // TODO: skip replies unless configured to have them // // attach tweet this tweet is in reply to (if any) // for _, ref := range tweet.ReferencedTweets { // if ref.Type != "replied_to" { // continue // } // // find the referenced tweet in the list of attached tweets // for _, refTweet := range page.Includes.Tweets { // if refTweet.ID != ref.ID { // continue // } // // TODO: I think this relationship is backwards... double-check this! // refTweetItemGraph := refTweet.toItemGraph(page) // refTweetItemGraph.Edges = append(refTweetItemGraph.Edges, timeline.Relationship{ // Relation: timeline.RelReply, // To: ig, // }) // // TODO: How much of the conversation can/should we do? Maybe make it configurable? // // this will add both the first tweet and the reply to // // the timeline, then we'll end up sending the reply again, // // but that should be OK since the timeline should be able // // to deduplicate for us // itemChan <- refTweetItemGraph // } // } // itemChan <- ig // } // return nil // } type tweet struct { Contributors any `json:"contributors"` Coordinates *tweetGeo `json:"coordinates,omitempty"` CreatedAt string `json:"created_at"` DisplayTextRange []transInt `json:"display_text_range"` Entities *twitterEntities `json:"entities,omitempty"` // DO NOT USE (https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/entities-object.html#media) ExtendedEntities *extendedEntities `json:"extended_entities,omitempty"` FavoriteCount transInt `json:"favorite_count"` Favorited bool `json:"favorited"` FullText string `json:"full_text"` // tweet_mode=extended (https://developer.twitter.com/en/docs/tweets/tweet-updates) InReplyToScreenName string `json:"in_reply_to_screen_name,omitempty"` InReplyToStatusID transInt `json:"in_reply_to_status_id,omitempty"` InReplyToStatusIDStr string `json:"in_reply_to_status_id_str,omitempty"` InReplyToUserID transInt `json:"in_reply_to_user_id,omitempty"` InReplyToUserIDStr string `json:"in_reply_to_user_id_str,omitempty"` IsQuoteStatus bool `json:"is_quote_status"` Lang string `json:"lang"` Place any `json:"place"` PossiblySensitive bool `json:"possibly_sensitive,omitempty"` RetweetCount transInt `json:"retweet_count"` Retweeted bool `json:"retweeted"` // always false for some reason RetweetedStatus *tweet `json:"retweeted_status"` // API: contains full_text of a retweet (otherwise is truncated) Source string `json:"source"` Text string `json:"text"` // As of Feb. 2019, Twitter API default; truncated at ~140 chars (see FullText) Truncated bool `json:"truncated"` // API: always false in tweet_mode=extended, even if full_text is truncated (retweets) TweetID transInt `json:"id"` TweetIDStr string `json:"id_str"` User *twitterUser `json:"user"` WithheldCopyright bool `json:"withheld_copyright,omitempty"` WithheldInCountries []string `json:"withheld_in_countries,omitempty"` WithheldScope string `json:"withheld_scope,omitempty"` createdAtParsed time.Time owner timeline.Entity source string // "api|archive" } // func (t *tweet) id() string { // return t.TweetIDStr // } // content returns the text of the tweet, or, if text is empty, it // returns the first media item as data (if any). func (t *tweet) content() timeline.ItemData { var data timeline.ItemData if txt := t.text(); txt != "" { data.Data = timeline.StringData(txt) } else if t.ExtendedEntities != nil && len(t.ExtendedEntities.Media) > 0 { data.Filename = t.ExtendedEntities.Media[0].fileName() data.Data = t.ExtendedEntities.Media[0].fileReader data.MediaType = t.ExtendedEntities.Media[0].mediaType() } return data } func (t *tweet) isRetweet() bool { if t.Retweeted || t.RetweetedStatus != nil { return true } // TODO: For some reason, when exporting one's Twitter data, // it always sets "retweeted" to false, even when "full_text" // clearly shows it's a retweet by prefixing it with "RT @" // - this seems like a bug with Twitter's exporter... okay // actually the API does it too, that's dumb return strings.HasPrefix(t.rawText(), "RT @") } func (t *tweet) isEmpty() bool { return strings.TrimSpace(t.text()) == "" && (t.ExtendedEntities == nil || len(t.ExtendedEntities.Media) == 0) } // text returns the full text of the tweet, with entities added inline. func (t *tweet) text() string { txt := t.rawText() expandedText := html.UnescapeString(txt) // replace any annoying t.co shortened URLs with their fully-expanded form if t.Entities != nil { for _, urlEnt := range t.Entities.URLs { const requiredCount = 2 if len(urlEnt.Indices) != requiredCount { continue } textToReplace := txt[urlEnt.Indices[0]:urlEnt.Indices[1]] expandedText = strings.Replace(expandedText, textToReplace, urlEnt.ExpandedURL, 1) } } // replace any links to embedded media with the full URL // (although, this is not necessary, because we link the // media in our own way, without a URL) if t.ExtendedEntities != nil { for _, ent := range t.ExtendedEntities.Media { const requiredCount = 2 if len(ent.Indices) != requiredCount { continue } textToReplace := txt[ent.Indices[0]:ent.Indices[1]] expandedText = strings.Replace(expandedText, textToReplace, ent.ExpandedURL, 1) } } return expandedText } // rawText returns the "raw" text of the tweet, without // replacing entities (but it does dereference any // retweeted status to obtain its text, if present). func (t *tweet) rawText() string { // sigh, retweets get truncated if they're tall, // so we have to get the full text from a subfield if t.RetweetedStatus != nil { return strings.TrimSpace(fmt.Sprintf("RT @%s %s", t.RetweetedStatus.User.ScreenName, t.RetweetedStatus.text())) } if t.FullText != "" { return t.FullText } return t.Text } // location returns the best guess for the tweet's location, because Twitter // randomizes the order of the coordinates we can't always be sure which is which >:( func (t *tweet) location() timeline.Location { var loc timeline.Location if t.Coordinates == nil { return loc } // grr, during dev I noticed that Twitter randomly orders the coordinate values, // so we only know which is which if one of them is > |90|. c0, err := strconv.ParseFloat(t.Coordinates.Coordinates[0], 64) if err != nil { return loc } c1, err := strconv.ParseFloat(t.Coordinates.Coordinates[1], 64) if err != nil { return loc } const maxLatitude = 90 if math.Abs(c0) > maxLatitude { loc.Latitude = &c1 loc.Longitude = &c0 } else { // if c1 > |90|, great, but if both are less than 90, we just don't know loc.Latitude = &c0 loc.Longitude = &c1 } return loc } type tweetGeo struct { Type string `json:"type"` Coordinates []string `json:"coordinates"` // TODO: these are not in any particular order! That's *GREAT*... sigh. My own export has 2 tweets with coords, and they're the same point, but both are in a different order } // type tweetPlace struct { // ID string `json:"id"` // URL string `json:"url"` // PlaceType string `json:"place_type"` // Name string `json:"name"` // FullName string `json:"full_name"` // CountryCode string `json:"country_code"` // Country string `json:"country"` // BoundingBox boundingBox `json:"bounding_box"` // } // type boundingBox struct { // Type string `json:"type"` // // "A series of longitude and latitude points, defining a box which will contain // // the Place entity this bounding box is related to. Each point is an array in // // the form of [longitude, latitude]. Points are grouped into an array per bounding // // box. Bounding box arrays are wrapped in one additional array to be compatible // // with the polygon notation." // Coordinates [][][]float64 `json:"coordinates"` // } type twitterEntities struct { Hashtags []hashtagEntity `json:"hashtags"` Symbols []symbolEntity `json:"symbols"` UserMentions []userMentionEntity `json:"user_mentions"` URLs []urlEntity `json:"urls"` Polls []pollEntity `json:"polls"` } type hashtagEntity struct { Indices []transInt `json:"indices"` Text string `json:"text"` } type symbolEntity struct { Indices []transInt `json:"indices"` Text string `json:"text"` } type urlEntity struct { URL string `json:"url"` ExpandedURL string `json:"expanded_url"` DisplayURL string `json:"display_url"` Unwound *urlEntityUnwound `json:"unwound,omitempty"` Indices []transInt `json:"indices"` } type urlEntityUnwound struct { URL string `json:"url"` Status int `json:"status"` Title string `json:"title"` Description string `json:"description"` } type userMentionEntity struct { Name string `json:"name"` ScreenName string `json:"screen_name"` Indices []transInt `json:"indices"` IDStr string `json:"id_str"` ID transInt `json:"id"` } type pollEntity struct { Options []pollOption `json:"options"` EndDatetime string `json:"end_datetime"` DurationMinutes int `json:"duration_minutes"` } type pollOption struct { Position int `json:"position"` Text string `json:"text"` } type extendedEntities struct { Media []*mediaItem `json:"media"` } type mediaItem struct { AdditionalMediaInfo *additionalMediaInfo `json:"additional_media_info,omitempty"` DisplayURL string `json:"display_url"` ExpandedURL string `json:"expanded_url"` Indices []transInt `json:"indices"` MediaID transInt `json:"id"` MediaIDStr string `json:"id_str"` MediaURL string `json:"media_url"` MediaURLHTTPS string `json:"media_url_https"` Sizes mediaSizes `json:"sizes"` SourceStatusID transInt `json:"source_status_id"` SourceStatusIDStr string `json:"source_status_id_str"` SourceUserID transInt `json:"source_user_id"` SourceUserIDStr string `json:"source_user_id_str"` Type string `json:"type"` URL string `json:"url"` VideoInfo *videoInfo `json:"video_info,omitempty"` parent *tweet readCloser io.ReadCloser // access to the media contents } func (m mediaItem) owner() timeline.Entity { if m.SourceUserIDStr == "" { // assume it is owned by owner of tweet it is contained in return m.parent.owner } return timeline.Entity{ Attributes: []timeline.Attribute{ { Name: identityAttribute, Value: m.SourceUserIDStr, Identity: true, }, }, } } func (m mediaItem) fileName() string { source := m.getURL() u, err := url.Parse(source) if err == nil { source = path.Base(u.Path) } else { source = path.Base(source) } // media in the export archives are prefixed by the // tweet ID they were posted with and a hyphen if m.parent.source == srcArchive { source = fmt.Sprintf("%s-%s", m.parent.TweetIDStr, source) } return source } func (m mediaItem) content() timeline.ItemData { return timeline.ItemData{ Filename: m.fileName(), Data: m.fileReader, MediaType: m.mediaType(), } } func (m mediaItem) fileReader(_ context.Context) (io.ReadCloser, error) { return m.readCloser, nil } func (m mediaItem) mediaType() string { switch m.Type { case "animated_gif": fallthrough case "video": bitrate, contentType, _ := m.getLargestVideo() log.Printf("[DEBUG] Largest video bitrate: %d", bitrate) return contentType case "photo": fname := m.fileName() if fname == "" { return "" } ext := strings.ToLower(path.Ext(fname)) if len(ext) == 0 { return "" } suffix := ext[1:] // trim the leading dot if suffix == "jpg" { suffix = "jpeg" } return "image/" + suffix } return "" } func (m mediaItem) getLargestVideo() (bitrate int, contentType, source string) { if m.VideoInfo == nil { return } bitrate = -1 // so that greater-than comparison below works for video bitrate=0 (animated_gif) for _, v := range m.VideoInfo.Variants { if int(v.Bitrate) > bitrate { source = v.URL contentType = v.ContentType bitrate = int(v.Bitrate) } } return } func (m mediaItem) getURL() string { switch m.Type { case "animated_gif": fallthrough case "video": _, _, source := m.getLargestVideo() return source case "photo": // the size of the photo can be adjusted // when downloading by appending a size // to the end of the URL: ":thumb", ":small", // ":medium", ":large", or ":orig" -- but // we don't do that here, only do that when // actually downloading if m.MediaURLHTTPS != "" { return m.MediaURLHTTPS } return m.MediaURL } return "" } type additionalMediaInfo struct { Monetizable bool `json:"monetizable"` } type videoInfo struct { AspectRatio []transFloat `json:"aspect_ratio"` DurationMillis transInt `json:"duration_millis"` Variants []videoVariants `json:"variants"` } type videoVariants struct { Bitrate transInt `json:"bitrate,omitempty"` ContentType string `json:"content_type,omitempty"` URL string `json:"url"` } type mediaSizes struct { Thumb mediaSize `json:"thumb"` Small mediaSize `json:"small"` Medium mediaSize `json:"medium"` Large mediaSize `json:"large"` } type mediaSize struct { W transInt `json:"w"` H transInt `json:"h"` Resize string `json:"resize"` // fit|crop } type twitterUser struct { ContributorsEnabled bool `json:"contributors_enabled"` CreatedAt string `json:"created_at"` DefaultProfile bool `json:"default_profile"` DefaultProfileImage bool `json:"default_profile_image"` Description string `json:"description"` Entities *twitterEntities `json:"entities"` FavouritesCount int `json:"favourites_count"` FollowersCount int `json:"followers_count"` Following any `json:"following"` FollowRequestSent any `json:"follow_request_sent"` FriendsCount int `json:"friends_count"` GeoEnabled bool `json:"geo_enabled"` HasExtendedProfile bool `json:"has_extended_profile"` IsTranslationEnabled bool `json:"is_translation_enabled"` IsTranslator bool `json:"is_translator"` Lang string `json:"lang"` ListedCount int `json:"listed_count"` Location string `json:"location"` Name string `json:"name"` Notifications any `json:"notifications"` ProfileBackgroundColor string `json:"profile_background_color"` ProfileBackgroundImageURL string `json:"profile_background_image_url"` ProfileBackgroundImageURLHTTPS string `json:"profile_background_image_url_https"` ProfileBackgroundTile bool `json:"profile_background_tile"` ProfileBannerURL string `json:"profile_banner_url"` ProfileImageURL string `json:"profile_image_url"` ProfileImageURLHTTPS string `json:"profile_image_url_https"` ProfileLinkColor string `json:"profile_link_color"` ProfileSidebarBorderColor string `json:"profile_sidebar_border_color"` ProfileSidebarFillColor string `json:"profile_sidebar_fill_color"` ProfileTextColor string `json:"profile_text_color"` ProfileUseBackgroundImage bool `json:"profile_use_background_image"` Protected bool `json:"protected"` ScreenName string `json:"screen_name"` StatusesCount int `json:"statuses_count"` TimeZone any `json:"time_zone"` TranslatorType string `json:"translator_type"` URL string `json:"url"` UserID transInt `json:"id"` UserIDStr string `json:"id_str"` UtcOffset any `json:"utc_offset"` Verified bool `json:"verified"` } type phoneNumberFile []struct { Device struct { PhoneNumber string `json:"phoneNumber"` } `json:"device"` } type profileFile []struct { Profile struct { Description struct { Bio string `json:"bio"` Website string `json:"website"` Location string `json:"location"` } `json:"description"` AvatarMediaURL string `json:"avatarMediaUrl"` HeaderMediaURL string `json:"headerMediaUrl"` } `json:"profile"` } type twitterAccountFile []struct { Account twitterAccount `json:"account"` } type twitterAccount struct { // fields from export archive file: account.js PhoneNumber string `json:"phoneNumber"` Email string `json:"email"` CreatedVia string `json:"createdVia"` CreatedAt string `json:"createdAt"` Username string `json:"username"` AccountID string `json:"accountId"` AccountDisplayName string `json:"accountDisplayName"` // info from file: phone-number.js PhoneNumbers phoneNumberFile // info from file: profile.js Profile profileFile // fields from API endpoint: GET /2/users[/by/username/...] Data struct { Verified bool `json:"verified"` CreatedAt time.Time `json:"created_at"` Description string `json:"description"` Location string `json:"location"` Entities struct { URL struct { URLs []struct { Start int `json:"start"` End int `json:"end"` URL string `json:"url"` ExpandedURL string `json:"expanded_url"` DisplayURL string `json:"display_url"` } `json:"urls"` } `json:"url"` Description struct { Mentions []struct { Start int `json:"start"` End int `json:"end"` Username string `json:"username"` } `json:"mentions"` } `json:"description"` } `json:"entities"` PublicMetrics struct { FollowersCount int `json:"followers_count"` FollowingCount int `json:"following_count"` TweetCount int `json:"tweet_count"` ListedCount int `json:"listed_count"` } `json:"public_metrics"` URL string `json:"url"` ProfileImageURL string `json:"profile_image_url"` Name string `json:"name"` Protected bool `json:"protected"` PinnedTweetID string `json:"pinned_tweet_id"` Username string `json:"username"` ID string `json:"id"` } `json:"data"` } // func (ta twitterAccount) screenName() string { // if ta.Data.Username != "" { // return ta.Data.Username // from API // } // return ta.Username // from archive file // } // func (ta twitterAccount) id() string { // if ta.Data.ID != "" { // return ta.Data.ID // from API // } // return ta.AccountID // from archive file // } // func (ta twitterAccount) name() string { // if ta.Data.Name != "" { // return ta.Data.Name // from API // } // return ta.AccountDisplayName // from archive file // } // entity returns a populated Entity from a populated twitterAccount. func (ta twitterAccount) entity(_ context.Context, fsys fs.FS) timeline.Entity { ent := timeline.Entity{ Name: ta.AccountDisplayName, Attributes: []timeline.Attribute{ { Name: identityAttribute, Value: ta.AccountID, Identity: true, }, { Name: timeline.AttributeEmail, Value: ta.Email, Identifying: true, }, { Name: timeline.AttributePhoneNumber, Value: ta.PhoneNumber, Identifying: true, }, { Name: "twitter_username", Value: ta.Username, Identifying: true, }, }, } for _, ph := range ta.PhoneNumbers { ent.Attributes = append(ent.Attributes, timeline.Attribute{ Name: timeline.AttributePhoneNumber, Value: ph.Device.PhoneNumber, Identifying: true, }) } if len(ta.Profile) > 0 { profile := ta.Profile[0].Profile if profile.AvatarMediaURL != "" { if fsys == nil { ent.NewPicture = timeline.DownloadData(profile.AvatarMediaURL) } else { ent.NewPicture = func(_ context.Context) (io.ReadCloser, error) { avatarFilename := ta.AccountID + "-" + path.Base(profile.AvatarMediaURL) picPath := path.Join("data", "profile_media", avatarFilename) return fsys.Open(picPath) } } } ent.Metadata = timeline.Metadata{ "Twitter bio": profile.Description.Bio, } ent.Attributes = append(ent.Attributes, timeline.Attribute{ Name: "twitter_location", Value: profile.Description.Location, }) } return ent } type directMessages struct { DMConversation dmConversation `json:"dmConversation"` } type dmConversation struct { ConversationID string `json:"conversationId"` Messages []struct { MessageCreate struct { RecipientID string `json:"recipientId"` Reactions []any `json:"reactions"` URLs []struct { URL string `json:"url"` Expanded string `json:"expanded"` Display string `json:"display"` } `json:"urls"` Text string `json:"text"` MediaURLs []string `json:"mediaUrls"` SenderID string `json:"senderId"` ID string `json:"id"` CreatedAt time.Time `json:"createdAt"` } `json:"messageCreate"` } `json:"messages"` } type archiveManifest struct { UserInfo struct { AccountID string `json:"accountId"` UserName string `json:"userName"` DisplayName string `json:"displayName"` } `json:"userInfo"` ArchiveInfo struct { SizeBytes string `json:"sizeBytes"` GenerationDate time.Time `json:"generationDate"` IsPartialArchive bool `json:"isPartialArchive"` MaxPartSizeBytes string `json:"maxPartSizeBytes"` } `json:"archiveInfo"` ReadmeInfo struct { FileName string `json:"fileName"` Directory string `json:"directory"` Name string `json:"name"` } `json:"readmeInfo"` DataTypes map[string]struct { Files []struct { FileName string `json:"fileName"` GlobalName string `json:"globalName"` Count string `json:"count"` } `json:"files"` } `json:"dataTypes"` } // transInt is an integer that could be // unmarshaled from a string, too. This // is needed because the archive JSON // from Twitter uses all string values, // but the same fields are integers with // the API. type transInt int64 func (ti *transInt) UnmarshalJSON(b []byte) error { if len(b) == 0 { return errors.New("no value") } b = bytes.Trim(b, "\"") var i int64 err := json.Unmarshal(b, &i) if err != nil { return err } *ti = transInt(i) return nil } // transFloat is like transInt but for floats. type transFloat float64 func (tf *transFloat) UnmarshalJSON(b []byte) error { if len(b) == 0 { return errors.New("no value") } b = bytes.Trim(b, "\"") var f float64 err := json.Unmarshal(b, &f) if err != nil { return err } *tf = transFloat(f) return nil } const identityAttribute = "twitter_id" const srcArchive = "archive"