timelinize/datasources/twitter/models.go

/*
	Timelinize
	Copyright (c) 2013 Matthew Holt

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU Affero General Public License as published
	by the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU Affero General Public License for more details.

	You should have received a copy of the GNU Affero General Public License
	along with this program.  If not, see <https://www.gnu.org/licenses/>.
*/

package twitter

import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"html"
	"io"
	"io/fs"
	"log"
	"math"
	"net/url"
	"path"
	"strconv"
	"strings"
	"time"

	"github.com/timelinize/timelinize/timeline"
)

// type tweetFromAPI struct {
// 	InReplyToUserID  string `json:"in_reply_to_user_id,omitempty"`
// 	ReferencedTweets []struct {
// 		Type string `json:"type"`
// 		ID   string `json:"id"`
// 	} `json:"referenced_tweets,omitempty"`
// 	Text          string `json:"text"`
// 	PublicMetrics struct {
// 		RetweetCount int `json:"retweet_count"`
// 		ReplyCount   int `json:"reply_count"`
// 		LikeCount    int `json:"like_count"`
// 		QuoteCount   int `json:"quote_count"`
// 	} `json:"public_metrics"`
// 	Lang           string    `json:"lang"`
// 	ConversationID string    `json:"conversation_id"`
// 	CreatedAt      time.Time `json:"created_at"`
// 	ID             string    `json:"id"`
// 	Entities       struct {
// 		Mentions []struct {
// 			Start    int    `json:"start"`
// 			End      int    `json:"end"`
// 			Username string `json:"username"`
// 			ID       string `json:"id"`
// 		} `json:"mentions"`
// 		URLs []struct {
// 			Start       int    `json:"start"`
// 			End         int    `json:"end"`
// 			URL         string `json:"url"`
// 			ExpandedURL string `json:"expanded_url"`
// 			DisplayURL  string `json:"display_url"`
// 			Images      []struct {
// 				URL    string `json:"url"`
// 				Width  int    `json:"width"`
// 				Height int    `json:"height"`
// 			} `json:"images"`
// 			Status      int    `json:"status"`
// 			Title       string `json:"title"`
// 			Description string `json:"description"`
// 			UnwoundURL  string `json:"unwound_url"`
// 		} `json:"urls"`
// 		Annotations []struct {
// 			Start          int     `json:"start"`
// 			End            int     `json:"end"`
// 			Probability    float64 `json:"probability"`
// 			Type           string  `json:"type"`
// 			NormalizedText string  `json:"normalized_text"`
// 		} `json:"annotations"`
// 	} `json:"entities,omitempty"`
// 	AuthorID           string `json:"author_id"`
// 	ReplySettings      string `json:"reply_settings"`
// 	Source             string `json:"source"`
// 	PossiblySensitive  bool   `json:"possibly_sensitive"`
// 	ContextAnnotations []struct {
// 		Domain idNameDesc `json:"domain"`
// 		Entity idNameDesc `json:"entity"`
// 	} `json:"context_annotations,omitempty"`
// 	Attachments struct {
// 		MediaKeys []string `json:"media_keys"`
// 	} `json:"attachments,omitempty"`
// 	Geo struct {
// 		Coordinates struct {
// 			Type        string    `json:"type"`        // "Point"
// 			Coordinates []float64 `json:"coordinates"` // latitude, longitude pair
// 		} `json:"coordinates"`
// 		PlaceID string `json:"place_id,omitempty"`
// 	} `json:"geo,omitempty"`
// }

// func (t tweetFromAPI) owner(page userTweetsResponsePage) timeline.Entity {
// 	owner := timeline.Entity{
// 		Attributes: []timeline.Attribute{
// 			{
// 				Name:     identityAttribute,
// 				Value:    t.AuthorID,
// 				Identity: true,
// 			},
// 		},
// 	}
// 	for _, u := range page.Includes.Users {
// 		if u.Data.ID == t.AuthorID {
// 			owner.Name = u.Data.Name
// 			owner.Attributes = append(owner.Attributes, timeline.Attribute{
// 				Name:  "twitter_username",
// 				Value: u.Data.Username,
// 			})
// 			break
// 		}
// 	}
// 	return owner
// }

// type idNameDesc struct {
// 	ID          string `json:"id"`
// 	Name        string `json:"name"`
// 	Description string `json:"description"`
// }

// type userTweetsResponsePage struct {
// 	Data []tweetFromAPI `json:"data"`

// 	Includes struct {
// 		Tweets []tweetFromAPI   `json:"tweets"`
// 		Users  []twitterAccount `json:"users"`
// 		Media  []struct {
// 			MediaKey        string `json:"media_key"`
// 			Height          int    `json:"height"`
// 			URL             string `json:"url,omitempty"`
// 			Type            string `json:"type"`
// 			Width           int    `json:"width"`
// 			DurationMs      int    `json:"duration_ms,omitempty"`
// 			PreviewImageURL string `json:"preview_image_url,omitempty"`
// 			PublicMetrics   struct {
// 				ViewCount int `json:"view_count"`
// 			} `json:"public_metrics,omitempty"`
// 		} `json:"media"`
// 		Places []struct {
// 			Geo struct { // GeoJSON format (look it up)
// 				Type       string    `json:"type"`
// 				BBox       []float64 `json:"bbox"` // bounding box is the rectangle (usually 4 points) that contain the object
// 				Properties struct {
// 				} `json:"properties"`
// 			} `json:"geo"`
// 			CountryCode string `json:"country_code"`
// 			Name        string `json:"name"`
// 			ID          string `json:"id"`
// 			PlaceType   string `json:"place_type"`
// 			Country     string `json:"country"`
// 			FullName    string `json:"full_name"`
// 		} `json:"places"`
// 	} `json:"includes"`

// 	Meta struct {
// 		NextToken   string `json:"next_token"`
// 		ResultCount int    `json:"result_count"`
// 		NewestID    string `json:"newest_id"`
// 		OldestID    string `json:"oldest_id"`
// 	} `json:"meta"`

// 	Errors []struct {
// 		ResourceType string `json:"resource_type"`
// 		Field        string `json:"field"`
// 		Title        string `json:"title"`
// 		Section      string `json:"section"`
// 		Detail       string `json:"detail"`
// 		Type         string `json:"type"`
// 	} `json:"errors"`
// }

// func (tweet tweetFromAPI) toItemGraph(page userTweetsResponsePage) *timeline.Graph {
// 	owner := tweet.owner(page)

// 	// get location info; prefer user's precise location if available, otherwise use place's geo info
// 	var geo timeline.Location
// 	if len(tweet.Geo.Coordinates.Coordinates) == 2 {
// 		geo.Latitude, geo.Longitude = &tweet.Geo.Coordinates.Coordinates[0], &tweet.Geo.Coordinates.Coordinates[1]
// 	} else if tweet.Geo.PlaceID != "" {
// 		for _, pl := range page.Includes.Places {
// 			if len(pl.Geo.BBox) == 4 {
// 				// TODO: we only support a single point, so find center of bounding box... supposedly they should go from SW to NE (counterclockwise)
// 			}
// 		}
// 	}

// 	it := &timeline.Item{
// 		ID:        tweet.ID,
// 		Timestamp: tweet.CreatedAt,
// 		Location:  geo,
// 		Owner:     owner,
// 		Metadata: timeline.Metadata{
// 			"Retweets": tweet.PublicMetrics.RetweetCount,
// 			"Quotes":   tweet.PublicMetrics.QuoteCount,
// 			"Likes":    tweet.PublicMetrics.LikeCount,
// 			"Source":   tweet.Source,
// 			"Language": tweet.Lang,
// 		},
// 	}
// 	if tweet.Text != "" {
// 		expandedText := tweet.Text

// 		// replace any shortened URLs with their fully-expanded (and unwound) form
// 		// (according to Twitter API docs, "unwound" means after following redirects
// 		// from URL shorteners like bitly, etc.)
// 		for _, urlEnt := range tweet.Entities.URLs {
// 			textToReplace := tweet.Text[urlEnt.Start:urlEnt.End]
// 			expandedText = strings.Replace(expandedText, textToReplace, urlEnt.UnwoundURL, 1)
// 		}

// 		it.Content = timeline.ItemData{
// 			Data: timeline.StringData(expandedText),
// 		}
// 	}

// 	ig := &timeline.Graph{Item: it}

// 	// attach media elements to the main tweet's item graph
// 	for _, mediaKey := range tweet.Attachments.MediaKeys {
// 		// find this media item in the attachments list
// 		for _, attachment := range page.Includes.Media {
// 			// skip attachments that aren't the one we're looking for,
// 			// or which have an empty URL (sigh)
// 			if attachment.MediaKey != mediaKey || attachment.URL == "" {
// 				continue
// 			}

// 			mediaItem := &timeline.Item{
// 				ID:        attachment.MediaKey,
// 				Timestamp: tweet.CreatedAt,
// 				Owner:     it.Owner,
// 				Content: timeline.ItemData{
// 					Filename: path.Base(attachment.URL),
// 					Data: func(context.Context) (io.ReadCloser, error) {
// 						resp, err := http.Get(attachment.URL)
// 						if err != nil {
// 							return nil, err
// 						}
// 						return resp.Body, nil
// 					},
// 				},
// 				Metadata: timeline.Metadata{
// 					"Width":                   attachment.Width,
// 					"Height":                  attachment.Height,
// 					"Duration (milliseconds)": attachment.DurationMs,
// 					"Views":                   attachment.PublicMetrics.ViewCount,
// 				},
// 			}

// 			ig.ToItem(timeline.RelAttachment, mediaItem)
// 			break
// 		}
// 	}

// 	return ig
// }

// func (page userTweetsResponsePage) process(itemChan chan<- *timeline.Graph, opt Options) error {
// nextTweet:
// 	for _, tweet := range page.Data {
// 		// skip retweets unless configured
// 		if !opt.Retweets {
// 			for _, ref := range tweet.ReferencedTweets {
// 				if ref.Type == "retweeted" || ref.Type == "quoted" {
// 					continue nextTweet
// 				}
// 			}
// 		}

// 		ig := tweet.toItemGraph(page)

// 		// if this tweet is in reply to another tweet, we add that
// 		// other tweet to the graph; but since our unidirectional
// 		// relation ReplyTo goes FROM the first message TO the reply,
// 		// we need to actually create a graph for the first message,
// 		// then connect the original tweet which is the reply; this
// 		// is a little awkward since we're starting with the reply
// 		// and getting its "parent", which is backwards from how it
// 		// was designed (start with parent, get replies).

// 		// TODO: skip replies unless configured to have them

// 		// attach tweet this tweet is in reply to (if any)
// 		for _, ref := range tweet.ReferencedTweets {
// 			if ref.Type != "replied_to" {
// 				continue
// 			}

// 			// find the referenced tweet in the list of attached tweets
// 			for _, refTweet := range page.Includes.Tweets {
// 				if refTweet.ID != ref.ID {
// 					continue
// 				}

// 				// TODO: I think this relationship is backwards... double-check this!
// 				refTweetItemGraph := refTweet.toItemGraph(page)
// 				refTweetItemGraph.Edges = append(refTweetItemGraph.Edges, timeline.Relationship{
// 					Relation: timeline.RelReply,
// 					To:       ig,
// 				})

// 				// TODO: How much of the conversation can/should we do? Maybe make it configurable?

// 				// this will add both the first tweet and the reply to
// 				// the timeline, then we'll end up sending the reply again,
// 				// but that should be OK since the timeline should be able
// 				// to deduplicate for us
// 				itemChan <- refTweetItemGraph
// 			}
// 		}

// 		itemChan <- ig
// 	}

// 	return nil
// }

// tweet is a single tweet (status) as decoded from JSON. The same struct
// is used for tweets from the API and from an export archive (see the
// source field); numeric fields are typed transInt/transFloat so they
// decode from either JSON numbers or quoted strings.
type tweet struct {
	Contributors         any               `json:"contributors"`
	Coordinates          *tweetGeo         `json:"coordinates,omitempty"`
	CreatedAt            string            `json:"created_at"`
	DisplayTextRange     []transInt        `json:"display_text_range"`
	Entities             *twitterEntities  `json:"entities,omitempty"` // DO NOT USE (https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/entities-object.html#media)
	ExtendedEntities     *extendedEntities `json:"extended_entities,omitempty"`
	FavoriteCount        transInt          `json:"favorite_count"`
	Favorited            bool              `json:"favorited"`
	FullText             string            `json:"full_text"` // tweet_mode=extended (https://developer.twitter.com/en/docs/tweets/tweet-updates)
	InReplyToScreenName  string            `json:"in_reply_to_screen_name,omitempty"`
	InReplyToStatusID    transInt          `json:"in_reply_to_status_id,omitempty"`
	InReplyToStatusIDStr string            `json:"in_reply_to_status_id_str,omitempty"`
	InReplyToUserID      transInt          `json:"in_reply_to_user_id,omitempty"`
	InReplyToUserIDStr   string            `json:"in_reply_to_user_id_str,omitempty"`
	IsQuoteStatus        bool              `json:"is_quote_status"`
	Lang                 string            `json:"lang"`
	Place                any               `json:"place"`
	PossiblySensitive    bool              `json:"possibly_sensitive,omitempty"`
	RetweetCount         transInt          `json:"retweet_count"`
	Retweeted            bool              `json:"retweeted"`        // always false for some reason
	RetweetedStatus      *tweet            `json:"retweeted_status"` // API: contains full_text of a retweet (otherwise is truncated)
	Source               string            `json:"source"`
	Text                 string            `json:"text"`      // As of Feb. 2019, Twitter API default; truncated at ~140 chars (see FullText)
	Truncated            bool              `json:"truncated"` // API: always false in tweet_mode=extended, even if full_text is truncated (retweets)
	TweetID              transInt          `json:"id"`
	TweetIDStr           string            `json:"id_str"`
	User                 *twitterUser      `json:"user"`
	WithheldCopyright    bool              `json:"withheld_copyright,omitempty"`
	WithheldInCountries  []string          `json:"withheld_in_countries,omitempty"`
	WithheldScope        string            `json:"withheld_scope,omitempty"`

	// The fields below are unexported and therefore never decoded from
	// JSON. None of them is assigned in this file; presumably the code
	// that loads tweets fills them in (createdAtParsed from CreatedAt,
	// owner with the tweet's author) -- TODO confirm against the caller.
	createdAtParsed time.Time
	owner           timeline.Entity
	source          string // "api|archive"
}

// func (t *tweet) id() string {
// 	return t.TweetIDStr
// }

// content produces the item data for this tweet. A tweet with non-empty
// text yields that text as string data; otherwise the first attached
// media item (if there is one) supplies the file name, reader and media
// type. A tweet with neither yields the zero ItemData.
func (t *tweet) content() timeline.ItemData {
	if body := t.text(); body != "" {
		return timeline.ItemData{Data: timeline.StringData(body)}
	}

	// no text: fall back to the first media attachment, if any
	if t.ExtendedEntities == nil || len(t.ExtendedEntities.Media) == 0 {
		return timeline.ItemData{}
	}
	first := t.ExtendedEntities.Media[0]
	return timeline.ItemData{
		Filename:  first.fileName(),
		Data:      first.fileReader,
		MediaType: first.mediaType(),
	}
}

// isRetweet reports whether the tweet is a retweet: either the JSON says
// so (retweeted flag or an embedded retweeted status), or the raw text
// starts with the conventional "RT @" prefix.
func (t *tweet) isRetweet() bool {
	switch {
	case t.Retweeted, t.RetweetedStatus != nil:
		return true
	default:
		// TODO: Twitter's data export always sets "retweeted" to false,
		// even when "full_text" is clearly a retweet (prefixed with
		// "RT @"), and the API does the same; so the text prefix is the
		// only remaining signal.
		return strings.HasPrefix(t.rawText(), "RT @")
	}
}

// isEmpty reports whether the tweet has nothing to show: its text is
// blank (ignoring surrounding whitespace) and it carries no media.
func (t *tweet) isEmpty() bool {
	if strings.TrimSpace(t.text()) != "" {
		return false
	}
	hasMedia := t.ExtendedEntities != nil && len(t.ExtendedEntities.Media) > 0
	return !hasMedia
}

// text returns the full text of the tweet with HTML entities unescaped
// and with shortened links replaced by their expanded URLs.
//
// Each URL/media entity carries a two-element Indices pair that marks the
// span of the raw text it stands for. Twitter documents those offsets as
// Unicode code points, not bytes, so the span is cut from the raw text
// rune-wise. Entities whose indices are missing, empty, or fall outside
// the text are skipped instead of panicking or corrupting the result.
func (t *tweet) text() string {
	txt := t.rawText()
	expandedText := html.UnescapeString(txt)
	rawRunes := []rune(txt)

	// replace any annoying t.co shortened URLs with their fully-expanded form
	if t.Entities != nil {
		for _, urlEnt := range t.Entities.URLs {
			if textToReplace, ok := entityRuneSpan(rawRunes, urlEnt.Indices); ok {
				expandedText = strings.Replace(expandedText, textToReplace, urlEnt.ExpandedURL, 1)
			}
		}
	}

	// replace any links to embedded media with the full URL
	// (although, this is not necessary, because we link the
	// media in our own way, without a URL)
	if t.ExtendedEntities != nil {
		for _, ent := range t.ExtendedEntities.Media {
			if textToReplace, ok := entityRuneSpan(rawRunes, ent.Indices); ok {
				expandedText = strings.Replace(expandedText, textToReplace, ent.ExpandedURL, 1)
			}
		}
	}

	return expandedText
}

// entityRuneSpan returns the part of text delimited by an entity's
// [start, end) index pair, counted in runes. ok is false when indices does
// not hold exactly two values, when the span is empty or inverted, or when
// it does not fit inside text.
func entityRuneSpan(text []rune, indices []transInt) (span string, ok bool) {
	const requiredCount = 2
	if len(indices) != requiredCount {
		return "", false
	}
	start, end := int64(indices[0]), int64(indices[1])
	// an empty span must be rejected too: strings.Replace with an empty
	// "old" string would insert the URL at the very start of the text
	if start < 0 || start >= end || end > int64(len(text)) {
		return "", false
	}
	return string(text[start:end]), true
}

// rawText returns the "raw" text of the tweet, without replacing
// entities in this tweet's own text.
//
// For a retweet that embeds the original status together with its author,
// the result is "RT @<screen name> <text of the original>", because the
// retweet's own text gets truncated when the original is long. If the
// embedded status has no user information, the tweet's own text is used
// instead of dereferencing a nil user. Otherwise FullText is preferred
// over the (possibly truncated) Text.
func (t *tweet) rawText() string {
	// sigh, retweets get truncated if they're tall,
	// so we have to get the full text from a subfield
	if rt := t.RetweetedStatus; rt != nil && rt.User != nil {
		return strings.TrimSpace(fmt.Sprintf("RT @%s %s", rt.User.ScreenName, rt.text()))
	}
	if t.FullText != "" {
		return t.FullText
	}
	return t.Text
}

// location returns the best guess for the tweet's location. It is only a
// guess because the order of the two coordinate values was observed to
// vary between tweets, so we can't always be sure which is which >:(
//
// The zero Location is returned when the tweet has no coordinates, when
// fewer than two values are present, or when either value does not parse
// as a float.
func (t *tweet) location() timeline.Location {
	var loc timeline.Location

	// need both values; indexing a shorter slice would panic
	const requiredCount = 2
	if t.Coordinates == nil || len(t.Coordinates.Coordinates) < requiredCount {
		return loc
	}

	// grr, during dev I noticed that Twitter randomly orders the coordinate values,
	// so we only know which is which if one of them is > |90|.
	c0, err := strconv.ParseFloat(t.Coordinates.Coordinates[0], 64)
	if err != nil {
		return loc
	}
	c1, err := strconv.ParseFloat(t.Coordinates.Coordinates[1], 64)
	if err != nil {
		return loc
	}

	const maxLatitude = 90
	if math.Abs(c0) > maxLatitude {
		// c0 cannot be a latitude, so it must be the longitude
		loc.Latitude = &c1
		loc.Longitude = &c0
	} else {
		// if c1 > |90|, great, but if both are less than 90, we just don't know
		loc.Latitude = &c0
		loc.Longitude = &c1
	}

	return loc
}

// tweetGeo is the value of a tweet's "coordinates" JSON field: a type
// name plus the coordinate values, which are kept as strings here and
// parsed into floats by tweet.location.
type tweetGeo struct {
	Type        string   `json:"type"`
	Coordinates []string `json:"coordinates"` // TODO: these are not in any particular order! That's *GREAT*... sigh. My own export has 2 tweets with coords, and they're the same point, but both are in a different order
}

// type tweetPlace struct {
// 	ID          string      `json:"id"`
// 	URL         string      `json:"url"`
// 	PlaceType   string      `json:"place_type"`
// 	Name        string      `json:"name"`
// 	FullName    string      `json:"full_name"`
// 	CountryCode string      `json:"country_code"`
// 	Country     string      `json:"country"`
// 	BoundingBox boundingBox `json:"bounding_box"`
// }

// type boundingBox struct {
// 	Type string `json:"type"`

// 	// "A series of longitude and latitude points, defining a box which will contain
// 	// the Place entity this bounding box is related to. Each point is an array in
// 	// the form of [longitude, latitude]. Points are grouped into an array per bounding
// 	// box. Bounding box arrays are wrapped in one additional array to be compatible
// 	// with the polygon notation."
// 	Coordinates [][][]float64 `json:"coordinates"`
// }

// twitterEntities groups the entity lists found in an "entities" JSON
// object. It is used both for tweets (tweet.Entities) and for user
// profiles (twitterUser.Entities).
type twitterEntities struct {
	Hashtags     []hashtagEntity     `json:"hashtags"`
	Symbols      []symbolEntity      `json:"symbols"`
	UserMentions []userMentionEntity `json:"user_mentions"`
	URLs         []urlEntity         `json:"urls"`
	Polls        []pollEntity        `json:"polls"`
}

// hashtagEntity is one element of twitterEntities.Hashtags: the hashtag
// text and its indices.
// NOTE(review): Indices is presumably a [start, end) pair of offsets into
// the tweet text, as tweet.text assumes for URL and media entities --
// confirm before relying on it.
type hashtagEntity struct {
	Indices []transInt `json:"indices"`
	Text    string     `json:"text"`
}

// symbolEntity is one element of twitterEntities.Symbols: the symbol
// text and its indices. It has the same shape as hashtagEntity.
type symbolEntity struct {
	Indices []transInt `json:"indices"`
	Text    string     `json:"text"`
}

// urlEntity is one element of twitterEntities.URLs and describes a link
// in the text. tweet.text uses Indices to find the link's span in the raw
// text and replaces that span with ExpandedURL.
type urlEntity struct {
	URL         string            `json:"url"`
	ExpandedURL string            `json:"expanded_url"`
	DisplayURL  string            `json:"display_url"`
	Unwound     *urlEntityUnwound `json:"unwound,omitempty"`
	Indices     []transInt        `json:"indices"`
}

// urlEntityUnwound is the optional "unwound" object of a urlEntity.
// NOTE(review): presumably this describes the link after following
// redirects (final URL, HTTP status, page title and description) --
// confirm against Twitter's documentation.
type urlEntityUnwound struct {
	URL         string `json:"url"`
	Status      int    `json:"status"`
	Title       string `json:"title"`
	Description string `json:"description"`
}

// userMentionEntity is one element of twitterEntities.UserMentions: the
// mentioned user's name, screen name and ID (as both number and string),
// plus the indices of the mention.
type userMentionEntity struct {
	Name       string     `json:"name"`
	ScreenName string     `json:"screen_name"`
	Indices    []transInt `json:"indices"`
	IDStr      string     `json:"id_str"`
	ID         transInt   `json:"id"`
}

// pollEntity is one element of twitterEntities.Polls: the poll's options
// together with its end time (kept as the raw string) and its duration
// in minutes.
type pollEntity struct {
	Options         []pollOption `json:"options"`
	EndDatetime     string       `json:"end_datetime"`
	DurationMinutes int          `json:"duration_minutes"`
}

// pollOption is a single choice of a pollEntity: its position among the
// options and its label text.
type pollOption struct {
	Position int    `json:"position"`
	Text     string `json:"text"`
}

// extendedEntities is the "extended_entities" object of a tweet; it holds
// the tweet's media attachments.
type extendedEntities struct {
	Media []*mediaItem `json:"media"`
}

// mediaItem is one media attachment (Type is matched against "photo",
// "video" and "animated_gif" by the methods below) listed in a tweet's
// extended entities.
type mediaItem struct {
	AdditionalMediaInfo *additionalMediaInfo `json:"additional_media_info,omitempty"`
	DisplayURL          string               `json:"display_url"`
	ExpandedURL         string               `json:"expanded_url"`
	Indices             []transInt           `json:"indices"`
	MediaID             transInt             `json:"id"`
	MediaIDStr          string               `json:"id_str"`
	MediaURL            string               `json:"media_url"`
	MediaURLHTTPS       string               `json:"media_url_https"`
	Sizes               mediaSizes           `json:"sizes"`
	SourceStatusID      transInt             `json:"source_status_id"`
	SourceStatusIDStr   string               `json:"source_status_id_str"`
	SourceUserID        transInt             `json:"source_user_id"`
	SourceUserIDStr     string               `json:"source_user_id_str"`
	Type                string               `json:"type"`
	URL                 string               `json:"url"`
	VideoInfo           *videoInfo           `json:"video_info,omitempty"`

	// Neither field below comes from JSON, and neither is assigned in
	// this file. parent is dereferenced unconditionally by fileName (and
	// by owner when SourceUserIDStr is empty), so it must be set before
	// those are called; readCloser is what fileReader hands out.
	parent     *tweet
	readCloser io.ReadCloser // access to the media contents
}

// owner returns the entity the media belongs to. When the media names
// its source user, that user's ID becomes the entity's identity
// attribute; otherwise the owner of the containing tweet is assumed.
func (m mediaItem) owner() timeline.Entity {
	if m.SourceUserIDStr != "" {
		idAttr := timeline.Attribute{
			Name:     identityAttribute,
			Value:    m.SourceUserIDStr,
			Identity: true,
		}
		return timeline.Entity{Attributes: []timeline.Attribute{idAttr}}
	}

	// no explicit source user: fall back to the tweet's owner
	return m.parent.owner
}

// fileName derives a file name for the media from its URL (see getURL):
// the last path element of the parsed URL, or of the raw URL string if
// it does not parse. For tweets that come from an export archive the
// name is prefixed with the tweet ID and a hyphen, which is how the
// archive names its media files.
func (m mediaItem) fileName() string {
	rawURL := m.getURL()

	base := path.Base(rawURL)
	if parsed, err := url.Parse(rawURL); err == nil {
		base = path.Base(parsed.Path)
	}

	if m.parent.source != srcArchive {
		return base
	}
	return m.parent.TweetIDStr + "-" + base
}

// content describes the media as item data: its file name, a function
// that yields its contents, and its media type.
func (m mediaItem) content() timeline.ItemData {
	var data timeline.ItemData
	data.Filename = m.fileName()
	data.Data = m.fileReader
	data.MediaType = m.mediaType()
	return data
}

// fileReader hands out the reader stored in the readCloser field. The
// context is ignored and the error is always nil; the result is a nil
// reader if readCloser was never set.
func (m mediaItem) fileReader(_ context.Context) (io.ReadCloser, error) {
	contents := m.readCloser
	return contents, nil
}

// mediaType returns the MIME type of the media, or "" if it cannot be
// determined.
//
// For videos and animated GIFs it is the content type of the
// highest-bitrate variant (the bitrate is logged for debugging). For
// photos it is "image/" plus the lower-cased file extension, with "jpg"
// normalized to "jpeg". Any other Type yields "".
func (m mediaItem) mediaType() string {
	switch m.Type {
	case "animated_gif", "video":
		bitrate, contentType, _ := m.getLargestVideo()
		log.Printf("[DEBUG] Largest video bitrate: %d", bitrate)
		return contentType
	case "photo":
		// path.Ext yields either "" or a string starting with a dot. A
		// bare "." is possible (fileName ends in "." when no URL is known,
		// since path.Base("") is "."), and must not become "image/".
		ext := strings.ToLower(path.Ext(m.fileName()))
		suffix := strings.TrimPrefix(ext, ".")
		if suffix == "" {
			return ""
		}
		if suffix == "jpg" {
			suffix = "jpeg"
		}
		return "image/" + suffix
	}
	return ""
}

// getLargestVideo picks the video variant with the highest bitrate and
// returns its bitrate, content type and URL. On equal bitrates the
// earlier variant wins. Without any video info all results are zero
// values; with video info but no variants the bitrate is -1 and both
// strings are empty.
func (m mediaItem) getLargestVideo() (bitrate int, contentType, source string) {
	if m.VideoInfo == nil {
		return 0, "", ""
	}

	// start below zero so a variant with bitrate 0 (animated_gif) is still selected
	best := -1
	var bestType, bestURL string
	for i := range m.VideoInfo.Variants {
		variant := m.VideoInfo.Variants[i]
		if candidate := int(variant.Bitrate); candidate > best {
			best, bestType, bestURL = candidate, variant.ContentType, variant.URL
		}
	}
	return best, bestType, bestURL
}

// getURL returns the URL of the media file: the highest-bitrate variant
// for videos and animated GIFs, the HTTPS media URL (or the plain one if
// that is empty) for photos, and "" for any other type.
func (m mediaItem) getURL() string {
	if m.Type == "animated_gif" || m.Type == "video" {
		_, _, source := m.getLargestVideo()
		return source
	}
	if m.Type != "photo" {
		return ""
	}

	// A photo's size can be chosen at download time by appending
	// ":thumb", ":small", ":medium", ":large" or ":orig" to the URL;
	// that suffix is deliberately not added here, only when actually
	// downloading.
	if m.MediaURLHTTPS == "" {
		return m.MediaURL
	}
	return m.MediaURLHTTPS
}

// additionalMediaInfo is the optional "additional_media_info" object of
// a mediaItem; only its "monetizable" flag is decoded.
type additionalMediaInfo struct {
	Monetizable bool `json:"monetizable"`
}

// videoInfo is the optional "video_info" object of a mediaItem: aspect
// ratio, duration in milliseconds, and the available encodings
// (Variants), from which mediaItem.getLargestVideo picks one.
type videoInfo struct {
	AspectRatio    []transFloat    `json:"aspect_ratio"`
	DurationMillis transInt        `json:"duration_millis"`
	Variants       []videoVariants `json:"variants"`
}

// videoVariants is a single encoding of a video (despite the plural
// name): its bitrate, content type and URL.
type videoVariants struct {
	Bitrate     transInt `json:"bitrate,omitempty"`
	ContentType string   `json:"content_type,omitempty"`
	URL         string   `json:"url"`
}

// mediaSizes is the "sizes" object of a mediaItem, giving the dimensions
// of each named rendition of the media.
type mediaSizes struct {
	Thumb  mediaSize `json:"thumb"`
	Small  mediaSize `json:"small"`
	Medium mediaSize `json:"medium"`
	Large  mediaSize `json:"large"`
}

// mediaSize is the width, height and resize mode of one rendition listed
// in mediaSizes.
type mediaSize struct {
	W      transInt `json:"w"`
	H      transInt `json:"h"`
	Resize string   `json:"resize"` // fit|crop
}

// twitterUser is the "user" object embedded in a tweet (tweet.User). In
// this file only ScreenName is read, by tweet.rawText; the remaining
// fields are decoded but not used here.
type twitterUser struct {
	ContributorsEnabled            bool             `json:"contributors_enabled"`
	CreatedAt                      string           `json:"created_at"`
	DefaultProfile                 bool             `json:"default_profile"`
	DefaultProfileImage            bool             `json:"default_profile_image"`
	Description                    string           `json:"description"`
	Entities                       *twitterEntities `json:"entities"`
	FavouritesCount                int              `json:"favourites_count"`
	FollowersCount                 int              `json:"followers_count"`
	Following                      any              `json:"following"`
	FollowRequestSent              any              `json:"follow_request_sent"`
	FriendsCount                   int              `json:"friends_count"`
	GeoEnabled                     bool             `json:"geo_enabled"`
	HasExtendedProfile             bool             `json:"has_extended_profile"`
	IsTranslationEnabled           bool             `json:"is_translation_enabled"`
	IsTranslator                   bool             `json:"is_translator"`
	Lang                           string           `json:"lang"`
	ListedCount                    int              `json:"listed_count"`
	Location                       string           `json:"location"`
	Name                           string           `json:"name"`
	Notifications                  any              `json:"notifications"`
	ProfileBackgroundColor         string           `json:"profile_background_color"`
	ProfileBackgroundImageURL      string           `json:"profile_background_image_url"`
	ProfileBackgroundImageURLHTTPS string           `json:"profile_background_image_url_https"`
	ProfileBackgroundTile          bool             `json:"profile_background_tile"`
	ProfileBannerURL               string           `json:"profile_banner_url"`
	ProfileImageURL                string           `json:"profile_image_url"`
	ProfileImageURLHTTPS           string           `json:"profile_image_url_https"`
	ProfileLinkColor               string           `json:"profile_link_color"`
	ProfileSidebarBorderColor      string           `json:"profile_sidebar_border_color"`
	ProfileSidebarFillColor        string           `json:"profile_sidebar_fill_color"`
	ProfileTextColor               string           `json:"profile_text_color"`
	ProfileUseBackgroundImage      bool             `json:"profile_use_background_image"`
	Protected                      bool             `json:"protected"`
	ScreenName                     string           `json:"screen_name"`
	StatusesCount                  int              `json:"statuses_count"`
	TimeZone                       any              `json:"time_zone"`
	TranslatorType                 string           `json:"translator_type"`
	URL                            string           `json:"url"`
	UserID                         transInt         `json:"id"`
	UserIDStr                      string           `json:"id_str"`
	UtcOffset                      any              `json:"utc_offset"`
	Verified                       bool             `json:"verified"`
}

// phoneNumberFile is the decoded form of a JSON array whose elements each
// wrap one phone number under "device". twitterAccount stores it as the
// contents of the archive's phone-number.js file.
type phoneNumberFile []struct {
	Device struct {
		PhoneNumber string `json:"phoneNumber"`
	} `json:"device"`
}

// profileFile is the decoded form of a JSON array of profile objects
// (bio, website, location, avatar and header image URLs). twitterAccount
// stores it as the contents of the archive's profile.js file, and
// twitterAccount.entity reads only the first element.
type profileFile []struct {
	Profile struct {
		Description struct {
			Bio      string `json:"bio"`
			Website  string `json:"website"`
			Location string `json:"location"`
		} `json:"description"`
		AvatarMediaURL string `json:"avatarMediaUrl"`
		HeaderMediaURL string `json:"headerMediaUrl"`
	} `json:"profile"`
}

// twitterAccountFile is the decoded form of a JSON array whose elements
// each wrap a twitterAccount under "account".
// NOTE(review): presumably this is the layout of the archive's account.js
// file -- confirm against the code that reads it.
type twitterAccountFile []struct {
	Account twitterAccount `json:"account"`
}

// twitterAccount collects what is known about one Twitter account. It
// combines fields from several export archive files with the response
// shape of the users API endpoint; twitterAccount.entity reads only the
// archive-derived parts.
type twitterAccount struct {
	// fields from export archive file: account.js
	PhoneNumber        string `json:"phoneNumber"`
	Email              string `json:"email"`
	CreatedVia         string `json:"createdVia"`
	CreatedAt          string `json:"createdAt"`
	Username           string `json:"username"`
	AccountID          string `json:"accountId"`
	AccountDisplayName string `json:"accountDisplayName"`

	// info from file: phone-number.js
	// (no JSON tag; not populated anywhere in this file)
	PhoneNumbers phoneNumberFile

	// info from file: profile.js
	// (no JSON tag; not populated anywhere in this file)
	Profile profileFile

	// fields from API endpoint: GET /2/users[/by/username/...]
	Data struct {
		Verified    bool      `json:"verified"`
		CreatedAt   time.Time `json:"created_at"`
		Description string    `json:"description"`
		Location    string    `json:"location"`
		Entities    struct {
			URL struct {
				URLs []struct {
					Start       int    `json:"start"`
					End         int    `json:"end"`
					URL         string `json:"url"`
					ExpandedURL string `json:"expanded_url"`
					DisplayURL  string `json:"display_url"`
				} `json:"urls"`
			} `json:"url"`
			Description struct {
				Mentions []struct {
					Start    int    `json:"start"`
					End      int    `json:"end"`
					Username string `json:"username"`
				} `json:"mentions"`
			} `json:"description"`
		} `json:"entities"`
		PublicMetrics struct {
			FollowersCount int `json:"followers_count"`
			FollowingCount int `json:"following_count"`
			TweetCount     int `json:"tweet_count"`
			ListedCount    int `json:"listed_count"`
		} `json:"public_metrics"`
		URL             string `json:"url"`
		ProfileImageURL string `json:"profile_image_url"`
		Name            string `json:"name"`
		Protected       bool   `json:"protected"`
		PinnedTweetID   string `json:"pinned_tweet_id"`
		Username        string `json:"username"`
		ID              string `json:"id"`
	} `json:"data"`
}

// func (ta twitterAccount) screenName() string {
// 	if ta.Data.Username != "" {
// 		return ta.Data.Username // from API
// 	}
// 	return ta.Username // from archive file
// }

// func (ta twitterAccount) id() string {
// 	if ta.Data.ID != "" {
// 		return ta.Data.ID // from API
// 	}
// 	return ta.AccountID // from archive file
// }

// func (ta twitterAccount) name() string {
// 	if ta.Data.Name != "" {
// 		return ta.Data.Name // from API
// 	}
// 	return ta.AccountDisplayName // from archive file
// }

// entity builds the timeline.Entity that represents this account.
//
// The entity is named after the account's display name and carries the
// account ID as its identity attribute, followed by the email address,
// phone number and username as identifying attributes, and one more
// phone-number attribute per entry in PhoneNumbers. If profile
// information is present, the first profile contributes the bio (as
// metadata), the location (as a plain attribute) and, when it has an
// avatar URL, the picture: downloaded from that URL when fsys is nil,
// otherwise opened from data/profile_media/<account ID>-<file name>
// inside fsys. The context parameter is unused.
func (ta twitterAccount) entity(_ context.Context, fsys fs.FS) timeline.Entity {
	attrs := []timeline.Attribute{
		{Name: identityAttribute, Value: ta.AccountID, Identity: true},
		{Name: timeline.AttributeEmail, Value: ta.Email, Identifying: true},
		{Name: timeline.AttributePhoneNumber, Value: ta.PhoneNumber, Identifying: true},
		{Name: "twitter_username", Value: ta.Username, Identifying: true},
	}
	for i := range ta.PhoneNumbers {
		attrs = append(attrs, timeline.Attribute{
			Name:        timeline.AttributePhoneNumber,
			Value:       ta.PhoneNumbers[i].Device.PhoneNumber,
			Identifying: true,
		})
	}

	ent := timeline.Entity{
		Name:       ta.AccountDisplayName,
		Attributes: attrs,
	}

	// everything below comes from the profile, if we have one
	if len(ta.Profile) == 0 {
		return ent
	}
	profile := ta.Profile[0].Profile

	switch {
	case profile.AvatarMediaURL == "":
		// no avatar to attach
	case fsys == nil:
		// no archive to read from, so fetch the picture from its URL
		ent.NewPicture = timeline.DownloadData(profile.AvatarMediaURL)
	default:
		// the archive stores the avatar prefixed with the account ID
		ent.NewPicture = func(_ context.Context) (io.ReadCloser, error) {
			avatarFilename := ta.AccountID + "-" + path.Base(profile.AvatarMediaURL)
			return fsys.Open(path.Join("data", "profile_media", avatarFilename))
		}
	}

	ent.Metadata = timeline.Metadata{
		"Twitter bio": profile.Description.Bio,
	}
	ent.Attributes = append(ent.Attributes, timeline.Attribute{
		Name:  "twitter_location",
		Value: profile.Description.Location,
	})

	return ent
}

// directMessages is a JSON object that wraps a single conversation under
// the "dmConversation" key.
type directMessages struct {
	DMConversation dmConversation `json:"dmConversation"`
}

// dmConversation is one direct-message conversation: its ID and its
// messages. Each message is wrapped in a "messageCreate" object holding
// sender and recipient IDs, the text, any URLs and media URLs, and the
// creation time (decoded directly into a time.Time).
type dmConversation struct {
	ConversationID string `json:"conversationId"`
	Messages       []struct {
		MessageCreate struct {
			RecipientID string `json:"recipientId"`
			Reactions   []any  `json:"reactions"`
			URLs        []struct {
				URL      string `json:"url"`
				Expanded string `json:"expanded"`
				Display  string `json:"display"`
			} `json:"urls"`
			Text      string    `json:"text"`
			MediaURLs []string  `json:"mediaUrls"`
			SenderID  string    `json:"senderId"`
			ID        string    `json:"id"`
			CreatedAt time.Time `json:"createdAt"`
		} `json:"messageCreate"`
	} `json:"messages"`
}

// archiveManifest describes an export archive: whose account it belongs
// to, size and generation date of the archive, where its readme is, and
// which files exist per data type (keyed by data type name).
// NOTE(review): presumably decoded from the archive's manifest file; the
// file name is not visible from here. Sizes and counts are kept as the
// strings found in the JSON.
type archiveManifest struct {
	UserInfo struct {
		AccountID   string `json:"accountId"`
		UserName    string `json:"userName"`
		DisplayName string `json:"displayName"`
	} `json:"userInfo"`
	ArchiveInfo struct {
		SizeBytes        string    `json:"sizeBytes"`
		GenerationDate   time.Time `json:"generationDate"`
		IsPartialArchive bool      `json:"isPartialArchive"`
		MaxPartSizeBytes string    `json:"maxPartSizeBytes"`
	} `json:"archiveInfo"`
	ReadmeInfo struct {
		FileName  string `json:"fileName"`
		Directory string `json:"directory"`
		Name      string `json:"name"`
	} `json:"readmeInfo"`
	DataTypes map[string]struct {
		Files []struct {
			FileName   string `json:"fileName"`
			GlobalName string `json:"globalName"`
			Count      string `json:"count"`
		} `json:"files"`
	} `json:"dataTypes"`
}

// transInt is an integer that can be unmarshaled from a JSON number or
// from a JSON string containing a number. This is needed because the
// archive JSON from Twitter uses all string values, but the same fields
// are integers with the API.
type transInt int64

// UnmarshalJSON implements json.Unmarshaler. Surrounding double quotes
// are stripped before the value is decoded as an int64. A JSON null and
// an empty string ("") both yield 0, so that a blank field does not
// abort decoding of the whole enclosing document. Input with no bytes
// at all, or a value that is not an integer, is an error.
func (ti *transInt) UnmarshalJSON(b []byte) error {
	if len(b) == 0 {
		return errors.New("no value")
	}

	unquoted := bytes.Trim(b, "\"")
	if len(unquoted) == 0 {
		// the value was an empty string: there is no number to decode
		*ti = 0
		return nil
	}

	var i int64
	if err := json.Unmarshal(unquoted, &i); err != nil {
		return fmt.Errorf("decoding integer from %s: %w", b, err)
	}
	*ti = transInt(i)
	return nil
}

// transFloat is a float that can be unmarshaled from a JSON number or
// from a JSON string containing a number (the export archive uses
// strings where the API uses numbers).
type transFloat float64

// UnmarshalJSON implements json.Unmarshaler. Surrounding double quotes
// are stripped before the value is decoded as a float64. A JSON null and
// an empty string ("") both yield 0, so that a blank field does not
// abort decoding of the whole enclosing document. Input with no bytes
// at all, or a value that is not a number, is an error.
func (tf *transFloat) UnmarshalJSON(b []byte) error {
	if len(b) == 0 {
		return errors.New("no value")
	}

	unquoted := bytes.Trim(b, "\"")
	if len(unquoted) == 0 {
		// the value was an empty string: there is no number to decode
		*tf = 0
		return nil
	}

	var f float64
	if err := json.Unmarshal(unquoted, &f); err != nil {
		return fmt.Errorf("decoding float from %s: %w", b, err)
	}
	*tf = transFloat(f)
	return nil
}

// identityAttribute is the name of the entity attribute that holds a
// Twitter account/user ID; mediaItem.owner and twitterAccount.entity set
// it as the identity attribute of the entities they build.
const identityAttribute = "twitter_id"

// srcArchive is the tweet.source value for tweets that come from an
// export archive; mediaItem.fileName prefixes media file names with the
// tweet ID when it sees this value.
const srcArchive = "archive"