package twitter import ( "context" "encoding/json" "fmt" "log" "net/http" "net/url" "strconv" "strings" "github.com/timelinize/timelinize/timeline" ) func (c *Client) getFromAPI(ctx context.Context, itemChan chan<- *timeline.ItemGraph, opt timeline.ListingOptions) error { // load any previous checkpoint c.checkpoint.load(opt.Checkpoint) // get account owner information cleanedScreenName := strings.TrimPrefix(c.acc.User.ID, "@") ownerAccount, err := c.getAccountFromAPI(cleanedScreenName, "") if err != nil { return fmt.Errorf("getting user account information for @%s: %v", cleanedScreenName, err) } c.ownerAccount = ownerAccount // get the starting bounds of this operation var maxTweet, minTweet string if opt.Timeframe.SinceItemID != nil { minTweet = *opt.Timeframe.SinceItemID } if c.checkpoint.LastTweetID != "" { // by default, start off at the last checkpoint maxTweet = c.checkpoint.LastTweetID if opt.Timeframe.UntilItemID != nil { // if both a timeframe UntilItemID and a checkpoint are set, // we will choose the one with a tweet ID that is higher, // meaning more recent, to avoid potentially skipping // a chunk of the timeline maxTweet = maxTweetID(c.checkpoint.LastTweetID, *opt.Timeframe.UntilItemID) } } for { select { case <-ctx.Done(): return nil default: tweets, err := c.nextPageOfTweetsFromAPI(maxTweet, minTweet) if err != nil { return fmt.Errorf("getting next page of tweets: %v", err) } // we are done when there are no more tweets if len(tweets) == 0 { return nil } // TODO: currently working on this for Twitter's v2 API // for _, t := range tweets { // err = c.processTweetFromAPI(t, itemChan) // if err != nil { // return fmt.Errorf("processing tweet from API: %v", err) // } // } // since max_id is inclusive, subtract 1 from the tweet ID // https://developer.twitter.com/en/docs/tweets/timelines/guides/working-with-timelines nextTweetID := tweets[len(tweets)-1].TweetID - 1 c.checkpoint.LastTweetID = strconv.FormatInt(int64(nextTweetID), 10) c.checkpoint.save(ctx) // decrease maxTweet to get the next page on next iteration maxTweet = c.checkpoint.LastTweetID } } } func (c *Client) processTweetFromAPI(t tweet, itemChan chan<- *timeline.ItemGraph, opt Options) error { skip, err := c.prepareTweet(&t, "api", opt) if err != nil { return fmt.Errorf("preparing tweet: %v", err) } if skip { return nil } ig, err := c.makeItemGraphFromTweet(t, nil, opt) if err != nil { return fmt.Errorf("processing tweet %s: %v", t.id(), err) } // send the tweet for processing if ig != nil { itemChan <- ig } return nil } // nextPageOfTweetsFromAPI returns the next page of tweets starting at maxTweet // and going for a full page or until minTweet, whichever comes first. Generally, // iterating over this function will involve decreasing maxTweet and leaving // minTweet the same, if set at all (maxTweet = "until", minTweet = "since"). // Either or both can be empty strings, for no boundaries. This function returns // at least 0 tweets (signaling done, I think) or up to a full page of tweets. func (c *Client) nextPageOfTweetsFromAPI(maxTweet, minTweet string) ([]tweet, error) { log.Println("TWITTER ID:", c.ownerAccount.id()) // TODO: Gah, the v2 API does not return media URL. This rewrite is impossible to complete without that. That's like omitting the text of a tweet. q := url.Values{ "query": {"from:" + c.ownerAccount.id()}, "max_results": {"0"}, // TODO: from 10 to 100, use 100 once I'm done testing "tweet.fields": {"attachments,author_id,context_annotations,conversation_id,created_at,entities,geo,id,in_reply_to_user_id,lang,public_metrics,possibly_sensitive,referenced_tweets,reply_settings,source,text,withheld"}, "media.fields": {"duration_ms,height,media_key,preview_image_url,type,url,width,public_metrics"}, "place.fields": {"contained_within,country,country_code,full_name,geo,id,name,place_type"}, "expansions": {"attachments.media_keys"}, // others: https://developer.twitter.com/en/docs/twitter-api/tweets/search/api-reference/get-tweets-search-recent // TODO: from v1... // "tweet_mode": {"extended"}, // https://developer.twitter.com/en/docs/tweets/tweet-updates // "exclude_replies": {"false"}, // always include replies in case it's a self-reply; we can filter all others // "include_rts": {"false"}, // or true if c.Retweets } if minTweet != "" { q.Set("since_id", minTweet) } if maxTweet != "" { q.Set("until_id", maxTweet) } u := apiBase + "/tweets/search/recent?" + q.Encode() log.Println("URL:", u) resp, err := c.HTTPClient.Get(u) if err != nil { return nil, fmt.Errorf("performing API request: %v", err) } defer resp.Body.Close() // TODO: handle HTTP errors, esp. rate limiting, a lot better if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("HTTP error: %s: %s", u, resp.Status) } var tweets recentTweetsResponse err = json.NewDecoder(resp.Body).Decode(&tweets) if err != nil { return nil, fmt.Errorf("reading response body: %v", err) } log.Printf("RESPONSE: %+v", tweets) return nil, nil } // getAccountFromAPI gets the account information for either // screenName, if set, or accountID, if set. Set only one; // leave the other argument empty string. func (c *Client) getAccountFromAPI(screenName, accountID string) (twitterAccount, error) { var ta twitterAccount q := make(url.Values) if screenName != "" { q.Set("screen_name", screenName) } else if accountID != "" { q.Set("user_id", accountID) } u := "https://api.twitter.com/1.1/users/show.json?" + q.Encode() resp, err := c.HTTPClient.Get(u) if err != nil { return ta, fmt.Errorf("performing API request: %v", err) } defer resp.Body.Close() // TODO: handle HTTP errors, esp. rate limiting, a lot better if resp.StatusCode != http.StatusOK { return ta, fmt.Errorf("HTTP error: %s: %s", u, resp.Status) } err = json.NewDecoder(resp.Body).Decode(&ta) if err != nil { return ta, fmt.Errorf("reading response body: %v", err) } return ta, nil } func (c *Client) getTweetFromAPI(id string) (tweet, error) { var t tweet q := url.Values{ "id": {id}, "tweet_mode": {"extended"}, // https://developer.twitter.com/en/docs/tweets/tweet-updates } u := "https://api.twitter.com/1.1/statuses/show.json?" + q.Encode() resp, err := c.HTTPClient.Get(u) if err != nil { return t, fmt.Errorf("performing API request: %v", err) } defer resp.Body.Close() switch resp.StatusCode { case http.StatusNotFound: // this is okay, because the tweet may simply have been deleted, // and we skip empty tweets anyway fallthrough case http.StatusForbidden: // this happens when the author's account is suspended return t, nil case http.StatusOK: break default: // TODO: handle HTTP errors, esp. rate limiting, a lot better return t, fmt.Errorf("HTTP error: %s: %s", u, resp.Status) } err = json.NewDecoder(resp.Body).Decode(&t) if err != nil { return t, fmt.Errorf("reading response body: %v", err) } return t, nil } const apiBase = "https://api.twitter.com/2"