1
0
Fork 0
timelinize/datasources/twitter/api.go.v1
2024-08-11 08:02:27 -06:00

227 lines
7 KiB
Text

package twitter
import (
"context"
"encoding/json"
"fmt"
"log"
"net/http"
"net/url"
"strconv"
"strings"
"github.com/timelinize/timelinize/timeline"
)
// getFromAPI pages through the account owner's tweets via the API,
// saving a checkpoint after every page. It first loads any checkpoint
// from opt.Checkpoint and looks up the owner's account using the screen
// name in c.acc.User.ID (a leading "@" is stripped).
//
// The lower bound is opt.Timeframe.SinceItemID, if set. The upper bound
// is the last checkpoint and/or opt.Timeframe.UntilItemID; when both are
// set, maxTweetID chooses between them.
//
// It returns nil when a page comes back empty or when ctx is cancelled.
// Errors from the API calls are wrapped (with %w) and returned.
//
// NOTE: itemChan is currently unused because per-tweet processing is
// commented out while the v2 API rewrite is in progress (see TODO below).
func (c *Client) getFromAPI(ctx context.Context, itemChan chan<- *timeline.ItemGraph, opt timeline.ListingOptions) error {
	// load any previous checkpoint
	c.checkpoint.load(opt.Checkpoint)

	// get account owner information
	cleanedScreenName := strings.TrimPrefix(c.acc.User.ID, "@")
	ownerAccount, err := c.getAccountFromAPI(cleanedScreenName, "")
	if err != nil {
		return fmt.Errorf("getting user account information for @%s: %w", cleanedScreenName, err)
	}
	c.ownerAccount = ownerAccount

	// get the starting bounds of this operation
	var maxTweet, minTweet string
	if opt.Timeframe.SinceItemID != nil {
		minTweet = *opt.Timeframe.SinceItemID
	}
	switch {
	case c.checkpoint.LastTweetID != "" && opt.Timeframe.UntilItemID != nil:
		// if both a timeframe UntilItemID and a checkpoint are set,
		// we will choose the one with a tweet ID that is higher,
		// meaning more recent, to avoid potentially skipping
		// a chunk of the timeline
		maxTweet = maxTweetID(c.checkpoint.LastTweetID, *opt.Timeframe.UntilItemID)
	case c.checkpoint.LastTweetID != "":
		// by default, start off at the last checkpoint
		maxTweet = c.checkpoint.LastTweetID
	case opt.Timeframe.UntilItemID != nil:
		// no checkpoint yet: the timeframe's upper bound must still be
		// honored, otherwise a first run would ignore it entirely
		maxTweet = *opt.Timeframe.UntilItemID
	}

	for {
		// stop quietly between pages if the caller cancelled
		select {
		case <-ctx.Done():
			return nil
		default:
		}

		tweets, err := c.nextPageOfTweetsFromAPI(maxTweet, minTweet)
		if err != nil {
			return fmt.Errorf("getting next page of tweets: %w", err)
		}

		// we are done when there are no more tweets
		if len(tweets) == 0 {
			return nil
		}

		// TODO: currently working on this for Twitter's v2 API
		// for _, t := range tweets {
		// 	err = c.processTweetFromAPI(t, itemChan)
		// 	if err != nil {
		// 		return fmt.Errorf("processing tweet from API: %v", err)
		// 	}
		// }

		// since max_id is inclusive, subtract 1 from the tweet ID
		// https://developer.twitter.com/en/docs/tweets/timelines/guides/working-with-timelines
		// NOTE(review): that guide describes the v1.1 max_id parameter, but
		// nextPageOfTweetsFromAPI now sends until_id; confirm whether the
		// subtraction is still needed for the v2 endpoint.
		nextTweetID := tweets[len(tweets)-1].TweetID - 1
		c.checkpoint.LastTweetID = strconv.FormatInt(int64(nextTweetID), 10)
		c.checkpoint.save(ctx)

		// decrease maxTweet to get the next page on next iteration
		maxTweet = c.checkpoint.LastTweetID
	}
}
// processTweetFromAPI prepares a tweet obtained from the API and, unless
// it is skipped, converts it into an item graph and sends that graph on
// itemChan.
//
// It returns nil when prepareTweet reports the tweet should be skipped,
// and also when makeItemGraphFromTweet yields a nil graph (nothing is
// sent in that case). Errors from either step are wrapped with %w so
// callers can inspect the underlying cause.
//
// The send on itemChan is not guarded by a context, so it blocks until
// the channel accepts the value.
func (c *Client) processTweetFromAPI(t tweet, itemChan chan<- *timeline.ItemGraph, opt Options) error {
	skip, err := c.prepareTweet(&t, "api", opt)
	if err != nil {
		return fmt.Errorf("preparing tweet: %w", err)
	}
	if skip {
		return nil
	}

	ig, err := c.makeItemGraphFromTweet(t, nil, opt)
	if err != nil {
		return fmt.Errorf("processing tweet %s: %w", t.id(), err)
	}

	// send the tweet for processing
	if ig != nil {
		itemChan <- ig
	}

	return nil
}
// nextPageOfTweetsFromAPI requests the next page of the owner account's
// tweets from the v2 recent-search endpoint, starting at maxTweet and
// going for a full page or until minTweet, whichever comes first.
// Generally, iterating over this function will involve decreasing
// maxTweet and leaving minTweet the same, if set at all (maxTweet =
// "until", minTweet = "since"). Either or both can be empty strings,
// for no boundaries.
//
// NOTE: the v2 rewrite is unfinished. The response is decoded and
// logged, but it is not yet converted into []tweet, so on success this
// function currently always returns (nil, nil), which callers treat as
// "no more tweets". Request, HTTP-status, and decoding failures are
// returned as wrapped errors.
func (c *Client) nextPageOfTweetsFromAPI(maxTweet, minTweet string) ([]tweet, error) {
	log.Println("TWITTER ID:", c.ownerAccount.id())

	// TODO: Gah, the v2 API does not return media URL. This rewrite is impossible to complete without that. That's like omitting the text of a tweet.
	q := url.Values{
		"query": {"from:" + c.ownerAccount.id()},
		// the valid range is 10 to 100 (a value of 0 is outside it);
		// TODO: use 100 once I'm done testing
		"max_results":  {"10"},
		"tweet.fields": {"attachments,author_id,context_annotations,conversation_id,created_at,entities,geo,id,in_reply_to_user_id,lang,public_metrics,possibly_sensitive,referenced_tweets,reply_settings,source,text,withheld"},
		"media.fields": {"duration_ms,height,media_key,preview_image_url,type,url,width,public_metrics"},
		"place.fields": {"contained_within,country,country_code,full_name,geo,id,name,place_type"},
		"expansions":   {"attachments.media_keys"},
		// others: https://developer.twitter.com/en/docs/twitter-api/tweets/search/api-reference/get-tweets-search-recent

		// TODO: from v1...
		// "tweet_mode":      {"extended"}, // https://developer.twitter.com/en/docs/tweets/tweet-updates
		// "exclude_replies": {"false"}, // always include replies in case it's a self-reply; we can filter all others
		// "include_rts":     {"false"}, // or true if c.Retweets
	}
	if minTweet != "" {
		q.Set("since_id", minTweet)
	}
	if maxTweet != "" {
		q.Set("until_id", maxTweet)
	}

	u := apiBase + "/tweets/search/recent?" + q.Encode()
	log.Println("URL:", u)

	resp, err := c.HTTPClient.Get(u)
	if err != nil {
		return nil, fmt.Errorf("performing API request: %w", err)
	}
	defer resp.Body.Close()

	// TODO: handle HTTP errors, esp. rate limiting, a lot better
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("HTTP error: %s: %s", u, resp.Status)
	}

	var tweets recentTweetsResponse
	err = json.NewDecoder(resp.Body).Decode(&tweets)
	if err != nil {
		return nil, fmt.Errorf("reading response body: %w", err)
	}

	log.Printf("RESPONSE: %+v", tweets)

	// TODO: convert the decoded response into []tweet once the v2
	// rewrite is complete; until then no tweets are returned.
	return nil, nil
}
// getAccountFromAPI gets the account information for either
// screenName, if set, or accountID, if set. Set only one;
// leave the other argument empty string. If both are set,
// screenName takes precedence; if neither is set, an error is
// returned without contacting the API.
//
// The lookup uses the v1.1 users/show endpoint. Any status other
// than 200 OK is reported as an error, and request or decoding
// failures are wrapped with %w.
func (c *Client) getAccountFromAPI(screenName, accountID string) (twitterAccount, error) {
	var ta twitterAccount

	q := make(url.Values)
	switch {
	case screenName != "":
		q.Set("screen_name", screenName)
	case accountID != "":
		q.Set("user_id", accountID)
	default:
		// without either identifier the request cannot name an account,
		// so fail fast instead of spending an API call on it
		return ta, fmt.Errorf("no screen name or account ID specified")
	}

	u := "https://api.twitter.com/1.1/users/show.json?" + q.Encode()

	resp, err := c.HTTPClient.Get(u)
	if err != nil {
		return ta, fmt.Errorf("performing API request: %w", err)
	}
	defer resp.Body.Close()

	// TODO: handle HTTP errors, esp. rate limiting, a lot better
	if resp.StatusCode != http.StatusOK {
		return ta, fmt.Errorf("HTTP error: %s: %s", u, resp.Status)
	}

	err = json.NewDecoder(resp.Body).Decode(&ta)
	if err != nil {
		return ta, fmt.Errorf("reading response body: %w", err)
	}

	return ta, nil
}
// getTweetFromAPI fetches the single tweet with the given id from the
// v1.1 statuses/show endpoint, requesting extended tweet mode.
//
// A 404 Not Found or 403 Forbidden response is not treated as an error:
// the zero-value tweet is returned with a nil error. Any other non-200
// status is returned as an error, and request or decoding failures are
// wrapped with %w.
func (c *Client) getTweetFromAPI(id string) (tweet, error) {
	var t tweet

	q := url.Values{
		"id":         {id},
		"tweet_mode": {"extended"}, // https://developer.twitter.com/en/docs/tweets/tweet-updates
	}
	u := "https://api.twitter.com/1.1/statuses/show.json?" + q.Encode()

	resp, err := c.HTTPClient.Get(u)
	if err != nil {
		return t, fmt.Errorf("performing API request: %w", err)
	}
	defer resp.Body.Close()

	switch resp.StatusCode {
	case http.StatusOK:
		// fall out of the switch and decode the body below
	case http.StatusNotFound, http.StatusForbidden:
		// not found is okay, because the tweet may simply have been
		// deleted, and we skip empty tweets anyway; forbidden happens
		// when the author's account is suspended
		return t, nil
	default:
		// TODO: handle HTTP errors, esp. rate limiting, a lot better
		return t, fmt.Errorf("HTTP error: %s: %s", u, resp.Status)
	}

	err = json.NewDecoder(resp.Body).Decode(&t)
	if err != nil {
		return t, fmt.Errorf("reading response body: %w", err)
	}

	return t, nil
}
// apiBase is the base URL of version 2 of the Twitter API; endpoint
// paths (e.g. "/tweets/search/recent") are appended to it.
const apiBase = "https://api.twitter.com/2"