From ba57511a019aea10e2464fc4d3ba3e12294d0c4c Mon Sep 17 00:00:00 2001 From: Zed Date: Sun, 19 Jan 2020 08:34:32 +0100 Subject: [PATCH] Add workaround for Twitter's layout A/B testing Fixes #110 --- src/api/cookie.nim | 16 ++++++++++++++++ src/api/list.nim | 2 +- src/api/profile.nim | 2 +- src/api/search.nim | 5 +++-- src/api/timeline.nim | 4 ++-- src/api/tweet.nim | 2 +- src/api/utils.nim | 11 +++++++---- src/cache.nim | 3 ++- src/routes/rss.nim | 3 ++- src/routes/timeline.nim | 16 ++++++++-------- 10 files changed, 43 insertions(+), 21 deletions(-) create mode 100644 src/api/cookie.nim diff --git a/src/api/cookie.nim b/src/api/cookie.nim new file mode 100644 index 0000000..6261a1d --- /dev/null +++ b/src/api/cookie.nim @@ -0,0 +1,16 @@ +import httpclient, strutils + +proc getGuestId*(): string = + let client = newHttpClient() + for i in 0 .. 10: + try: + let req = client.get("https://twitter.com") + if "react-root" in req.body: continue + for k, v in req.headers: + if "guest_id" in v: + return v[v.find("=") + 1 .. v.find(";")] + except: + discard + finally: + try: client.close() + except: discard diff --git a/src/api/list.nim b/src/api/list.nim index c92fe6e..a46efa0 100644 --- a/src/api/list.nim +++ b/src/api/list.nim @@ -54,7 +54,7 @@ proc getListMembers*(username, list, after, agent: string): Future[Result[Profil let url = base / (listMembersUrl % [username, list]) - html = await fetchHtml(url, genHeaders(agent, url)) + html = await fetchHtml(url, genHeaders(agent, url, guestId=true)) result = Result[Profile]( minId: html.selectAttr(".stream-container", "data-min-position"), diff --git a/src/api/profile.nim b/src/api/profile.nim index f66337f..89f80f0 100644 --- a/src/api/profile.nim +++ b/src/api/profile.nim @@ -34,7 +34,7 @@ proc getProfile*(username, agent: string): Future[Profile] {.async.} = proc getProfileFull*(username, agent: string): Future[Profile] {.async.} = let url = base / username - headers = genHeaders(agent, url, auth=true) + headers = genHeaders(agent, url, auth=true, guestId=true) html = await fetchHtml(url, headers) if html == nil: return diff --git a/src/api/search.nim b/src/api/search.nim index 0554d78..7d1b20c 100644 --- a/src/api/search.nim +++ b/src/api/search.nim @@ -14,7 +14,8 @@ proc getResult*[T](json: JsonNode; query: Query; after: string): Result[T] = beginning: after.len == 0 ) -proc getSearch*[T](query: Query; after, agent: string; media=true): Future[Result[T]] {.async.} = +proc getSearch*[T](query: Query; after, agent: string; + media=true): Future[Result[T]] {.async.} = let kind = if query.kind == users: "users" else: "tweets" @@ -22,7 +23,7 @@ proc getSearch*[T](query: Query; after, agent: string; media=true): Future[Resul encoded = encodeUrl(param, usePlus=false) referer = base / ("search?f=$1&q=$2&src=typd" % [kind, encoded]) - headers = genHeaders(agent, referer, auth=true, xml=true) + headers = genHeaders(agent, referer, auth=true, xml=true, guestId=true) params = { "f": kind, diff --git a/src/api/timeline.nim b/src/api/timeline.nim index 1058637..5aea170 100644 --- a/src/api/timeline.nim +++ b/src/api/timeline.nim @@ -30,12 +30,12 @@ proc getProfileAndTimeline*(username, after, agent: string; url = url ? {"max_position": after} let - headers = genHeaders(agent, base / username, auth=true) + headers = genHeaders(agent, base / username, auth=true, guestId=true) html = await fetchHtml(url, headers) timeline = parseTimeline(html.select("#timeline > .stream-container"), after) profile = parseTimelineProfile(html) - if media: await getMedia(timeline, agent) + if media and profile.username.len > 0: await getMedia(timeline, agent) result = (profile, timeline) proc getTimeline*(username, after, agent: string; diff --git a/src/api/tweet.nim b/src/api/tweet.nim index 8f5ff17..028645c 100644 --- a/src/api/tweet.nim +++ b/src/api/tweet.nim @@ -8,7 +8,7 @@ proc getTweet*(username, id, after, agent: string): Future[Conversation] {.async headers = genHeaders({ "pragma": "no-cache", "x-previous-page-name": "profile" - }, agent, base, xml=true) + }, agent, base, xml=true, guestId=true) url = base / username / tweetUrl / id ? {"max_position": after} html = await fetchHtml(url, headers) diff --git a/src/api/utils.nim b/src/api/utils.nim index 96dcec7..fa2c75b 100644 --- a/src/api/utils.nim +++ b/src/api/utils.nim @@ -2,11 +2,13 @@ import httpclient, asyncdispatch, htmlparser, options import strutils, json, xmltree, uri import ../types -import consts +import consts, cookie + +var guestIdCookie = "guest_id=" & getGuestId() proc genHeaders*(headers: openArray[tuple[key: string, val: string]]; agent: string; referer: Uri; lang=true; - auth=false; xml=false): HttpHeaders = + auth=false; xml=false; guestId=false): HttpHeaders = result = newHttpHeaders({ "referer": $referer, "user-agent": agent, @@ -16,13 +18,14 @@ proc genHeaders*(headers: openArray[tuple[key: string, val: string]]; if auth: result["authority"] = "twitter.com" if lang: result["accept-language"] = consts.lang if xml: result["x-requested-with"] = "XMLHttpRequest" + if guestId: result["cookie"] = guestIdCookie for (key, val) in headers: result[key] = val proc genHeaders*(agent: string; referer: Uri; lang=true; - auth=false; xml=false): HttpHeaders = - genHeaders([], agent, referer, lang, auth, xml) + auth=false; xml=false; guestId=false): HttpHeaders = + genHeaders([], agent, referer, lang, auth, xml, guestId) template newClient*() {.dirty.} = var client = newAsyncHttpClient() diff --git a/src/cache.nim b/src/cache.nim index 7916165..2adee32 100644 --- a/src/cache.nim +++ b/src/cache.nim @@ -41,7 +41,8 @@ proc hasCachedProfile*(username: string): Option[Profile] = except AssertionError, KeyError: result = none Profile -proc getCachedProfile*(username, agent: string; force=false): Future[Profile] {.async.} = +proc getCachedProfile*(username, agent: string; + force=false): Future[Profile] {.async.} = withDb: try: result.getOne("lower(username) = ?", toLower(username)) diff --git a/src/routes/rss.nim b/src/routes/rss.nim index 93c0d6e..f6d4f69 100644 --- a/src/routes/rss.nim +++ b/src/routes/rss.nim @@ -20,7 +20,8 @@ proc showRss*(req: Request; hostname: string; query: Query): Future[(string, str (profile, timeline) = await fetchSingleTimeline(names[0], after, getAgent(), query, media=false) else: - timeline = await fetchMultiTimeline(names, after, getAgent(), query, media=false) + let multiQuery = query.getMultiQuery(names) + timeline = await getSearch[Tweet](multiQuery, after, getAgent(), media=false) # this is kinda dumb profile = Profile( username: name, diff --git a/src/routes/timeline.nim b/src/routes/timeline.nim index 9ed23b3..916d09f 100644 --- a/src/routes/timeline.nim +++ b/src/routes/timeline.nim @@ -45,13 +45,11 @@ proc fetchSingleTimeline*(name, after, agent: string; query: Query; if profile.username.len == 0: return return (profile, timeline) -proc fetchMultiTimeline*(names: seq[string]; after, agent: string; query: Query; - media=true): Future[Timeline] {.async.} = - var q = query - q.fromUser = names +proc getMultiQuery*(q: Query; names: seq[string]): Query = + result = q + result.fromUser = names if q.kind == posts and "replies" notin q.excludes: - q.excludes.add "replies" - return await getSearch[Tweet](q, after, agent, media) + result.excludes.add "replies" proc get*(req: Request; key: string): string = params(req).getOrDefault(key) @@ -62,8 +60,10 @@ proc showTimeline*(request: Request; query: Query; cfg: Config; prefs: Prefs; let names = getNames(request.get("name")) if names.len != 1: - let timeline = await fetchMultiTimeline(names, after, agent, query) - let html = renderTweetSearch(timeline, prefs, getPath()) + let + multiQuery = query.getMultiQuery(names) + timeline = await getSearch[Tweet](multiQuery, after, agent) + html = renderTweetSearch(timeline, prefs, getPath()) return renderMain(html, request, cfg, "Multi", rss=rss) let