From 4167ce458bbd292cf78fd056de1dfeada66f9bca Mon Sep 17 00:00:00 2001 From: Zed Date: Mon, 1 Jun 2020 02:14:29 +0200 Subject: [PATCH] Out with the old --- src/api/consts.nim | 24 ---- src/api/list.nim | 65 ---------- src/api/media.nim | 159 ----------------------- src/api/profile.nim | 41 ------ src/api/resolver.nim | 13 -- src/api/search.nim | 53 -------- src/api/timeline.nim | 75 ----------- src/api/tweet.nim | 61 --------- src/api/utils.nim | 64 ---------- src/cache.nim | 91 -------------- src/parser.nim | 290 ------------------------------------------ src/parserutils.nim | 294 ------------------------------------------- 12 files changed, 1230 deletions(-) delete mode 100644 src/api/consts.nim delete mode 100644 src/api/list.nim delete mode 100644 src/api/media.nim delete mode 100644 src/api/profile.nim delete mode 100644 src/api/resolver.nim delete mode 100644 src/api/search.nim delete mode 100644 src/api/timeline.nim delete mode 100644 src/api/tweet.nim delete mode 100644 src/api/utils.nim delete mode 100644 src/cache.nim delete mode 100644 src/parser.nim delete mode 100644 src/parserutils.nim diff --git a/src/api/consts.nim b/src/api/consts.nim deleted file mode 100644 index 9c76162..0000000 --- a/src/api/consts.nim +++ /dev/null @@ -1,24 +0,0 @@ -import uri - -const - lang* = "en-US,en;q=0.9" - auth* = "Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw" - htmlAccept* = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3" - jsonAccept* = "application/json, text/javascript, */*; q=0.01" - - base* = parseUri("https://twitter.com/") - apiBase* = parseUri("https://api.twitter.com/1.1/") - - timelineUrl* = "i/profiles/show/$1/timeline/tweets" - timelineMediaUrl* = "i/profiles/show/$1/media_timeline" - listUrl* = "$1/lists/$2/timeline" - listMembersUrl* = "$1/lists/$2/members" - profilePopupUrl* = "i/profiles/popup" - profileIntentUrl* = "intent/user" - searchUrl* = "i/search/timeline" - tweetUrl* = "status" - repliesUrl* = "i/$1/conversation/$2" - videoUrl* = "videos/tweet/config/$1.json" - tokenUrl* = "guest/activate.json" - cardUrl* = "i/cards/tfw/v1/$1" - pollUrl* = cardUrl & "?cardname=poll2choice_text_only&lang=en" diff --git a/src/api/list.nim b/src/api/list.nim deleted file mode 100644 index c92fe6e..0000000 --- a/src/api/list.nim +++ /dev/null @@ -1,65 +0,0 @@ -import httpclient, asyncdispatch, htmlparser -import sequtils, strutils, json, uri - -import ".."/[types, parser, parserutils, query] -import utils, consts, timeline, search - -proc getListTimeline*(username, list, after, agent: string; media=true): Future[Timeline] {.async.} = - let url = base / (listUrl % [username, list]) - - var params = toSeq({ - "include_available_features": "1", - "include_entities": "1", - "reset_error_state": "false" - }) - - if after.len > 0: - params.add {"max_position": after} - - let json = await fetchJson(url ? params, genHeaders(agent, url)) - result = await finishTimeline(json, Query(), after, agent, media) - if result.content.len == 0: - return - - result.minId = getLastId(result) - -proc getListMembersSearch(username, list, after, agent: string): Future[Result[Profile]] {.async.} = - let - referer = base / (listMembersUrl % [username, list]) - url = referer / "timeline" - headers = genHeaders({"x-push-with": "XMLHttpRequest"}, agent, referer, xml=true) - - var params = toSeq({ - "include_available_features": "1", - "include_entities": "1", - "reset_error_state": "false" - }) - - if after.len > 0: - params.add {"max_position": after} - - let json = await fetchJson(url ? params, headers) - - result = getResult[Profile](json, Query(kind: userList), after) - if json == nil or not json.hasKey("items_html"): return - - let html = json["items_html"].to(string) - result.hasMore = html != "\n" - for p in parseHtml(html).selectAll(".account"): - result.content.add parseListProfile(p) - -proc getListMembers*(username, list, after, agent: string): Future[Result[Profile]] {.async.} = - if after.len > 0: - return await getListMembersSearch(username, list, after, agent) - - let - url = base / (listMembersUrl % [username, list]) - html = await fetchHtml(url, genHeaders(agent, url)) - - result = Result[Profile]( - minId: html.selectAttr(".stream-container", "data-min-position"), - hasMore: html.select(".has-more-items") != nil, - beginning: true, - query: Query(kind: userList), - content: html.selectAll(".account").map(parseListProfile) - ) diff --git a/src/api/media.nim b/src/api/media.nim deleted file mode 100644 index 0e17d72..0000000 --- a/src/api/media.nim +++ /dev/null @@ -1,159 +0,0 @@ -import httpclient, asyncdispatch, times, sequtils, strutils, json, uri -import macros, options - -import ".."/[types, parser, formatters, cache] -import utils, consts - -var - guestToken = "" - tokenUses = 0 - tokenMaxUses = 230 - tokenUpdated: Time - tokenLifetime = initDuration(minutes=20) - -macro genMediaGet(media: untyped; token=false) = - let - mediaName = capitalizeAscii($media) - multi = ident("get" & mediaName & "s") - convo = ident("getConversation" & mediaName & "s") - replies = ident("getReplies" & mediaName & "s") - single = ident("get" & mediaName) - - quote do: - proc `multi`*(thread: Chain | Timeline; agent: string; token="") {.async.} = - if thread == nil: return - var `media` = thread.content.filterIt(it.`media`.isSome) - when `token`: - var gToken = token - if gToken.len == 0: gToken = await getGuestToken(agent) - await all(`media`.mapIt(`single`(it, token, agent))) - else: - await all(`media`.mapIt(`single`(it, agent))) - - proc `replies`*(replies: Result[Chain]; agent: string; token="") {.async.} = - when `token`: - var gToken = token - if gToken.len == 0: gToken = await getGuestToken(agent) - await all(replies.content.mapIt(`multi`(it, agent, token=gToken))) - else: - await all(replies.content.mapIt(`multi`(it, agent))) - - proc `convo`*(convo: Conversation; agent: string) {.async.} = - var futs: seq[Future[void]] - when `token`: - var token = await getGuestToken(agent) - futs.add `single`(convo.tweet, agent, token) - futs.add `multi`(convo.before, agent, token=token) - futs.add `multi`(convo.after, agent, token=token) - if convo.replies != nil: - futs.add `replies`(convo.replies, agent, token=token) - else: - futs.add `single`(convo.tweet, agent) - futs.add `multi`(convo.before, agent) - futs.add `multi`(convo.after, agent) - if convo.replies != nil: - futs.add `replies`(convo.replies, agent) - await all(futs) - -proc getGuestToken(agent: string; force=false): Future[string] {.async.} = - if getTime() - tokenUpdated < tokenLifetime and - not force and tokenUses < tokenMaxUses: - return guestToken - - tokenUpdated = getTime() - tokenUses = 0 - - let headers = genHeaders({"authorization": auth}, agent, base, lang=false) - newClient() - - var res: string - try: res = await client.postContent($(apiBase / tokenUrl)) - except: return - - let json = parseJson(res) - - if json != nil: - result = json["guest_token"].to(string) - guestToken = result - -proc getVideoVar(tweet: Tweet): var Option[Video] = - if tweet.card.isSome(): - return get(tweet.card).video - else: - return tweet.video - -proc getVideoFetch(tweet: Tweet; agent, token: string; retry=true): Future[Option[Video]] {.async.} = - if tweet.video.isNone(): return - - let - headers = genHeaders({"authorization": auth, "x-guest-token": token}, - agent, base / getLink(tweet, focus=false), lang=false) - url = apiBase / (videoUrl % $tweet.id) - json = await fetchJson(url, headers) - - if json == nil: - if not retry: return - if getTime() - tokenUpdated > initDuration(seconds=1): - tokenUpdated = getTime() - discard await getGuestToken(agent, force=true) - return await getVideoFetch(tweet, agent, guestToken, retry=false) - - var video = parseVideo(json, tweet.id) - video.title = get(tweet.video).title - video.description = get(tweet.video).description - cache(video) - - result = some video - tokenUses.inc - -proc videoIsInvalid(video: Video): bool = - not video.available and video.url.len == 0 - -proc getVideo*(tweet: Tweet; agent, token: string; force=false) {.async.} = - let token = if token.len == 0: guestToken else: token - var video = getCachedVideo(tweet.id) - if video.isNone: - video = await getVideoFetch(tweet, agent, token) - elif videoIsInvalid(get(video)) and tweet.gif.isSome: - # gif was mistakenly parsed as a gif - uncache(tweet.id) - return - - getVideoVar(tweet) = video - if tweet.card.isSome: tweet.video = none Video - -proc getPoll*(tweet: Tweet; agent: string) {.async.} = - if tweet.poll.isNone(): return - - let - headers = genHeaders(agent, base / getLink(tweet, focus=false), auth=true) - url = base / (pollUrl % $tweet.id) - html = await fetchHtml(url, headers) - - if html == nil: return - tweet.poll = some parsePoll(html) - -proc getCard*(tweet: Tweet; agent: string) {.async.} = - if tweet.card.isNone(): return - - let - headers = genHeaders(agent, base / getLink(tweet, focus=false), auth=true) - query = get(tweet.card).query.replace("sensitive=true", "sensitive=false") - html = await fetchHtml(base / query, headers) - - if html == nil: return - parseCard(get(tweet.card), html) - -proc getPhotoRail*(username, agent: string; skip=false): Future[seq[GalleryPhoto]] {.async.} = - if skip: return - let - headers = genHeaders(agent, base / username, xml=true) - params = {"for_photo_rail": "true", "oldest_unread_id": "0"} - url = base / (timelineMediaUrl % username) ? params - html = await fetchHtml(url, headers, jsonKey="items_html") - - result = parsePhotoRail(html) - -genMediaGet(video, token=true) -genMediaGet(poll) -genMediaGet(card) diff --git a/src/api/profile.nim b/src/api/profile.nim deleted file mode 100644 index f66337f..0000000 --- a/src/api/profile.nim +++ /dev/null @@ -1,41 +0,0 @@ -import httpclient, asyncdispatch, times, strutils, uri - -import ".."/[types, parser, parserutils] - -import utils, consts - -proc getProfileFallback(username: string; headers: HttpHeaders): Future[Profile] {.async.} = - let url = base / profileIntentUrl ? {"screen_name": username} - let html = await fetchHtml(url, headers) - if html == nil: return Profile() - - result = parseIntentProfile(html) - -proc getProfile*(username, agent: string): Future[Profile] {.async.} = - let - headers = genHeaders(agent, base / username, xml=true) - - params = { - "screen_name": username, - "wants_hovercard": "true", - "_": $(epochTime().int) - } - - url = base / profilePopupUrl ? params - html = await fetchHtml(url, headers, jsonKey="html") - - if html == nil: return Profile() - - if html.select(".ProfileCard-sensitiveWarningContainer") != nil: - return await getProfileFallback(username, headers) - - result = parsePopupProfile(html) - -proc getProfileFull*(username, agent: string): Future[Profile] {.async.} = - let - url = base / username - headers = genHeaders(agent, url, auth=true) - html = await fetchHtml(url, headers) - - if html == nil: return - result = parseTimelineProfile(html) diff --git a/src/api/resolver.nim b/src/api/resolver.nim deleted file mode 100644 index ad3ad10..0000000 --- a/src/api/resolver.nim +++ /dev/null @@ -1,13 +0,0 @@ -import asyncdispatch, httpclient - -import ".."/[formatters, types] - -proc resolve*(url: string; prefs: Prefs): Future[string] {.async.} = - let client = newAsyncHttpClient(maxRedirects=0) - try: - let resp = await client.request(url, $HttpHead) - result = resp.headers["location"].replaceUrl(prefs) - except: - discard - finally: - client.close() diff --git a/src/api/search.nim b/src/api/search.nim deleted file mode 100644 index af80688..0000000 --- a/src/api/search.nim +++ /dev/null @@ -1,53 +0,0 @@ -import httpclient, asyncdispatch, htmlparser -import strutils, json, xmltree, uri - -import ".."/[types, parser, parserutils, query] -import utils, consts, timeline - -proc getResult*[T](json: JsonNode; query: Query; after: string): Result[T] = - if json == nil: return Result[T](beginning: true, query: query) - Result[T]( - hasMore: json{"has_more_items"}.getBool(false), - maxId: json{"max_position"}.getStr, - minId: json{"min_position"}.getStr, - query: query, - beginning: after.len == 0 - ) - -proc getSearch*[T](query: Query; after, agent: string; - media=true): Future[Result[T]] {.async.} = - let - kind = if query.kind == users: "users" else: "tweets" - - param = genQueryParam(query) - encoded = encodeUrl(param, usePlus=false) - - referer = base / ("search?f=$1&q=$2&src=typd" % [kind, encoded]) - headers = genHeaders(agent, referer, auth=true, xml=true) - - params = { - "f": kind, - "vertical": "default", - "q": param, - "src": "typd", - "include_available_features": "1", - "include_entities": "1", - "max_position": if after.len > 0: after else: "0", - "reset_error_state": "false" - } - - if param in ["include:nativeretweets", "-filter:nativeretweets", ""]: - return Result[T](query: query, beginning: true) - - let json = await fetchJson(base / searchUrl ? params, headers) - - result = getResult[T](json, query, after) - if json == nil or not json.hasKey("items_html"): return - - when T is Tweet: - result = await finishTimeline(json, query, after, agent, media) - elif T is Profile: - let html = json["items_html"].to(string) - result.hasMore = html != "\n" - for p in parseHtml(html).selectAll(".js-stream-item"): - result.content.add parsePopupProfile(p, ".ProfileCard") diff --git a/src/api/timeline.nim b/src/api/timeline.nim deleted file mode 100644 index cd10e56..0000000 --- a/src/api/timeline.nim +++ /dev/null @@ -1,75 +0,0 @@ -import httpclient, asyncdispatch, htmlparser, strformat -import sequtils, strutils, json, uri - -import ".."/[types, parser, parserutils, formatters, query] -import utils, consts, media, search - -proc getMedia(thread: Chain | Timeline; agent: string) {.async.} = - await all(getVideos(thread, agent), - getCards(thread, agent), - getPolls(thread, agent)) - -proc finishTimeline*(json: JsonNode; query: Query; after, agent: string; - media=true): Future[Timeline] {.async.} = - result = getResult[Tweet](json, query, after) - if json == nil: return - - if json["new_latent_count"].to(int) == 0: return - if not json.hasKey("items_html"): return - - let html = parseHtml(json["items_html"].to(string)) - let timeline = parseChain(html) - - if media: await getMedia(timeline, agent) - result.content = timeline.content - -proc getProfileAndTimeline*(username, after, agent: string; - media=true): Future[(Profile, Timeline)] {.async.} = - var url = base / username - if after.len > 0: - url = url ? {"max_position": after} - - let - headers = genHeaders(agent, base / username, auth=true) - html = await fetchHtml(url, headers) - timeline = parseTimeline(html.select("#timeline > .stream-container"), after) - profile = parseTimelineProfile(html) - - if media and profile.username.len > 0: await getMedia(timeline, agent) - result = (profile, timeline) - -proc getTimeline*(username, after, agent: string; - media=true): Future[Timeline] {.async.} = - var params = toSeq({ - "include_available_features": "1", - "include_entities": "1", - "include_new_items_bar": "false", - "reset_error_state": "false" - }) - - if after.len > 0: - params.add {"max_position": after} - - let headers = genHeaders(agent, base / username, xml=true) - let json = await fetchJson(base / (timelineUrl % username) ? params, headers) - - result = await finishTimeline(json, Query(), after, agent, media) - -proc getMediaTimeline*(username, after, agent: string; - media=true): Future[Timeline] {.async.} = - var params = toSeq({ - "include_available_features": "1", - "include_entities": "1", - "reset_error_state": "false" - }) - - if after.len > 0: - params.add {"max_position": after} - - let - headers = genHeaders(agent, base / username, xml=true) - json = await fetchJson(base / (timelineMediaUrl % username) ? params, headers) - query = Query(kind: QueryKind.media) - - result = await finishTimeline(json, query, after, agent, media) - result.minId = getLastId(result) diff --git a/src/api/tweet.nim b/src/api/tweet.nim deleted file mode 100644 index 2ba8df7..0000000 --- a/src/api/tweet.nim +++ /dev/null @@ -1,61 +0,0 @@ -import asyncdispatch, strutils, uri, httpclient, json, xmltree, htmlparser - -import ".."/[types, parser] -import utils, consts, media - -proc getTweet*(username, id, after, agent: string): Future[Conversation] {.async.} = - let - headers = genHeaders({ - "pragma": "no-cache", - "x-previous-page-name": "profile", - "accept": htmlAccept - }, agent, base, xml=true) - - url = base / username / tweetUrl / id ? {"max_position": after} - - newClient() - var html: XmlNode - try: - let resp = await client.get($url) - if resp.code == Http403 and "suspended" in (await resp.body): - return Conversation(tweet: Tweet(tombstone: "User has been suspended")) - html = parseHtml(await resp.body) - except: - discard - - if html == nil: return - - result = parseConversation(html, after) - - await all(getConversationVideos(result, agent), - getConversationCards(result, agent), - getConversationPolls(result, agent)) - -proc getReplies*(username, id, after, agent: string): Future[Result[Chain]] {.async.} = - let - headers = genHeaders({ - "pragma": "no-cache", - "x-previous-page-name": "permalink", - "accept": htmlAccept - }, agent, base, xml=true) - - params = { - "include_available_features": "1", - "include_entities": "1", - "max_position": after, - } - - url = base / (repliesUrl % [username, id]) ? params - - let json = await fetchJson(url, headers) - if json == nil or not json.hasKey("items_html"): return - let html = parseHtml(json{"items_html"}.getStr) - - result = parseReplies(html) - result.minId = json{"min_position"}.getStr(result.minId) - if result.minId.len > 0: - result.hasMore = true - - await all(getRepliesVideos(result, agent), - getRepliesCards(result, agent), - getRepliesPolls(result, agent)) diff --git a/src/api/utils.nim b/src/api/utils.nim deleted file mode 100644 index 96dcec7..0000000 --- a/src/api/utils.nim +++ /dev/null @@ -1,64 +0,0 @@ -import httpclient, asyncdispatch, htmlparser, options -import strutils, json, xmltree, uri - -import ../types -import consts - -proc genHeaders*(headers: openArray[tuple[key: string, val: string]]; - agent: string; referer: Uri; lang=true; - auth=false; xml=false): HttpHeaders = - result = newHttpHeaders({ - "referer": $referer, - "user-agent": agent, - "x-twitter-active-user": "yes", - }) - - if auth: result["authority"] = "twitter.com" - if lang: result["accept-language"] = consts.lang - if xml: result["x-requested-with"] = "XMLHttpRequest" - - for (key, val) in headers: - result[key] = val - -proc genHeaders*(agent: string; referer: Uri; lang=true; - auth=false; xml=false): HttpHeaders = - genHeaders([], agent, referer, lang, auth, xml) - -template newClient*() {.dirty.} = - var client = newAsyncHttpClient() - defer: - try: client.close() - except: discard - client.headers = headers - -proc fetchHtml*(url: Uri; headers: HttpHeaders; jsonKey = ""): Future[XmlNode] {.async.} = - headers["accept"] = htmlAccept - newClient() - - var resp = "" - try: - resp = await client.getContent($url) - except: - return nil - - if jsonKey.len > 0: - resp = parseJson(resp)[jsonKey].str - return parseHtml(resp) - -proc fetchJson*(url: Uri; headers: HttpHeaders): Future[JsonNode] {.async.} = - headers["accept"] = jsonAccept - newClient() - - try: - let resp = await client.getContent($url) - result = parseJson(resp) - except: - return nil - -proc getLastId*(tweets: Result[Tweet]): string = - if tweets.content.len == 0: return - let last = tweets.content[^1] - if last.retweet.isNone: - $last.id - else: - $(get(last.retweet).id) diff --git a/src/cache.nim b/src/cache.nim deleted file mode 100644 index f3a2bb0..0000000 --- a/src/cache.nim +++ /dev/null @@ -1,91 +0,0 @@ -import asyncdispatch, times, strutils -import norm/sqlite - -import types, api/profile - -template safeAddColumn(field: typedesc): untyped = - try: field.addColumn - except DbError: discard - -dbFromTypes("cache.db", "", "", "", [Profile, Video]) - -withDb: - Video.createTable(force=true) - try: Profile.createTable() - except DbError: discard - - safeAddColumn Profile.lowername - safeAddColumn Profile.suspended - -var profileCacheTime = initDuration(minutes=10) - -proc isOutdated*(profile: Profile): bool = - getTime() - profile.updated > profileCacheTime - -proc cache*(profile: var Profile) = - withDb: - try: - let p = Profile.getOne("lowername = ?", profile.lowername) - profile.id = p.id - profile.update() - except KeyError: - if profile.username.len > 0: - profile.insert() - -proc hasCachedProfile*(username: string): Option[Profile] = - withDb: - try: - let p = Profile.getOne("lowername = ?", toLower(username)) - doAssert not p.isOutdated - result = some p - except AssertionError, KeyError: - result = none Profile - -proc getCachedProfile*(username, agent: string; - force=false): Future[Profile] {.async.} = - withDb: - try: - result.getOne("lowername = ?", toLower(username)) - doAssert not result.isOutdated - except AssertionError, KeyError: - result = await getProfileFull(username, agent) - cache(result) - -proc setProfileCacheTime*(minutes: int) = - profileCacheTime = initDuration(minutes=minutes) - -proc cache*(video: var Video) = - withDb: - try: - let v = Video.getOne("videoId = ?", video.videoId) - video.id = v.id - video.update() - except KeyError: - if video.videoId.len > 0: - video.insert() - -proc uncache*(id: int64) = - withDb: - try: - var video = Video.getOne("videoId = ?", $id) - video.delete() - except: - discard - -proc getCachedVideo*(id: int64): Option[Video] = - withDb: - try: - return some Video.getOne("videoId = ?", $id) - except KeyError: - return none Video - -proc cacheCleaner*() {.async.} = - while true: - await sleepAsync(profileCacheTime.inMilliseconds.int) - withDb: - let up = "updated<" & $toUnix(getTime() - profileCacheTime) - var profiles = Profile.getMany(10000, cond=up) - var videos = Video.getMany(10000, cond=up) - transaction: - for p in profiles.mitems: delete(p) - for v in videos.mitems: delete(v) diff --git a/src/parser.nim b/src/parser.nim deleted file mode 100644 index a48bba2..0000000 --- a/src/parser.nim +++ /dev/null @@ -1,290 +0,0 @@ -import xmltree, sequtils, strutils, json, options - -import types, parserutils, formatters - -proc parseJsonData*(node: XmlNode): JsonNode = - let jsonData = node.selectAttr("input.json-data", "value") - if jsonData.len > 0: - return parseJson(jsonData) - -proc parseTimelineProfile*(node: XmlNode): Profile = - let profile = node.select(".ProfileHeaderCard") - if profile == nil: - let data = parseJsonData(node) - if data != nil and data{"sectionName"}.getStr == "suspended": - let username = data{"internalReferer"}.getStr.strip(chars={'/'}) - return Profile(username: username, suspended: true) - return - - let pre = ".ProfileHeaderCard-" - let username = profile.getUsername(pre & "screenname") - result = Profile( - fullname: profile.getName(pre & "nameLink"), - username: username, - lowername: toLower(username), - joinDate: profile.getDate(pre & "joinDateText"), - website: profile.selectAttr(pre & "urlText a", "title"), - bio: profile.getBio(pre & "bio"), - location: getLocation(profile), - userpic: node.getAvatar(".profile-picture img"), - verified: isVerified(profile), - protected: isProtected(profile), - banner: getTimelineBanner(node), - media: getMediaCount(node) - ) - - result.getProfileStats(node.select(".ProfileNav-list")) - -proc parsePopupProfile*(node: XmlNode; selector=".profile-card"): Profile = - let profile = node.select(selector) - if profile == nil: return - - let username = profile.getUsername(".username") - result = Profile( - fullname: profile.getName(".fullname"), - username: username, - lowername: toLower(username), - bio: profile.getBio(".bio", fallback=".ProfileCard-bio"), - userpic: profile.getAvatar(".ProfileCard-avatarImage"), - verified: isVerified(profile), - protected: isProtected(profile), - banner: getBanner(profile) - ) - - result.getPopupStats(profile) - -proc parseListProfile*(profile: XmlNode): Profile = - result = Profile( - fullname: profile.getName(".fullname"), - username: profile.getUsername(".username"), - bio: profile.getBio(".bio").stripText(), - userpic: profile.getAvatar(".avatar"), - verified: isVerified(profile), - protected: isProtected(profile), - ) - -proc parseIntentProfile*(profile: XmlNode): Profile = - result = Profile( - fullname: profile.getName("a.fn.url.alternate-context"), - username: profile.getUsername(".nickname"), - bio: profile.getBio("p.note"), - userpic: profile.select(".profile.summary").getAvatar("img.photo"), - verified: profile.select("li.verified") != nil, - protected: profile.select("li.protected") != nil, - banner: getBanner(profile) - ) - - result.getIntentStats(profile) - -proc parseTweetProfile*(profile: XmlNode): Profile = - result = Profile( - fullname: profile.attr("data-name").stripText(), - username: profile.attr("data-screen-name"), - userpic: profile.getAvatar(".avatar"), - verified: isVerified(profile) - ) - -proc parseQuote*(quote: XmlNode): Quote = - result = Quote( - id: parseBiggestInt(quote.attr("data-item-id")), - text: getQuoteText(quote), - reply: parseTweetReply(quote), - hasThread: quote.select(".self-thread-context") != nil, - available: true - ) - - result.profile = Profile( - fullname: quote.selectText(".QuoteTweet-fullname").stripText(), - username: quote.attr("data-screen-name"), - verified: isVerified(quote) - ) - - result.getQuoteMedia(quote) - -proc parseTweet*(node: XmlNode): Tweet = - if node == nil: - return Tweet() - - if "withheld" in node.attr("class"): - return Tweet(tombstone: getTombstone(node.selectText(".Tombstone-label"))) - - let tweet = node.select(".tweet") - if tweet == nil: - return Tweet() - - result = Tweet( - id: parseBiggestInt(tweet.attr("data-item-id")), - threadId: parseBiggestInt(tweet.attr("data-conversation-id")), - text: getTweetText(tweet), - time: getTimestamp(tweet), - shortTime: getShortTime(tweet), - profile: parseTweetProfile(tweet), - stats: parseTweetStats(tweet), - reply: parseTweetReply(tweet), - mediaTags: getMediaTags(tweet), - location: getTweetLocation(tweet), - hasThread: tweet.select(".content > .self-thread-context") != nil, - pinned: "pinned" in tweet.attr("class"), - available: true - ) - - result.getTweetMedia(tweet) - result.getTweetCard(tweet) - - let by = tweet.selectText(".js-retweet-text > a > b") - if by.len > 0: - result.retweet = some Retweet( - by: stripText(by), - id: parseBiggestInt(tweet.attr("data-retweet-id")) - ) - - let quote = tweet.select(".QuoteTweet-innerContainer") - if quote != nil: - result.quote = some parseQuote(quote) - - let tombstone = tweet.select(".Tombstone") - if tombstone != nil: - if "unavailable" in tombstone.innerText(): - let quote = Quote(tombstone: getTombstone(node.selectText(".Tombstone-label"))) - result.quote = some quote - -proc parseChain*(nodes: XmlNode): Chain = - if nodes == nil: return - result = Chain() - for n in nodes.filterIt(it.kind != xnText): - let class = n.attr("class").toLower() - if "tombstone" in class or "unavailable" in class or "withheld" in class: - result.content.add Tweet() - elif "morereplies" in class: - result.more = getMoreReplies(n) - else: - result.content.add parseTweet(n) - -proc parseReplies*(replies: XmlNode; skipFirst=false): Result[Chain] = - new(result) - for i, reply in replies.filterIt(it.kind != xnText): - if skipFirst and i == 0: continue - let class = reply.attr("class").toLower() - if "lone" in class: - result.content.add parseChain(reply) - elif "showmore" in class: - result.minId = reply.selectAttr("button", "data-cursor") - result.hasMore = true - else: - result.content.add parseChain(reply.select(".stream-items")) - -proc parseConversation*(node: XmlNode; after: string): Conversation = - let tweet = node.select(".permalink-tweet-container") - - if tweet == nil: - return Conversation(tweet: parseTweet(node.select(".permalink-tweet-withheld"))) - - result = Conversation( - tweet: parseTweet(tweet), - before: parseChain(node.select(".in-reply-to .stream-items")), - ) - - if result.before != nil: - let maxId = node.selectAttr(".in-reply-to .stream-container", "data-max-position") - if maxId.len > 0: - result.before.more = -1 - - let replies = node.select(".replies-to .stream-items") - if replies == nil: return - - let nodes = replies.filterIt(it.kind != xnText and "self" in it.attr("class")) - if nodes.len > 0 and "self" in nodes[0].attr("class"): - result.after = parseChain(nodes[0].select(".stream-items")) - - result.replies = parseReplies(replies, result.after != nil) - - result.replies.beginning = after.len == 0 - if result.replies.minId.len == 0: - result.replies.minId = node.selectAttr(".replies-to .stream-container", "data-min-position") - result.replies.hasMore = node.select(".stream-footer .has-more-items") != nil - -proc parseTimeline*(node: XmlNode; after: string): Timeline = - if node == nil: return Timeline() - result = Timeline( - content: parseChain(node.select(".stream > .stream-items")).content, - minId: node.attr("data-min-position"), - maxId: node.attr("data-max-position"), - hasMore: node.select(".has-more-items") != nil, - beginning: after.len == 0 - ) - -proc parseVideo*(node: JsonNode; tweetId: int64): Video = - let - track = node{"track"} - cType = track["contentType"].to(string) - pType = track["playbackType"].to(string) - - case cType - of "media_entity": - result = Video( - playbackType: if "mp4" in pType: mp4 else: m3u8, - contentId: track["contentId"].to(string), - durationMs: track["durationMs"].to(int), - views: track["viewCount"].to(string), - url: track["playbackUrl"].to(string), - available: track{"mediaAvailability"}["status"].to(string) == "available", - reason: track{"mediaAvailability"}["reason"].to(string)) - of "vmap": - result = Video( - playbackType: vmap, - durationMs: track.getOrDefault("durationMs").getInt(0), - url: track["vmapUrl"].to(string), - available: true) - else: - echo "Can't parse video of type ", cType, " ", tweetId - - result.videoId = $tweetId - result.thumb = node["posterImage"].to(string) - -proc parsePoll*(node: XmlNode): Poll = - let - choices = node.selectAll(".PollXChoice-choice") - votes = node.selectText(".PollXChoice-footer--total") - - result.votes = votes.strip().split(" ")[0] - result.status = node.selectText(".PollXChoice-footer--time") - - for choice in choices: - for span in choice.select(".PollXChoice-choice--text").filterIt(it.kind != xnText): - if span.attr("class").len == 0: - result.options.add span.innerText() - elif "progress" in span.attr("class"): - result.values.add parseInt(span.innerText()[0 .. ^2]) - - var highest = 0 - for i, n in result.values: - if n > highest: - highest = n - result.leader = i - -proc parsePhotoRail*(node: XmlNode): seq[GalleryPhoto] = - for img in node.selectAll(".tweet-media-img-placeholder"): - result.add GalleryPhoto( - url: img.attr("data-image-url"), - tweetId: img.attr("data-tweet-id"), - color: img.attr("background-color").replace("style: ", "") - ) - -proc parseCard*(card: var Card; node: XmlNode) = - card.title = node.selectText("h2.TwitterCard-title") - card.text = node.selectText("p.tcu-resetMargin") - card.dest = node.selectText("span.SummaryCard-destination") - - if card.url.len == 0: - card.url = node.selectAttr("a", "href") - if card.url.len == 0: - card.url = node.selectAttr(".ConvoCard-thankYouContent", "data-thank-you-url") - - let image = node.select(".tcu-imageWrapper img") - if image != nil: - # workaround for issue 11713 - card.image = some image.attr("data-src").replace("gname", "g&name") - - if card.kind == liveEvent: - card.text = card.title - card.title = node.selectText(".TwitterCard-attribution--category") diff --git a/src/parserutils.nim b/src/parserutils.nim deleted file mode 100644 index c79eb1e..0000000 --- a/src/parserutils.nim +++ /dev/null @@ -1,294 +0,0 @@ -import xmltree, times, uri, options, json -import strtabs, strformat, strutils, sequtils -import regex - -import types, formatters - -from q import nil -from htmlgen import a - -const - thumbRegex = re".+:url\('([^']+)'\)" - gifRegex = re".+thumb/([^\.']+)\.[jpng].*" - reColor = re"a:active \{\n +color: (#[A-Z0-9]+)" - -proc selectAll*(node: XmlNode; selector: string): seq[XmlNode] = - if node == nil: return - q.select(node, selector) - -proc select*(node: XmlNode; selector: string): XmlNode = - if node == nil: return - let nodes = node.selectAll(selector) - if nodes.len > 0: nodes[0] else: nil - -proc selectAttr*(node: XmlNode; selector: string; attr: string): string = - let res = node.select(selector) - if res == nil: "" else: res.attr(attr) - -proc selectText*(node: XmlNode; selector: string): string = - let res = node.select(selector) - result = if res == nil: "" else: res.innerText() - -proc getHeader(profile: XmlNode): XmlNode = - result = profile.select(".permalink-header") - if result == nil: - result = profile.select(".stream-item-header") - if result == nil: - result = profile.select(".ProfileCard-userFields") - if result == nil: - result = profile - -proc isVerified*(profile: XmlNode): bool = - getHeader(profile).select(".Icon.Icon--verified") != nil - -proc isProtected*(profile: XmlNode): bool = - getHeader(profile).select(".Icon.Icon--protected") != nil - -proc parseText*(text: XmlNode; skipLink=""): string = - if text == nil: return - for el in text: - case el.kind - of xnText: - result.add el - of xnElement: - if el.attrs == nil: - if el.tag == "strong": - result.add $el - continue - - let class = el.attr("class") - if "data-expanded-url" in el.attrs: - let url = el.attr("data-expanded-url") - if url == skipLink: continue - if "u-hidden" in class and result.len > 0: - result.add "\n" - result.add a(shortLink(url), href=url) - elif "ashtag" in class or "hashflag" in class: - let hash = el.innerText() - result.add a(hash, href=("/search?q=" & encodeUrl(hash))) - elif "atreply" in class: - result.add a(el.innerText(), href=el.attr("href")) - elif "Emoji" in class: - result.add el.attr("alt") - else: discard - -proc getQuoteText*(tweet: XmlNode): string = - parseText(tweet.select(".QuoteTweet-text")) - -proc getTweetText*(tweet: XmlNode): string = - let - quote = tweet.select(".QuoteTweet") - text = tweet.select(".tweet-text") - link = text.selectAttr("a.twitter-timeline-link.u-hidden", "data-expanded-url") - parseText(text, if quote != nil: link else: "") - -proc getTimestamp*(tweet: XmlNode): Time = - let time = tweet.selectAttr(".js-short-timestamp", "data-time") - fromUnix(if time.len > 0: parseBiggestInt(time) else: 0) - -proc getShortTime*(tweet: XmlNode): string = - tweet.selectText(".js-short-timestamp") - -proc getDate*(node: XmlNode; selector: string): Time = - let date = node.select(selector) - if date == nil: return - parseTime(date.attr("title"), "h:mm tt - d MMM YYYY", utc()) - -proc getName*(profile: XmlNode; selector: string): string = - profile.selectText(selector).stripText() - -proc getUsername*(profile: XmlNode; selector: string): string = - profile.selectText(selector).strip(chars={'@', ' ', '\n'}) - -proc getBio*(profile: XmlNode; selector: string; fallback=""): string = - var bio = profile.select(selector) - if bio == nil and fallback.len > 0: - bio = profile.select(fallback) - parseText(bio) - -proc getLocation*(profile: XmlNode): string = - let sel = ".ProfileHeaderCard-locationText" - result = profile.selectText(sel).stripText() - - let link = profile.selectAttr(sel & " a", "data-place-id") - if link.len > 0: - result &= ":" & link - -proc getAvatar*(profile: XmlNode; selector: string): string = - profile.selectAttr(selector, "src").getUserpic() - -proc getBanner*(node: XmlNode): string = - let url = node.selectAttr("svg > image", "xlink:href") - if url.len > 0: - result = url.replace("600x200", "1500x500") - else: - result = node.selectAttr(".ProfileCard-bg", "style") - result = result.replace("background-color: ", "") - - if result.len == 0: - result = "#161616" - -proc getTimelineBanner*(node: XmlNode): string = - let banner = node.select(".ProfileCanopy-headerBg img") - let img = banner.attr("src") - if img.len > 0: - return img - - let style = node.select("style").innerText() - var m: RegexMatch - if style.find(reColor, m): - return style[m.group(0)[0]] - -proc getMediaCount*(node: XmlNode): string = - let text = node.selectText(".PhotoRail-headingWithCount") - return text.stripText().split(" ")[0] - -proc getProfileStats*(profile: var Profile; node: XmlNode) = - for s in node.selectAll( ".ProfileNav-stat"): - let text = s.attr("title").split(" ")[0] - case s.attr("data-nav") - of "followers": profile.followers = text - of "following": profile.following = text - of "favorites": profile.likes = text - of "tweets": profile.tweets = text - -proc getPopupStats*(profile: var Profile; node: XmlNode) = - for s in node.selectAll( ".ProfileCardStats-statLink"): - let text = s.attr("title").split(" ")[0] - case s.attr("href").split("/")[^1] - of "followers": profile.followers = text - of "following": profile.following = text - else: profile.tweets = text - -proc getIntentStats*(profile: var Profile; node: XmlNode) = - profile.tweets = "?" - for s in node.selectAll( "dd.count > a"): - let text = s.innerText() - case s.attr("href").split("/")[^1] - of "followers": profile.followers = text - of "following": profile.following = text - -proc parseTweetStats*(node: XmlNode): TweetStats = - result = TweetStats() - for action in node.selectAll(".ProfileTweet-actionCountForAria"): - let text = action.innerText.split() - case text[1][0 .. 2] - of "ret": result.retweets = text[0].parseInt - of "rep": result.replies = text[0].parseInt - of "lik": result.likes = text[0].parseInt - -proc parseTweetReply*(node: XmlNode): seq[string] = - let reply = node.select(".ReplyingToContextBelowAuthor") - if reply == nil: return - - let selector = if "Quote" in node.attr("class"): "b" - else: "a b" - - result = reply.selectAll(selector).map(innerText) - -proc getGif(player: XmlNode): Gif = - let - thumb = player.attr("style").replace(thumbRegex, "$1") - id = thumb.replace(gifRegex, "$1") - url = &"https://video.twimg.com/tweet_video/{id}.mp4" - Gif(url: url, thumb: thumb) - -proc getTweetMedia*(tweet: Tweet; node: XmlNode) = - for photo in node.selectAll(".AdaptiveMedia-photoContainer"): - tweet.photos.add photo.attrs["data-image-url"] - - let player = node.select(".PlayableMedia") - if player == nil: return - - let attrib = player.select(".PlayableMedia-attribution") - if attrib != nil: - tweet.attribution = some Profile( - username: attrib.attr("href").strip(chars={'/'}), - fullname: attrib.selectText(".fullname"), - userpic: attrib.selectAttr(".avatar", "src") - ) - - if "gif" in player.attr("class"): - tweet.gif = some getGif(player.select(".PlayableMedia-player")) - elif "video" in player.attr("class"): - let - thumb = player.selectAttr(".PlayableMedia-player", "style").split("'") - desc = player.selectText(".PlayableMedia-description") - title = player.selectText(".PlayableMedia-title") - var video = Video(title: title, description: desc) - if thumb.len > 1: - video.thumb = thumb[^2] - tweet.video = some video - -proc getQuoteMedia*(quote: var Quote; node: XmlNode) = - if node.select(".QuoteTweet--sensitive") != nil: - quote.sensitive = true - return - - let media = node.select(".QuoteMedia") - if media != nil: - quote.thumb = media.selectAttr("img", "src") - - let badge = node.select(".AdaptiveMedia-badgeText") - let gifBadge = node.select(".Icon--gifBadge") - - if badge != nil: - quote.badge = badge.innerText() - elif gifBadge != nil: - quote.badge = "GIF" - -proc getTweetCard*(tweet: Tweet; node: XmlNode) = - if node.attr("data-has-cards") == "false": return - var cardType = node.attr("data-card2-type") - - if ":" in cardType: - cardType = cardType.split(":")[^1] - - if "poll" in cardType: - tweet.poll = some Poll() - return - - if "message_me" in cardType: - return - - let cardDiv = node.select(".card2 > .js-macaw-cards-iframe-container") - if cardDiv == nil: return - - var card = Card( - id: $tweet.id, - query: cardDiv.attr("data-src") - ) - - try: - card.kind = parseEnum[CardKind](cardType) - except ValueError: - card.kind = summary - - let cardUrl = cardDiv.attr("data-card-url") - for n in node.selectAll(".tweet-text a"): - if n.attr("href") == cardUrl: - card.url = n.attr("data-expanded-url") - - tweet.card = some card - -proc getMoreReplies*(node: XmlNode): int64 = - let text = node.innerText().strip() - try: - result = parseBiggestInt(text.split(" ")[0]) - except: - result = -1 - -proc getMediaTags*(node: XmlNode): seq[Profile] = - let usernames = node.attr("data-tagged") - if usernames.len == 0: return - let users = parseJson(node.attr("data-reply-to-users-json")) - for user in users: - let un = user["screen_name"].getStr - if un notin usernames: continue - result.add Profile(username: un, fullname: user["name"].getStr) - -proc getTweetLocation*(node: XmlNode): string = - let geo = node.select(".js-geo-pivot-link") - if geo == nil: return - result = geo.innerText().stripText() - result &= ":" & geo.attr("data-place-id")