From 02fcd7b88068013d5d3277eff324819e4956dbf3 Mon Sep 17 00:00:00 2001 From: Zed Date: Wed, 26 Jun 2019 18:51:21 +0200 Subject: [PATCH] Switch from nimquery to q --- nitter.nimble | 6 ++++- src/api.nim | 6 ++--- src/parser.nim | 36 ++++++++++++++------------- src/parserutils.nim | 59 +++++++++++++++++++++++++-------------------- 4 files changed, 60 insertions(+), 47 deletions(-) diff --git a/nitter.nimble b/nitter.nimble index 0feaf8c..3d2eaea 100644 --- a/nitter.nimble +++ b/nitter.nimble @@ -11,4 +11,8 @@ bin = @["nitter"] # Dependencies requires "nim >= 0.19.9" -requires "regex", "nimcrypto", "norm", "jester", "nimquery#head" +requires "norm >= 1.0.11" +requires "jester >= 0.4.1" +requires "regex >= 0.11.2" +requires "q >= 0.0.7" +requires "nimcrypto >= 0.3.9" diff --git a/src/api.nim b/src/api.nim index 9a222cc..60755dc 100644 --- a/src/api.nim +++ b/src/api.nim @@ -1,8 +1,8 @@ import httpclient, asyncdispatch, htmlparser, times import sequtils, strutils, strformat, json, xmltree, uri -import nimquery, regex +import regex -import ./types, ./parser +import ./types, ./parser, ./parserutils const agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36" @@ -162,7 +162,7 @@ proc getProfile*(username: string): Future[Profile] {.async.} = if html.isNil: return Profile() - if not html.querySelector(".ProfileCard-sensitiveWarningContainer").isNil: + if not html.select(".ProfileCard-sensitiveWarningContainer").isNil: return await getProfileFallback(username, headers) result = parsePopupProfile(html) diff --git a/src/parser.nim b/src/parser.nim index bf0cec5..a2c501f 100644 --- a/src/parser.nim +++ b/src/parser.nim @@ -1,10 +1,9 @@ import xmltree, sequtils, strtabs, strutils, strformat, json -import nimquery import ./types, ./parserutils, ./formatters proc parsePopupProfile*(node: XmlNode): Profile = - let profile = node.querySelector(".profile-card") + let profile = node.select(".profile-card") if profile.isNil: return result = Profile( @@ -24,9 +23,9 @@ proc parseIntentProfile*(profile: XmlNode): Profile = fullname: profile.getName("a.fn.url.alternate-context"), username: profile.getUsername(".nickname"), bio: profile.getBio("p.note"), - userpic: profile.querySelector(".profile.summary").getAvatar("img.photo"), - verified: not profile.querySelector("li.verified").isNil, - protected: not profile.querySelector("li.protected").isNil, + userpic: profile.select(".profile.summary").getAvatar("img.photo"), + verified: not profile.select("li.verified").isNil, + protected: not profile.select("li.protected").isNil, banner: getBanner(profile) ) @@ -55,7 +54,11 @@ proc parseQuote*(quote: XmlNode): Quote = result.getQuoteMedia(quote) -proc parseTweet*(tweet: XmlNode): Tweet = +proc parseTweet*(node: XmlNode): Tweet = + let tweet = node.select(".tweet") + if tweet.isNil(): + return Tweet() + result = Tweet( id: tweet.getAttr("data-item-id"), link: tweet.getAttr("data-permalink-path"), @@ -74,29 +77,28 @@ proc parseTweet*(tweet: XmlNode): Tweet = result.retweetBy = some(by.stripText()) result.retweetId = some(tweet.getAttr("data-retweet-id")) - let quote = tweet.querySelector(".QuoteTweet-innerContainer") + let quote = tweet.select(".QuoteTweet-innerContainer") if not quote.isNil: result.quote = some(parseQuote(quote)) proc parseTweets*(node: XmlNode): Tweets = - if node.isNil: return - node.querySelectorAll(".tweet").map(parseTweet) + if node.isNil or node.kind == xnText: return + for n in node.selectAll(".stream-item"): + result.add parseTweet(n) proc parseConversation*(node: XmlNode): Conversation = result = Conversation( - tweet: parseTweet(node.querySelector(".permalink-tweet-container > .tweet")), - before: parseTweets(node.querySelector(".in-reply-to")) + tweet: parseTweet(node.select(".permalink-tweet-container")), + before: parseTweets(node.select(".in-reply-to")) ) - let replies = node.querySelector(".replies-to") + let replies = node.select(".replies-to") if replies.isNil: return - result.after = parseTweets(replies.querySelector(".ThreadedConversation--selfThread")) + result.after = parseTweets(replies.select(".ThreadedConversation--selfThread")) - for reply in replies.querySelectorAll("li > .stream-items"): - let thread = parseTweets(reply) - if not thread.anyIt(it in result.after): - result.replies.add thread + for reply in replies.select(".stream-items"): + result.replies.add parseTweets(reply) proc parseVideo*(node: JsonNode): Video = let track = node{"track"} diff --git a/src/parserutils.nim b/src/parserutils.nim index c2652a0..3feb2d2 100644 --- a/src/parserutils.nim +++ b/src/parserutils.nim @@ -1,30 +1,39 @@ import xmltree, htmlparser, strtabs, strformat, times -import nimquery, regex +import regex import ./types, ./formatters, ./api +from q import nil + const thumbRegex = re".+:url\('([^']+)'\)" gifRegex = re".+thumb/([^\.']+)\.jpg.*" +proc selectAll*(node: XmlNode; selector: string): seq[XmlNode] = + q.select(node, selector) + +proc select*(node: XmlNode; selector: string): XmlNode = + let nodes = node.selectAll(selector) + if nodes.len > 0: nodes[0] else: nil + proc getAttr*(node: XmlNode; attr: string; default=""): string = if node.isNil or node.attrs.isNil: return default return node.attrs.getOrDefault(attr) proc selectAttr*(node: XmlNode; selector: string; attr: string; default=""): string = - let res = node.querySelector(selector) + let res = node.select(selector) if res == nil: "" else: res.getAttr(attr, default) proc selectText*(node: XmlNode; selector: string): string = - let res = node.querySelector(selector) + let res = node.select(selector) result = if res == nil: "" else: res.innerText() proc getHeader(profile: XmlNode): XmlNode = - result = profile.querySelector(".permalink-header") + result = profile.select(".permalink-header") if result.isNil: - result = profile.querySelector(".stream-item-header") + result = profile.select(".stream-item-header") if result.isNil: - result = profile.querySelector(".ProfileCard-userFields") + result = profile.select(".ProfileCard-userFields") proc isVerified*(profile: XmlNode): bool = getHeader(profile).selectText(".Icon.Icon--verified").len > 0 @@ -39,25 +48,23 @@ proc getUsername*(profile: XmlNode; selector: string): string = profile.selectText(selector).strip(chars={'@', ' '}) proc emojify*(node: XmlNode) = - for i in node.querySelectorAll(".Emoji"): + for i in node.selectAll(".Emoji"): i.add newText(i.getAttr("alt")) proc getQuoteText*(tweet: XmlNode): string = - let text = tweet.querySelector(".QuoteTweet-text") - + let text = tweet.select(".QuoteTweet-text") emojify(text) - result = stripText(selectText(text, ".tweet-text")) + result = stripText(text.innerText()) result = stripTwitterUrls(result) proc getTweetText*(tweet: XmlNode): string = let - selector = ".tweet-text > a.twitter-timeline-link.u-hidden" - link = tweet.selectAttr(selector, "data-expanded-url") - quote = tweet.querySelector(".QuoteTweet") - text = tweet.querySelector(".tweet-text") + quote = tweet.select(".QuoteTweet") + text = tweet.select(".tweet-text") + link = text.selectAttr("a.twitter-timeline-link.u-hidden", "data-expanded-url") emojify(text) - result = stripText(selectText(text, ".tweet-text")) + result = stripText(text.innerText()) if not quote.isNil and link.len > 0: result = result.replace(link, "") @@ -65,7 +72,7 @@ proc getTweetText*(tweet: XmlNode): string = result = stripTwitterUrls(result) proc getTime(tweet: XmlNode): XmlNode = - tweet.querySelector(".js-short-timestamp") + tweet.select(".js-short-timestamp") proc getTimestamp*(tweet: XmlNode): Time = let time = getTime(tweet).getAttr("data-time", "0") @@ -92,7 +99,7 @@ proc getBanner*(tweet: XmlNode): string = result = "background-color: #161616" proc getPopupStats*(profile: var Profile; node: XmlNode) = - for s in node.querySelectorAll( ".ProfileCardStats-statLink"): + for s in node.selectAll( ".ProfileCardStats-statLink"): let text = s.getAttr("title").split(" ")[0] case s.getAttr("href").split("/")[^1] of "followers": profile.followers = text @@ -101,7 +108,7 @@ proc getPopupStats*(profile: var Profile; node: XmlNode) = proc getIntentStats*(profile: var Profile; node: XmlNode) = profile.tweets = "?" - for s in node.querySelectorAll( "dd.count > a"): + for s in node.selectAll( "dd.count > a"): let text = s.innerText() case s.getAttr("href").split("/")[^1] of "followers": profile.followers = text @@ -111,7 +118,7 @@ proc getTweetStats*(tweet: Tweet; node: XmlNode) = tweet.replies = "0" tweet.retweets = "0" tweet.likes = "0" - for action in node.querySelectorAll(".ProfileTweet-actionCountForAria"): + for action in node.selectAll(".ProfileTweet-actionCountForAria"): let text = action.innerText.split() case text[1] of "replies": tweet.replies = text[0] @@ -126,30 +133,30 @@ proc getGif(player: XmlNode): Gif = Gif(url: url, thumb: thumb) proc getTweetMedia*(tweet: Tweet; node: XmlNode) = - for photo in node.querySelectorAll(".AdaptiveMedia-photoContainer"): + for photo in node.selectAll(".AdaptiveMedia-photoContainer"): tweet.photos.add photo.attrs["data-image-url"] - let player = node.querySelector(".PlayableMedia") + let player = node.select(".PlayableMedia") if player.isNil: return if "gif" in player.getAttr("class"): - tweet.gif = some(getGif(player.querySelector(".PlayableMedia-player"))) + tweet.gif = some(getGif(player.select(".PlayableMedia-player"))) elif "video" in player.getAttr("class"): tweet.video = some(Video()) proc getQuoteMedia*(quote: var Quote; node: XmlNode) = - let sensitive = node.querySelector(".QuoteTweet--sensitive") + let sensitive = node.select(".QuoteTweet--sensitive") if not sensitive.isNil: quote.sensitive = true return - let media = node.querySelector(".QuoteMedia") + let media = node.select(".QuoteMedia") if not media.isNil: quote.thumb = some(media.selectAttr("img", "src")) - let badge = node.querySelector(".AdaptiveMedia-badgeText") - let gifBadge = node.querySelector(".Icon--gifBadge") + let badge = node.select(".AdaptiveMedia-badgeText") + let gifBadge = node.select(".Icon--gifBadge") if not badge.isNil: quote.badge = some(badge.innerText())