Fix card link parsing edge cases

This commit is contained in:
Zed 2019-07-15 13:40:59 +02:00
parent 829cac9cbd
commit 0da076ddcf
3 changed files with 10 additions and 13 deletions

View file

@ -10,7 +10,6 @@ const
emailRegex = re"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)" emailRegex = re"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)"
usernameRegex = re"(^|[^A-z0-9_?])@([A-z0-9_]+)" usernameRegex = re"(^|[^A-z0-9_?])@([A-z0-9_]+)"
picRegex = re"pic.twitter.com/[^ ]+" picRegex = re"pic.twitter.com/[^ ]+"
cardRegex = re"(https?://)?cards.twitter.com/[^ ]+"
ellipsisRegex = re" ?…" ellipsisRegex = re" ?…"
nbsp = $Rune(0x000A0) nbsp = $Rune(0x000A0)
@ -60,7 +59,6 @@ proc linkifyText*(text: string): string =
proc stripTwitterUrls*(text: string): string = proc stripTwitterUrls*(text: string): string =
result = text result = text
result = result.replace(picRegex, "") result = result.replace(picRegex, "")
result = result.replace(cardRegex, "")
result = result.replace(ellipsisRegex, "") result = result.replace(ellipsisRegex, "")
proc getUserpic*(userpic: string; style=""): string = proc getUserpic*(userpic: string; style=""): string =
@ -81,7 +79,3 @@ proc getTime*(tweet: Tweet): string =
proc getLink*(tweet: Tweet | Quote): string = proc getLink*(tweet: Tweet | Quote): string =
&"/{tweet.profile.username}/status/{tweet.id}" &"/{tweet.profile.username}/status/{tweet.id}"
proc getUrls*(text: string): seq[string] =
# temporary
text.findAll(urlRegex).mapIt(text[it.group(0)[0]])

View file

@ -75,7 +75,7 @@ proc parseTweet*(node: XmlNode): Tweet =
) )
result.getTweetMedia(tweet) result.getTweetMedia(tweet)
result.getTweetCards(tweet) result.getTweetCard(tweet)
let by = tweet.selectText(".js-retweet-text > a > b") let by = tweet.selectText(".js-retweet-text > a > b")
if by.len > 0: if by.len > 0:
@ -197,7 +197,10 @@ proc parseCard*(card: var Card; node: XmlNode) =
card.text = node.selectText("p.tcu-resetMargin") card.text = node.selectText("p.tcu-resetMargin")
card.dest = node.selectText("span.SummaryCard-destination") card.dest = node.selectText("span.SummaryCard-destination")
let image = node.select(".tcu-imageWrapper > img") if card.url.len == 0:
card.url = node.select("a").attr("href")
let image = node.select(".tcu-imageWrapper img")
if image != nil: if image != nil:
# workaround for issue 11713 # workaround for issue 11713
card.image = image.attr("data-src").replace("gname", "g&name") card.image = image.attr("data-src").replace("gname", "g&name")

View file

@ -167,7 +167,7 @@ proc getQuoteMedia*(quote: var Quote; node: XmlNode) =
elif gifBadge != nil: elif gifBadge != nil:
quote.badge = "GIF" quote.badge = "GIF"
proc getTweetCards*(tweet: Tweet; node: XmlNode) = proc getTweetCard*(tweet: Tweet; node: XmlNode) =
if node.attr("data-has-cards") == "false": return if node.attr("data-has-cards") == "false": return
let cardType = node.attr("data-card2-type") let cardType = node.attr("data-card2-type")
@ -183,10 +183,10 @@ proc getTweetCards*(tweet: Tweet; node: XmlNode) =
query: cardDiv.attr("data-src") query: cardDiv.attr("data-src")
) )
# temporary solution let cardUrl = cardDiv.attr("data-card-url")
let text = node.selectText(".tweet-text") for n in node.selectAll(".tweet-text a"):
let urls = getUrls(text) if n.attr("href") == cardUrl:
card.url = urls[0] card.url = n.attr("data-expanded-url")
tweet.card = some(card) tweet.card = some(card)