Fix card link parsing edge cases
This commit is contained in:
parent
829cac9cbd
commit
0da076ddcf
3 changed files with 10 additions and 13 deletions
|
@ -10,7 +10,6 @@ const
|
||||||
emailRegex = re"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)"
|
emailRegex = re"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)"
|
||||||
usernameRegex = re"(^|[^A-z0-9_?])@([A-z0-9_]+)"
|
usernameRegex = re"(^|[^A-z0-9_?])@([A-z0-9_]+)"
|
||||||
picRegex = re"pic.twitter.com/[^ ]+"
|
picRegex = re"pic.twitter.com/[^ ]+"
|
||||||
cardRegex = re"(https?://)?cards.twitter.com/[^ ]+"
|
|
||||||
ellipsisRegex = re" ?…"
|
ellipsisRegex = re" ?…"
|
||||||
nbsp = $Rune(0x000A0)
|
nbsp = $Rune(0x000A0)
|
||||||
|
|
||||||
|
@ -60,7 +59,6 @@ proc linkifyText*(text: string): string =
|
||||||
proc stripTwitterUrls*(text: string): string =
|
proc stripTwitterUrls*(text: string): string =
|
||||||
result = text
|
result = text
|
||||||
result = result.replace(picRegex, "")
|
result = result.replace(picRegex, "")
|
||||||
result = result.replace(cardRegex, "")
|
|
||||||
result = result.replace(ellipsisRegex, "")
|
result = result.replace(ellipsisRegex, "")
|
||||||
|
|
||||||
proc getUserpic*(userpic: string; style=""): string =
|
proc getUserpic*(userpic: string; style=""): string =
|
||||||
|
@ -81,7 +79,3 @@ proc getTime*(tweet: Tweet): string =
|
||||||
|
|
||||||
proc getLink*(tweet: Tweet | Quote): string =
|
proc getLink*(tweet: Tweet | Quote): string =
|
||||||
&"/{tweet.profile.username}/status/{tweet.id}"
|
&"/{tweet.profile.username}/status/{tweet.id}"
|
||||||
|
|
||||||
proc getUrls*(text: string): seq[string] =
|
|
||||||
# temporary
|
|
||||||
text.findAll(urlRegex).mapIt(text[it.group(0)[0]])
|
|
||||||
|
|
|
@ -75,7 +75,7 @@ proc parseTweet*(node: XmlNode): Tweet =
|
||||||
)
|
)
|
||||||
|
|
||||||
result.getTweetMedia(tweet)
|
result.getTweetMedia(tweet)
|
||||||
result.getTweetCards(tweet)
|
result.getTweetCard(tweet)
|
||||||
|
|
||||||
let by = tweet.selectText(".js-retweet-text > a > b")
|
let by = tweet.selectText(".js-retweet-text > a > b")
|
||||||
if by.len > 0:
|
if by.len > 0:
|
||||||
|
@ -197,7 +197,10 @@ proc parseCard*(card: var Card; node: XmlNode) =
|
||||||
card.text = node.selectText("p.tcu-resetMargin")
|
card.text = node.selectText("p.tcu-resetMargin")
|
||||||
card.dest = node.selectText("span.SummaryCard-destination")
|
card.dest = node.selectText("span.SummaryCard-destination")
|
||||||
|
|
||||||
let image = node.select(".tcu-imageWrapper > img")
|
if card.url.len == 0:
|
||||||
|
card.url = node.select("a").attr("href")
|
||||||
|
|
||||||
|
let image = node.select(".tcu-imageWrapper img")
|
||||||
if image != nil:
|
if image != nil:
|
||||||
# workaround for issue 11713
|
# workaround for issue 11713
|
||||||
card.image = image.attr("data-src").replace("gname", "g&name")
|
card.image = image.attr("data-src").replace("gname", "g&name")
|
||||||
|
|
|
@ -167,7 +167,7 @@ proc getQuoteMedia*(quote: var Quote; node: XmlNode) =
|
||||||
elif gifBadge != nil:
|
elif gifBadge != nil:
|
||||||
quote.badge = "GIF"
|
quote.badge = "GIF"
|
||||||
|
|
||||||
proc getTweetCards*(tweet: Tweet; node: XmlNode) =
|
proc getTweetCard*(tweet: Tweet; node: XmlNode) =
|
||||||
if node.attr("data-has-cards") == "false": return
|
if node.attr("data-has-cards") == "false": return
|
||||||
let cardType = node.attr("data-card2-type")
|
let cardType = node.attr("data-card2-type")
|
||||||
|
|
||||||
|
@ -183,10 +183,10 @@ proc getTweetCards*(tweet: Tweet; node: XmlNode) =
|
||||||
query: cardDiv.attr("data-src")
|
query: cardDiv.attr("data-src")
|
||||||
)
|
)
|
||||||
|
|
||||||
# temporary solution
|
let cardUrl = cardDiv.attr("data-card-url")
|
||||||
let text = node.selectText(".tweet-text")
|
for n in node.selectAll(".tweet-text a"):
|
||||||
let urls = getUrls(text)
|
if n.attr("href") == cardUrl:
|
||||||
card.url = urls[0]
|
card.url = n.attr("data-expanded-url")
|
||||||
|
|
||||||
tweet.card = some(card)
|
tweet.card = some(card)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue