Revert "Improve tweet url and hashtag parsing"

This reverts commit d67ed26817.
This commit is contained in:
Zed 2020-11-14 23:37:07 +01:00
parent d67ed26817
commit 50218bcc4d

View file

@ -1,4 +1,4 @@
import strutils, times, macros, htmlgen, unicode, options, algorithm import strutils, times, macros, htmlgen, unicode, options
import regex, packedjson import regex, packedjson
import types, utils, formatters import types, utils, formatters
@ -6,18 +6,9 @@ const
unRegex = re"(^|[^A-z0-9-_./?])@([A-z0-9_]{1,15})" unRegex = re"(^|[^A-z0-9-_./?])@([A-z0-9_]{1,15})"
unReplace = "$1<a href=\"/$2\">@$2</a>" unReplace = "$1<a href=\"/$2\">@$2</a>"
htRegex = re"(^|[^\w-_./?])([#$])([\w_]+)" htRegex = re"(^|[^\w-_./?])([#$])([\w_]+)"
htReplace = "$1<a href=\"/search?q=%23$3\">$2$3</a>" htReplace = "$1<a href=\"/search?q=%23$3\">$2$3</a>"
type
ReplaceSliceKind = enum
rkRemove, rkUrl, rkHashtag, rkMention
ReplaceSlice = object
slice: Slice[int]
kind: ReplaceSliceKind
url, display: string
template isNull*(js: JsonNode): bool = js.kind == JNull template isNull*(js: JsonNode): bool = js.kind == JNull
template notNull*(js: JsonNode): bool = js.kind != JNull template notNull*(js: JsonNode): bool = js.kind != JNull
@ -133,92 +124,65 @@ proc getTombstone*(js: JsonNode): string =
result = js{"tombstoneInfo", "richText", "text"}.getStr result = js{"tombstoneInfo", "richText", "text"}.getStr
result.removeSuffix(" Learn more") result.removeSuffix(" Learn more")
proc extractSlice(js: JsonNode): Slice[int] = template getSlice(text: string; slice: seq[int]): string =
result = js["indices"][0].getInt ..< js["indices"][1].getInt text.runeSubStr(slice[0], slice[1] - slice[0])
proc getSlice(text: string; js: JsonNode): string =
if js.kind != JArray or js.len < 2 or js[0].kind != JInt: return text
let slice = @[js{0}.getInt, js{1}.getInt]
text.getSlice(slice)
proc expandUrl(text: var string; js: JsonNode; tLen: int; hideTwitter=false) =
let u = js{"url"}.getStr
if u.len == 0 or u notin text:
return
proc extractUrls(result: var seq[ReplaceSlice]; js: JsonNode;
textLen: int; hideTwitter = false) =
let let
url = js["expanded_url"].getStr url = js{"expanded_url"}.getStr
slice = js.extractSlice slice = js{"indices"}[1].getInt
if hideTwitter and slice.b >= textLen and url.isTwitterUrl: if hideTwitter and slice >= tLen and url.isTwitterUrl:
if slice.a < textLen: text = text.replace(u, "")
result.add ReplaceSlice(kind: rkRemove, slice: slice) text.removeSuffix(' ')
text.removeSuffix('\n')
else: else:
result.add ReplaceSlice(kind: rkUrl, url: url, text = text.replace(u, a(shortLink(url), href=url))
display: url.shortLink, slice: slice)
proc extractHashtags(result: var seq[ReplaceSlice]; js: JsonNode) = proc expandMention(text: var string; orig: string; js: JsonNode) =
result.add ReplaceSlice(kind: rkHashtag, slice: js.extractSlice) let
name = js{"name"}.getStr
proc replacedWith(runes: seq[Rune]; repls: openArray[ReplaceSlice]; href = '/' & js{"screen_name"}.getStr
textSlice: Slice[int]): string = uname = orig.getSlice(js{"indices"})
template extractLowerBound(i: int; idx): int = text = text.replace(uname, a(uname, href=href, title=name))
if i > 0: repls[idx].slice.b.succ else: textSlice.a
result = newStringOfCap(runes.len)
for i, rep in repls:
result.add $runes[extractLowerBound(i, i - 1) ..< rep.slice.a]
case rep.kind
of rkHashtag:
let
name = $runes[rep.slice.a.succ .. rep.slice.b]
symbol = $runes[rep.slice.a]
result.add a(symbol & name, href = "/search?q=%23" & name)
of rkMention:
result.add a($runes[rep.slice], href = rep.url, title = rep.display)
of rkUrl:
result.add a(rep.display, href = rep.url)
of rkRemove:
discard
result.add $runes[extractLowerBound(repls.len, ^1) ..< textSlice.b]
proc deduplicate(s: var seq[ReplaceSlice]) =
var
len = s.len
i = 0
while i < len:
var j = i + 1
while j < len:
if s[i].slice.a == s[j].slice.a:
s.del j
dec len
else:
inc j
inc i
proc cmp(x, y: ReplaceSlice): int = cmp(x.slice.a, y.slice.b)
proc expandProfileEntities*(profile: var Profile; js: JsonNode) = proc expandProfileEntities*(profile: var Profile; js: JsonNode) =
let let
orig = profile.bio.toRunes orig = profile.bio
ent = ? js{"entities"} ent = ? js{"entities"}
with urls, ent{"url", "urls"}: with urls, ent{"url", "urls"}:
profile.website = urls[0]{"expanded_url"}.getStr profile.website = urls[0]{"expanded_url"}.getStr
var replacements = newSeq[ReplaceSlice]()
with urls, ent{"description", "urls"}: with urls, ent{"description", "urls"}:
for u in urls: for u in urls: profile.bio.expandUrl(u, orig.high)
replacements.extractUrls(u, orig.high)
replacements.deduplicate
replacements.sort(cmp)
profile.bio = orig.replacedWith(replacements, 0 .. orig.len)
profile.bio = profile.bio.replace(unRegex, unReplace) profile.bio = profile.bio.replace(unRegex, unReplace)
.replace(htRegex, htReplace) .replace(htRegex, htReplace)
for mention in ? ent{"user_mentions"}:
profile.bio.expandMention(orig, mention)
proc expandTweetEntities*(tweet: Tweet; js: JsonNode) = proc expandTweetEntities*(tweet: Tweet; js: JsonNode) =
let let
orig = tweet.text.toRunes orig = tweet.text
textRange = js{"display_text_range"} textRange = js{"display_text_range"}
textSlice = textRange{0}.getInt .. textRange{1}.getInt slice = @[textRange{0}.getInt, textRange{1}.getInt]
hasQuote = js{"is_quote_status"}.getBool hasQuote = js{"is_quote_status"}.getBool
hasCard = tweet.card.isSome hasCard = tweet.card.isSome
tweet.text = tweet.text.getSlice(slice)
var replyTo = "" var replyTo = ""
if tweet.replyId != 0: if tweet.replyId != 0:
with reply, js{"in_reply_to_screen_name"}: with reply, js{"in_reply_to_screen_name"}:
@ -227,45 +191,26 @@ proc expandTweetEntities*(tweet: Tweet; js: JsonNode) =
let ent = ? js{"entities"} let ent = ? js{"entities"}
var replacements = newSeq[ReplaceSlice]()
with urls, ent{"urls"}: with urls, ent{"urls"}:
for u in urls: for u in urls:
let urlStr = u["url"].getStr tweet.text.expandUrl(u, slice[1], hasQuote)
if urlStr.len == 0 or urlStr notin tweet.text:
continue
replacements.extractUrls(u, textSlice.b, hideTwitter = hasQuote)
if hasCard and u{"url"}.getStr == get(tweet.card).url: if hasCard and u{"url"}.getStr == get(tweet.card).url:
get(tweet.card).url = u{"expanded_url"}.getStr get(tweet.card).url = u{"expanded_url"}.getStr
with media, ent{"media"}: with media, ent{"media"}:
for m in media: for m in media: tweet.text.expandUrl(m, slice[1], hideTwitter=true)
replacements.extractUrls(m, textSlice.b, hideTwitter = true)
if "hashtags" in ent: if "hashtags" in ent or "symbols" in ent:
for hashtag in ent["hashtags"]: tweet.text = tweet.text.replace(htRegex, htReplace)
replacements.extractHashtags(hashtag)
if "symbols" in ent: for mention in ? ent{"user_mentions"}:
for symbol in ent["symbols"]: let
replacements.extractHashtags(symbol) name = mention{"screen_name"}.getStr
idx = tweet.reply.find(name)
if "user_mentions" in ent: if mention{"indices"}[0].getInt >= slice[0]:
for mention in ent["user_mentions"]: tweet.text.expandMention(orig, mention)
let if idx > -1 and name != replyTo:
name = mention{"screen_name"}.getStr tweet.reply.delete idx
slice = mention.extractSlice elif idx == -1 and tweet.replyId != 0:
idx = tweet.reply.find(name) tweet.reply.add name
if slice.a >= textSlice.a:
replacements.add ReplaceSlice(kind: rkMention, slice: slice,
url: "/" & name, display: mention["name"].getStr)
if idx > -1 and name != replyTo:
tweet.reply.delete idx
elif idx == -1 and tweet.replyId != 0:
tweet.reply.add name
replacements.deduplicate
replacements.sort(cmp)
tweet.text = orig.replacedWith(replacements, textSlice)