Deduplicate note tweet parsing
This commit is contained in:
parent
368974c803
commit
aea884c48e
3 changed files with 35 additions and 65 deletions
|
@ -204,12 +204,12 @@ proc parseTweet(js: JsonNode; jsCard: JsonNode = newJNull()): Tweet =
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
result.expandTweetEntities(js)
|
||||||
|
|
||||||
# fix for pinned threads
|
# fix for pinned threads
|
||||||
if result.hasThread and result.threadId == 0:
|
if result.hasThread and result.threadId == 0:
|
||||||
result.threadId = js{"self_thread", "id_str"}.getId
|
result.threadId = js{"self_thread", "id_str"}.getId
|
||||||
|
|
||||||
result.expandTweetEntities(js)
|
|
||||||
|
|
||||||
if js{"is_quote_status"}.getBool:
|
if js{"is_quote_status"}.getBool:
|
||||||
result.quote = some Tweet(id: js{"quoted_status_id_str"}.getId)
|
result.quote = some Tweet(id: js{"quoted_status_id_str"}.getId)
|
||||||
|
|
||||||
|
|
|
@ -230,47 +230,37 @@ proc expandUserEntities*(user: var User; js: JsonNode) =
|
||||||
user.bio = user.bio.replacef(unRegex, unReplace)
|
user.bio = user.bio.replacef(unRegex, unReplace)
|
||||||
.replacef(htRegex, htReplace)
|
.replacef(htRegex, htReplace)
|
||||||
|
|
||||||
proc expandTweetEntities*(tweet: Tweet; js: JsonNode) =
|
proc expandTextEntities(tweet: Tweet; entities: JsonNode; text: string; textSlice: Slice[int];
|
||||||
let
|
replyTo=""; hasQuote=false) =
|
||||||
orig = tweet.text.toRunes
|
let hasCard = tweet.card.isSome
|
||||||
textRange = js{"display_text_range"}
|
|
||||||
textSlice = textRange{0}.getInt .. textRange{1}.getInt
|
|
||||||
hasQuote = js{"is_quote_status"}.getBool
|
|
||||||
hasCard = tweet.card.isSome
|
|
||||||
|
|
||||||
var replyTo = ""
|
|
||||||
if tweet.replyId != 0:
|
|
||||||
with reply, js{"in_reply_to_screen_name"}:
|
|
||||||
tweet.reply.add reply.getStr
|
|
||||||
replyTo = reply.getStr
|
|
||||||
|
|
||||||
let ent = ? js{"entities"}
|
|
||||||
|
|
||||||
var replacements = newSeq[ReplaceSlice]()
|
var replacements = newSeq[ReplaceSlice]()
|
||||||
|
|
||||||
with urls, ent{"urls"}:
|
with urls, entities{"urls"}:
|
||||||
for u in urls:
|
for u in urls:
|
||||||
let urlStr = u["url"].getStr
|
let urlStr = u["url"].getStr
|
||||||
if urlStr.len == 0 or urlStr notin tweet.text:
|
if urlStr.len == 0 or urlStr notin text:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
replacements.extractUrls(u, textSlice.b, hideTwitter = hasQuote)
|
replacements.extractUrls(u, textSlice.b, hideTwitter = hasQuote)
|
||||||
|
|
||||||
if hasCard and u{"url"}.getStr == get(tweet.card).url:
|
if hasCard and u{"url"}.getStr == get(tweet.card).url:
|
||||||
get(tweet.card).url = u{"expanded_url"}.getStr
|
get(tweet.card).url = u{"expanded_url"}.getStr
|
||||||
|
|
||||||
with media, ent{"media"}:
|
with media, entities{"media"}:
|
||||||
for m in media:
|
for m in media:
|
||||||
replacements.extractUrls(m, textSlice.b, hideTwitter = true)
|
replacements.extractUrls(m, textSlice.b, hideTwitter = true)
|
||||||
|
|
||||||
if "hashtags" in ent:
|
if "hashtags" in entities:
|
||||||
for hashtag in ent["hashtags"]:
|
for hashtag in entities["hashtags"]:
|
||||||
replacements.extractHashtags(hashtag)
|
replacements.extractHashtags(hashtag)
|
||||||
|
|
||||||
if "symbols" in ent:
|
if "symbols" in entities:
|
||||||
for symbol in ent["symbols"]:
|
for symbol in entities["symbols"]:
|
||||||
replacements.extractHashtags(symbol)
|
replacements.extractHashtags(symbol)
|
||||||
|
|
||||||
if "user_mentions" in ent:
|
if "user_mentions" in entities:
|
||||||
for mention in ent["user_mentions"]:
|
for mention in entities["user_mentions"]:
|
||||||
let
|
let
|
||||||
name = mention{"screen_name"}.getStr
|
name = mention{"screen_name"}.getStr
|
||||||
slice = mention.extractSlice
|
slice = mention.extractSlice
|
||||||
|
@ -287,47 +277,27 @@ proc expandTweetEntities*(tweet: Tweet; js: JsonNode) =
|
||||||
replacements.deduplicate
|
replacements.deduplicate
|
||||||
replacements.sort(cmp)
|
replacements.sort(cmp)
|
||||||
|
|
||||||
tweet.text = orig.replacedWith(replacements, textSlice)
|
tweet.text = text.toRunes.replacedWith(replacements, textSlice).strip(leading=false)
|
||||||
.strip(leading=false)
|
|
||||||
|
|
||||||
proc expandNoteTweetEntities*(tweet: Tweet; noteTweet: JsonNode) =
|
proc expandTweetEntities*(tweet: Tweet; js: JsonNode) =
|
||||||
let
|
let
|
||||||
text = noteTweet{"text"}.getStr
|
entities = ? js{"entities"}
|
||||||
orig = text.toRunes
|
hasQuote = js{"is_quote_status"}.getBool
|
||||||
ent = ? noteTweet{"entity_set"}
|
textRange = js{"display_text_range"}
|
||||||
hasCard = tweet.card.isSome
|
textSlice = textRange{0}.getInt .. textRange{1}.getInt
|
||||||
|
|
||||||
var replacements = newSeq[ReplaceSlice]()
|
var replyTo = ""
|
||||||
|
if tweet.replyId != 0:
|
||||||
|
with reply, js{"in_reply_to_screen_name"}:
|
||||||
|
replyTo = reply.getStr
|
||||||
|
tweet.reply.add replyTo
|
||||||
|
|
||||||
with urls, ent{"urls"}:
|
tweet.expandTextEntities(entities, tweet.text, textSlice, replyTo, hasQuote)
|
||||||
for u in urls:
|
|
||||||
let urlStr = u["url"].getStr
|
|
||||||
if urlStr.len == 0 or urlStr notin text:
|
|
||||||
continue
|
|
||||||
replacements.extractUrls(u, orig.len, hideTwitter = false)
|
|
||||||
if hasCard and u{"url"}.getStr == get(tweet.card).url:
|
|
||||||
get(tweet.card).url = u{"expanded_url"}.getStr
|
|
||||||
|
|
||||||
if "hashtags" in ent:
|
proc expandNoteTweetEntities*(tweet: Tweet; js: JsonNode) =
|
||||||
for hashtag in ent["hashtags"]:
|
|
||||||
replacements.extractHashtags(hashtag)
|
|
||||||
|
|
||||||
if "symbols" in ent:
|
|
||||||
for symbol in ent["symbols"]:
|
|
||||||
replacements.extractHashtags(symbol)
|
|
||||||
|
|
||||||
if "user_mentions" in ent:
|
|
||||||
for mention in ent["user_mentions"]:
|
|
||||||
let
|
let
|
||||||
name = mention{"screen_name"}.getStr
|
entities = ? js{"entity_set"}
|
||||||
slice = mention.extractSlice
|
text = js{"text"}.getStr
|
||||||
idx = tweet.reply.find(name)
|
textSlice = 0..text.runeLen
|
||||||
|
|
||||||
replacements.add ReplaceSlice(kind: rkMention, slice: slice,
|
tweet.expandTextEntities(entities, text, textSlice)
|
||||||
url: "/" & name, display: mention["name"].getStr)
|
|
||||||
|
|
||||||
replacements.deduplicate
|
|
||||||
replacements.sort(cmp)
|
|
||||||
|
|
||||||
tweet.text = orig.replacedWith(replacements, 0..orig.len)
|
|
||||||
.strip(leading=false)
|
|
||||||
|
|
|
@ -42,7 +42,7 @@ no_thumb = [
|
||||||
|
|
||||||
['nim_lang/status/1082989146040340480',
|
['nim_lang/status/1082989146040340480',
|
||||||
'Nim in 2018: A short recap',
|
'Nim in 2018: A short recap',
|
||||||
'36 votes and 46 comments so far on Reddit',
|
'Posted by u/miran1 - 36 votes and 46 comments',
|
||||||
'reddit.com']
|
'reddit.com']
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue