Deduplicate note tweet parsing
This commit is contained in:
		
							parent
							
								
									368974c803
								
							
						
					
					
						commit
						aea884c48e
					
				
					 3 changed files with 35 additions and 65 deletions
				
			
		| 
						 | 
				
			
			@ -204,12 +204,12 @@ proc parseTweet(js: JsonNode; jsCard: JsonNode = newJNull()): Tweet =
 | 
			
		|||
    )
 | 
			
		||||
  )
 | 
			
		||||
 | 
			
		||||
  result.expandTweetEntities(js)
 | 
			
		||||
 | 
			
		||||
  # fix for pinned threads
 | 
			
		||||
  if result.hasThread and result.threadId == 0:
 | 
			
		||||
    result.threadId = js{"self_thread", "id_str"}.getId
 | 
			
		||||
 | 
			
		||||
  result.expandTweetEntities(js)
 | 
			
		||||
 | 
			
		||||
  if js{"is_quote_status"}.getBool:
 | 
			
		||||
    result.quote = some Tweet(id: js{"quoted_status_id_str"}.getId)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -230,47 +230,37 @@ proc expandUserEntities*(user: var User; js: JsonNode) =
 | 
			
		|||
  user.bio = user.bio.replacef(unRegex, unReplace)
 | 
			
		||||
                     .replacef(htRegex, htReplace)
 | 
			
		||||
 | 
			
		||||
proc expandTweetEntities*(tweet: Tweet; js: JsonNode) =
 | 
			
		||||
  let
 | 
			
		||||
    orig = tweet.text.toRunes
 | 
			
		||||
    textRange = js{"display_text_range"}
 | 
			
		||||
    textSlice = textRange{0}.getInt .. textRange{1}.getInt
 | 
			
		||||
    hasQuote = js{"is_quote_status"}.getBool
 | 
			
		||||
    hasCard = tweet.card.isSome
 | 
			
		||||
 | 
			
		||||
  var replyTo = ""
 | 
			
		||||
  if tweet.replyId != 0:
 | 
			
		||||
    with reply, js{"in_reply_to_screen_name"}:
 | 
			
		||||
      tweet.reply.add reply.getStr
 | 
			
		||||
      replyTo = reply.getStr
 | 
			
		||||
 | 
			
		||||
  let ent = ? js{"entities"}
 | 
			
		||||
proc expandTextEntities(tweet: Tweet; entities: JsonNode; text: string; textSlice: Slice[int];
 | 
			
		||||
                        replyTo=""; hasQuote=false) =
 | 
			
		||||
  let hasCard = tweet.card.isSome
 | 
			
		||||
 | 
			
		||||
  var replacements = newSeq[ReplaceSlice]()
 | 
			
		||||
 | 
			
		||||
  with urls, ent{"urls"}:
 | 
			
		||||
  with urls, entities{"urls"}:
 | 
			
		||||
    for u in urls:
 | 
			
		||||
      let urlStr = u["url"].getStr
 | 
			
		||||
      if urlStr.len == 0 or urlStr notin tweet.text:
 | 
			
		||||
      if urlStr.len == 0 or urlStr notin text:
 | 
			
		||||
        continue
 | 
			
		||||
 | 
			
		||||
      replacements.extractUrls(u, textSlice.b, hideTwitter = hasQuote)
 | 
			
		||||
 | 
			
		||||
      if hasCard and u{"url"}.getStr == get(tweet.card).url:
 | 
			
		||||
        get(tweet.card).url = u{"expanded_url"}.getStr
 | 
			
		||||
 | 
			
		||||
  with media, ent{"media"}:
 | 
			
		||||
  with media, entities{"media"}:
 | 
			
		||||
    for m in media:
 | 
			
		||||
      replacements.extractUrls(m, textSlice.b, hideTwitter = true)
 | 
			
		||||
 | 
			
		||||
  if "hashtags" in ent:
 | 
			
		||||
    for hashtag in ent["hashtags"]:
 | 
			
		||||
  if "hashtags" in entities:
 | 
			
		||||
    for hashtag in entities["hashtags"]:
 | 
			
		||||
      replacements.extractHashtags(hashtag)
 | 
			
		||||
 | 
			
		||||
  if "symbols" in ent:
 | 
			
		||||
    for symbol in ent["symbols"]:
 | 
			
		||||
  if "symbols" in entities:
 | 
			
		||||
    for symbol in entities["symbols"]:
 | 
			
		||||
      replacements.extractHashtags(symbol)
 | 
			
		||||
 | 
			
		||||
  if "user_mentions" in ent:
 | 
			
		||||
    for mention in ent["user_mentions"]:
 | 
			
		||||
  if "user_mentions" in entities:
 | 
			
		||||
    for mention in entities["user_mentions"]:
 | 
			
		||||
      let
 | 
			
		||||
        name = mention{"screen_name"}.getStr
 | 
			
		||||
        slice = mention.extractSlice
 | 
			
		||||
| 
						 | 
				
			
			@ -287,47 +277,27 @@ proc expandTweetEntities*(tweet: Tweet; js: JsonNode) =
 | 
			
		|||
  replacements.deduplicate
 | 
			
		||||
  replacements.sort(cmp)
 | 
			
		||||
 | 
			
		||||
  tweet.text = orig.replacedWith(replacements, textSlice)
 | 
			
		||||
                   .strip(leading=false)
 | 
			
		||||
  tweet.text = text.toRunes.replacedWith(replacements, textSlice).strip(leading=false)
 | 
			
		||||
 | 
			
		||||
proc expandNoteTweetEntities*(tweet: Tweet; noteTweet: JsonNode) =
 | 
			
		||||
proc expandTweetEntities*(tweet: Tweet; js: JsonNode) =
 | 
			
		||||
  let
 | 
			
		||||
    text = noteTweet{"text"}.getStr
 | 
			
		||||
    orig = text.toRunes
 | 
			
		||||
    ent = ? noteTweet{"entity_set"}
 | 
			
		||||
    hasCard = tweet.card.isSome
 | 
			
		||||
    entities = ? js{"entities"}
 | 
			
		||||
    hasQuote = js{"is_quote_status"}.getBool
 | 
			
		||||
    textRange = js{"display_text_range"}
 | 
			
		||||
    textSlice = textRange{0}.getInt .. textRange{1}.getInt
 | 
			
		||||
 | 
			
		||||
  var replacements = newSeq[ReplaceSlice]()
 | 
			
		||||
  var replyTo = ""
 | 
			
		||||
  if tweet.replyId != 0:
 | 
			
		||||
    with reply, js{"in_reply_to_screen_name"}:
 | 
			
		||||
      replyTo = reply.getStr
 | 
			
		||||
      tweet.reply.add replyTo
 | 
			
		||||
 | 
			
		||||
  with urls, ent{"urls"}:
 | 
			
		||||
    for u in urls:
 | 
			
		||||
      let urlStr = u["url"].getStr
 | 
			
		||||
      if urlStr.len == 0 or urlStr notin text:
 | 
			
		||||
        continue
 | 
			
		||||
      replacements.extractUrls(u, orig.len, hideTwitter = false)
 | 
			
		||||
      if hasCard and u{"url"}.getStr == get(tweet.card).url:
 | 
			
		||||
        get(tweet.card).url = u{"expanded_url"}.getStr
 | 
			
		||||
  tweet.expandTextEntities(entities, tweet.text, textSlice, replyTo, hasQuote)
 | 
			
		||||
 | 
			
		||||
  if "hashtags" in ent:
 | 
			
		||||
    for hashtag in ent["hashtags"]:
 | 
			
		||||
      replacements.extractHashtags(hashtag)
 | 
			
		||||
 | 
			
		||||
  if "symbols" in ent:
 | 
			
		||||
    for symbol in ent["symbols"]:
 | 
			
		||||
      replacements.extractHashtags(symbol)
 | 
			
		||||
 | 
			
		||||
  if "user_mentions" in ent:
 | 
			
		||||
    for mention in ent["user_mentions"]:
 | 
			
		||||
proc expandNoteTweetEntities*(tweet: Tweet; js: JsonNode) =
 | 
			
		||||
  let
 | 
			
		||||
        name = mention{"screen_name"}.getStr
 | 
			
		||||
        slice = mention.extractSlice
 | 
			
		||||
        idx = tweet.reply.find(name)
 | 
			
		||||
    entities = ? js{"entity_set"}
 | 
			
		||||
    text = js{"text"}.getStr
 | 
			
		||||
    textSlice = 0..text.runeLen
 | 
			
		||||
 | 
			
		||||
      replacements.add ReplaceSlice(kind: rkMention, slice: slice,
 | 
			
		||||
        url: "/" & name, display: mention["name"].getStr)
 | 
			
		||||
 | 
			
		||||
  replacements.deduplicate
 | 
			
		||||
  replacements.sort(cmp)
 | 
			
		||||
 | 
			
		||||
  tweet.text = orig.replacedWith(replacements, 0..orig.len)
 | 
			
		||||
                   .strip(leading=false)
 | 
			
		||||
  tweet.expandTextEntities(entities, text, textSlice)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -42,7 +42,7 @@ no_thumb = [
 | 
			
		|||
 | 
			
		||||
    ['nim_lang/status/1082989146040340480',
 | 
			
		||||
     'Nim in 2018: A short recap',
 | 
			
		||||
     '36 votes and 46 comments so far on Reddit',
 | 
			
		||||
     'Posted by u/miran1 - 36 votes and 46 comments',
 | 
			
		||||
     'reddit.com']
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue