nitter/src/parserutils.nim

# SPDX-License-Identifier: AGPL-3.0-only
import std/[strutils, times, macros, htmlgen, options, algorithm, re]
import std/unicode except strip
import packedjson
import types, utils, formatters

let
  # Profile mentions: a prefix that is either start-of-text or a character
  # that cannot precede a mention ($1), then "@" and a 1-15 character screen
  # name ($2). The classes spell out `A-Za-z`: the single range `A-z` also
  # covers the ASCII punctuation between "Z" and "a" ("[", "\", "]", "^",
  # "_" and "`"), which made text such as "@foo[bar" link to "foo[bar".
  unRegex = re"(^|[^A-Za-z0-9-_./?])@([A-Za-z0-9_]{1,15})"
  unReplace = "$1<a href=\"/$2\">@$2</a>"

  # Hashtags and cashtags: prefix ($1), tag symbol ($2) and tag text ($3).
  # The replacement always links to a "%23"-prefixed search for the tag text.
  htRegex = re"(^|[^\w-_./?])([#＃$])([\w_]+)"
  htReplace = "$1<a href=\"/search?q=%23$3\">$2$3</a>"

type
  # Selects how `replacedWith` renders the runes covered by a ReplaceSlice:
  #   rkRemove  - nothing is emitted for the span
  #   rkUrl     - an anchor showing `display` and pointing at `url`
  #   rkHashtag - an anchor pointing at a search for the tag text
  #   rkMention - an anchor pointing at `url`, with `display` as its title
  ReplaceSliceKind = enum
    rkRemove, rkUrl, rkHashtag, rkMention

  # One span of the original text that is swapped for generated markup.
  ReplaceSlice = object
    # Inclusive range of rune indices (not byte offsets) that is replaced.
    slice: Slice[int]
    kind: ReplaceSliceKind
    # Link target and human-readable text; only read for rkUrl and rkMention.
    url, display: string

template isNull*(js: JsonNode): bool =
  ## True when the kind of `js` is `JNull`.
  JNull == js.kind

template notNull*(js: JsonNode): bool =
  ## True when the kind of `js` is anything other than `JNull`.
  not (JNull == js.kind)

template `?`*(js: JsonNode): untyped =
  ## Evaluates `js` once. When the node is `JNull` the *enclosing routine*
  ## executes a bare `return`; otherwise the expression yields the node.
  let node = js
  if node.kind == JNull:
    return
  node

template `with`*(ident, value, body): untyped =
  ## Binds `value` to a fresh `ident` that is visible inside `body`, and
  ## runs `body` only when the bound value is not `nil`. The binding lives
  ## in its own `block`, so it does not leak into the caller's scope.
  block:
    let ident {.inject.} = value
    if not (ident == nil):
      body

template `with`*(ident; value: JsonNode; body): untyped =
  ## JsonNode overload: binds `value` to a fresh `ident` that is visible
  ## inside `body`, and runs `body` only when the node is not `JNull`.
  ## The binding lives in its own `block`, so it does not leak into the
  ## caller's scope.
  block:
    let ident {.inject.} = value
    # Test the bound copy rather than `value`: `value` is a template
    # parameter, so naming it again would evaluate the caller's expression
    # (typically a nested key lookup) a second time.
    if ident.notNull: body

template getCursor*(js: JsonNode): string =
  ## Reads the string stored under content -> operation -> cursor -> value.
  getStr(js{"content", "operation", "cursor", "value"})

template getError*(js: JsonNode): Error =
  ## Converts the "code" of the first element of a non-empty JSON array into
  ## an `Error`; any other input yields `null`.
  if js.kind == JArray and js.len != 0: Error(js[0]{"code"}.getInt)
  else: null

template parseTime(time: string; f: static string; flen: int): DateTime =
  ## Parses `time` with the format `f` in the UTC timezone.
  ## When `time` is not exactly `flen` characters long, the *enclosing
  ## routine* executes a bare `return` instead of attempting the parse.
  if flen != time.len:
    return
  time.parse(f, utc())

proc getDateTime*(js: JsonNode): DateTime =
  ## Parses a 20-character `yyyy-MM-ddTHH:mm:ssZ` timestamp as UTC.
  ## A string of any other length leaves the result at its default value.
  let stamp = js.getStr
  parseTime(stamp, "yyyy-MM-dd\'T\'HH:mm:ss\'Z\'", 20)

proc getTime*(js: JsonNode): DateTime =
  ## Parses a 30-character `ddd MMM dd hh:mm:ss +0000 yyyy` timestamp as UTC.
  ## A string of any other length leaves the result at its default value.
  let stamp = js.getStr
  parseTime(stamp, "ddd MMM dd hh:mm:ss \'+0000\' yyyy", 30)

proc getId*(id: string): string {.inline.} =
  ## Returns everything after the last '-' in `id`, or `id` unchanged when
  ## it contains no '-'.
  let dash = id.rfind('-')
  result =
    if dash >= 0: id.substr(dash + 1)
    else: id

proc getId*(js: JsonNode): int64 {.inline.} =
  ## Reads a numeric id from a JSON string or JSON integer node.
  ## Every other node kind yields 0.
  case js.kind
  of JInt:
    result = js.getBiggestInt()
  of JString:
    result = parseBiggestInt(js.getStr("0"))
  else:
    discard # `result` keeps its zero default

proc getEntryId*(js: JsonNode): string {.inline.} =
  ## Extracts a tweet id from the "entryId" of a timeline entry.
  ##
  ## * ids containing "tweet" or "sq-I-t": the part after the last '-'
  ## * ids containing "tombstone": the id stored in the tombstone's content
  ## * anything else: the id is echoed as unknown and "" is returned
  ##
  ## A missing or empty "entryId" also yields "".
  let entryId = js{"entryId"}.getStr
  if entryId.len == 0:
    return

  if entryId.contains("tweet") or entryId.contains("sq-I-t"):
    result = entryId.getId
  elif entryId.contains("tombstone"):
    result = js{"content", "item", "content",
                "tombstone", "tweet", "id"}.getStr
  else:
    echo "unknown entry: ", entryId

template getStrVal*(js: JsonNode; default=""): string =
  ## Reads the "string_value" field of `js`, passing `default` on to
  ## `getStr` as the fallback.
  getStr(js{"string_value"}, default)

proc getImageStr*(js: JsonNode): string =
  ## Returns the string value of `js` with a leading `https` prefix and then
  ## a leading `twimg` prefix removed (each only when present).
  var url = js.getStr
  url.removePrefix(https)
  url.removePrefix(twimg)
  url

template getImageVal*(js: JsonNode): string =
  ## Applies `getImageStr` to the node at image_value -> url.
  getImageStr(js{"image_value", "url"})

proc getCardUrl*(js: JsonNode; kind: CardKind): string =
  ## Picks the link target of a card: "website_url", or for
  ## `promoVideoConvo` cards "thank_you_url" with "website_url" as fallback.
  ## URLs starting with "card://" are replaced by an empty string.
  let website = js{"website_url"}.getStrVal
  let url =
    if kind == promoVideoConvo: js{"thank_you_url"}.getStrVal(website)
    else: website
  if not url.startsWith("card://"):
    result = url

proc getCardDomain*(js: JsonNode; kind: CardKind): string =
  ## Picks the domain of a card: "vanity_url" with the plain "domain" string
  ## as fallback; `promoVideoConvo` cards prefer "thank_you_vanity_url" and
  ## fall back to that first choice.
  let domain = js{"vanity_url"}.getStrVal(js{"domain"}.getStr)
  if kind != promoVideoConvo:
    return domain
  result = js{"thank_you_vanity_url"}.getStrVal(domain)

proc getCardTitle*(js: JsonNode; kind: CardKind): string =
  ## Picks the title of a card depending on its kind:
  ##
  ## * `promoVideoConvo`: "thank_you_text", falling back to "title"
  ## * `liveEvent`: "event_category"
  ## * `videoDirectMessage`, `imageDirectMessage`: "cta1"
  ## * every other kind: "title"
  let title = js{"title"}.getStrVal
  case kind
  of promoVideoConvo:
    result = js{"thank_you_text"}.getStrVal(title)
  of liveEvent:
    result = js{"event_category"}.getStrVal
  of videoDirectMessage, imageDirectMessage:
    result = js{"cta1"}.getStrVal
  else:
    result = title

proc getBanner*(js: JsonNode): string =
  ## Chooses what to show as a profile banner, in order of preference:
  ##
  ## 1. "profile_banner_url" with "/1500x500" appended
  ## 2. "profile_link_color" as a '#'-prefixed colour
  ## 3. the first palette entry of the profile image, as "#RRGGBB"
  ##
  ## Returns "" when none of the three is available.
  let bannerUrl = js{"profile_banner_url"}.getImageStr
  if bannerUrl.len != 0:
    return bannerUrl & "/1500x500"

  let linkColor = js{"profile_link_color"}.getStr
  if linkColor.len != 0:
    return '#' & linkColor

  # use primary color from profile picture color histogram
  with palette, js{"profile_image_extensions", "mediaColor", "r", "ok", "palette"}:
    if palette.len > 0:
      let rgb = palette[0]{"rgb"}
      return "#" & toHex(rgb{"red"}.getInt, 2) &
                   toHex(rgb{"green"}.getInt, 2) &
                   toHex(rgb{"blue"}.getInt, 2)

proc getTombstone*(js: JsonNode): string =
  ## Returns the text at tombstoneInfo -> richText -> text with a trailing
  ## " Learn more" removed when present.
  var text = js{"tombstoneInfo", "richText", "text"}.getStr
  text.removeSuffix(" Learn more")
  text

proc extractSlice(js: JsonNode): Slice[int] =
  ## Turns the two-element "indices" array of an entity into an inclusive
  ## slice: the first element is the start, the second is one past the end.
  let indices = js["indices"]
  indices[0].getInt ..< indices[1].getInt

proc extractUrls(result: var seq[ReplaceSlice]; js: JsonNode;
                 textLen: int; hideTwitter = false) =
  ## Appends the replacement for one URL entity to `result`.
  ##
  ## With `hideTwitter` set, a link for which `isTwitterUrl` holds and whose
  ## span reaches `textLen` is not rendered: it becomes an `rkRemove` slice
  ## when it starts before `textLen`, and is skipped otherwise.
  ## Every other link becomes an `rkUrl` slice showing the shortened URL.
  let
    url = js["expanded_url"].getStr
    slice = js.extractSlice
    hidden = hideTwitter and slice.b.succ >= textLen and url.isTwitterUrl

  if not hidden:
    result.add ReplaceSlice(kind: rkUrl, slice: slice,
                            url: url, display: url.shortLink)
  elif slice.a < textLen:
    result.add ReplaceSlice(kind: rkRemove, slice: slice)

proc extractHashtags(result: var seq[ReplaceSlice]; js: JsonNode) =
  ## Appends an `rkHashtag` replacement covering the entity's indices.
  let span = js.extractSlice
  result.add ReplaceSlice(slice: span, kind: rkHashtag)

proc replacedWith(runes: seq[Rune]; repls: openArray[ReplaceSlice];
                  textSlice: Slice[int]): string =
  ## Rebuilds the text in `runes`, starting at `textSlice.a`, with every
  ## slice in `repls` swapped for its markup. Text between replacements is
  ## copied verbatim; after the last replacement the runes up to, but not
  ## including, index `textSlice.b` are appended.
  ##
  ## `repls` is walked in the given order, each slice being taken to start
  ## where the previous one left off or later.
  result = newStringOfCap(runes.len)

  # Index of the first rune that has not been emitted yet.
  var cursor = textSlice.a

  for rep in repls:
    # Untouched text between the previous replacement and this one.
    result.add $runes[cursor ..< rep.slice.a]

    case rep.kind
    of rkRemove:
      discard
    of rkUrl:
      result.add a(rep.display, href = rep.url)
    of rkMention:
      result.add a($runes[rep.slice], href = rep.url, title = rep.display)
    of rkHashtag:
      # The first rune of the slice is the tag symbol, the rest its name.
      let
        name = $runes[rep.slice.a.succ .. rep.slice.b]
        symbol = $runes[rep.slice.a]
      result.add a(symbol & name, href = "/search?q=%23" & name)

    cursor = rep.slice.b.succ

  let rest = cursor ..< textSlice.b
  if rest.a <= rest.b:
    result.add $runes[rest]

proc deduplicate(s: var seq[ReplaceSlice]) =
  ## Removes, in place, every entry whose slice starts at the same index as
  ## an entry that precedes it in the sequence. Removal uses `del`, which
  ## moves the last element into the freed position, so the order of the
  ## remaining entries is not preserved.
  var keep = 0
  while keep < s.len:
    var probe = keep + 1
    while probe < s.len:
      if s[keep].slice.a != s[probe].slice.a:
        inc probe
      else:
        # Do not advance: `del` has put a different element at `probe`.
        s.del probe
    inc keep

proc cmp(x, y: ReplaceSlice): int =
  ## Orders replacements by the index at which their slice starts.
  ##
  ## Both sides use the start index: comparing a start against the other
  ## slice's end is not a consistent ordering (an element would not compare
  ## equal to itself), which `sort` relies on. For slices that do not
  ## overlap, the resulting order is unchanged.
  cmp(x.slice.a, y.slice.a)

proc expandUserEntities*(user: var User; js: JsonNode) =
  ## Fills `user.website` from the profile URL entity and rewrites
  ## `user.bio` so that URL entities, @mentions and hashtags/cashtags become
  ## HTML links. Returns without changing `user` when `js` has no "entities".
  let bioRunes = user.bio.toRunes
  let entities = ? js{"entities"}

  with websiteUrls, entities{"url", "urls"}:
    user.website = websiteUrls[0]{"expanded_url"}.getStr

  var repls: seq[ReplaceSlice]
  with bioUrls, entities{"description", "urls"}:
    for entry in bioUrls:
      repls.extractUrls(entry, bioRunes.high)

  repls.deduplicate
  repls.sort(cmp)

  # URL entities first (by rune index), then regex passes over the result.
  user.bio = bioRunes.replacedWith(repls, 0 .. bioRunes.len)
  let mentionsLinked = user.bio.replacef(unRegex, unReplace)
  user.bio = mentionsLinked.replacef(htRegex, htReplace)

proc expandTweetEntities*(tweet: Tweet; js: JsonNode) =
  ## Rewrites `tweet.text` so that the URL, media, hashtag, symbol and
  ## mention entities listed in `js` become HTML links (or are removed),
  ## and updates `tweet.reply` and the card URL along the way.
  ##
  ## NOTE(review): `tweet` is mutated through a non-`var` parameter, so
  ## `Tweet` is presumably a ref type - confirm against its declaration.
  let
    orig = tweet.text.toRunes
    # Rune range of the text that is displayed, as reported by the API.
    textRange = js{"display_text_range"}
    textSlice = textRange{0}.getInt .. textRange{1}.getInt
    hasQuote = js{"is_quote_status"}.getBool
    hasCard = tweet.card.isSome

  # For replies, record the user being replied to; `replyTo` protects that
  # name from being dropped from `tweet.reply` in the mention loop below.
  var replyTo = ""
  if tweet.replyId != 0:
    with reply, js{"in_reply_to_screen_name"}:
      tweet.reply.add reply.getStr
      replyTo = reply.getStr

  # Returns here when there are no entities; `tweet.reply` may already have
  # been extended above, while `tweet.text` is left as it was.
  let ent = ? js{"entities"}

  var replacements = newSeq[ReplaceSlice]()

  with urls, ent{"urls"}:
    for u in urls:
      # Skip entities whose short URL does not occur in the text.
      let urlStr = u["url"].getStr
      if urlStr.len == 0 or urlStr notin tweet.text:
        continue
      # Trailing Twitter links are only hidden for quote tweets.
      replacements.extractUrls(u, textSlice.b, hideTwitter = hasQuote)
      # Point the card at the expanded URL instead of the short one.
      if hasCard and u{"url"}.getStr == get(tweet.card).url:
        get(tweet.card).url = u{"expanded_url"}.getStr

  # Media links use the same handling, with hiding always enabled.
  with media, ent{"media"}:
    for m in media:
      replacements.extractUrls(m, textSlice.b, hideTwitter = true)

  if "hashtags" in ent:
    for hashtag in ent["hashtags"]:
      replacements.extractHashtags(hashtag)

  # Symbols are rendered exactly like hashtags.
  if "symbols" in ent:
    for symbol in ent["symbols"]:
      replacements.extractHashtags(symbol)

  if "user_mentions" in ent:
    for mention in ent["user_mentions"]:
      let
        name = mention{"screen_name"}.getStr
        slice = mention.extractSlice
        idx = tweet.reply.find(name)

      if slice.a >= textSlice.a:
        # Mention inside the displayed text: link it, and drop the name from
        # `tweet.reply` unless it is the user this tweet replies to.
        replacements.add ReplaceSlice(kind: rkMention, slice: slice,
          url: "/" & name, display: mention["name"].getStr)
        if idx > -1 and name != replyTo:
          tweet.reply.delete idx
      elif idx == -1 and tweet.replyId != 0:
        # Mention before the displayed text of a reply: record the name in
        # `tweet.reply` if it is not there yet.
        tweet.reply.add name

  # `replacedWith` walks the slices in order, so drop entries sharing a
  # start index and sort before rebuilding the text.
  replacements.deduplicate
  replacements.sort(cmp)

  # Only trailing whitespace is stripped from the rebuilt text.
  tweet.text = orig.replacedWith(replacements, textSlice)
                   .strip(leading=false)
-												Add license headers

Closes #413

											
										
										
											2021-12-27 01:37:38 +00:00
+								# SPDX-License-Identifier: AGPL-3.0-only
-												Strip trailing newlines from tweets

											
										
										
											2022-01-16 05:18:01 +00:00
+								import std/[strutils, times, macros, htmlgen, options, algorithm, re]
 								import std/unicode except strip
-												Remove nim-regex dependency, improve performance

											
										
										
											2022-01-11 02:10:42 +00:00
+								import packedjson
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
+								import types, utils, formatters
-												Remove nim-regex dependency, improve performance

											
										
										
											2022-01-11 02:10:42 +00:00
+								let
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
+								  unRegex = re"(^|[^A-z0-9-_./?])@([A-z0-9_]{1,15})"
 								  unReplace = "$1<a href=\"/$2\">@$2</a>"
-												Improve tweet url and hashtag parsing

											
										
										
											2020-11-14 23:01:13 +00:00
+								  htRegex = re"(^|[^\w-_./?])([#＃$])([\w_]+)"
-												Use regex for hashtags instead

											
										
										
											2020-06-06 08:17:19 +00:00
+								  htReplace = "$1<a href=\"/search?q=%23$3\">$2$3</a>"
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
-												Improve tweet url and hashtag parsing

											
										
										
											2020-11-14 23:01:13 +00:00
+								type
 								  ReplaceSliceKind = enum
 								    rkRemove, rkUrl, rkHashtag, rkMention
 								  ReplaceSlice = object
 								    slice: Slice[int]
 								    kind: ReplaceSliceKind
 								    url, display: string
-												Switch to packedjson to try lowering memory usage

											
										
										
											2020-06-02 14:22:44 +00:00
+								template isNull*(js: JsonNode): bool = js.kind == JNull
 								template notNull*(js: JsonNode): bool = js.kind != JNull
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
+								template `?`*(js: JsonNode): untyped =
 								  let j = js
-												Switch to packedjson to try lowering memory usage

											
										
										
											2020-06-02 14:22:44 +00:00
+								  if j.isNull: return
-												Fix compilation error with nim 1.4.0

											
										
										
											2020-11-10 13:04:01 +00:00
+								  j
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
 								template `with`*(ident, value, body): untyped =
 								  block:
 								    let ident {.inject.} = value
-												Minor token changes

											
										
										
											2020-06-01 11:40:26 +00:00
+								    if ident != nil: body
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
 								template `with`*(ident; value: JsonNode; body): untyped =
 								  block:
 								    let ident {.inject.} = value
-												Switch to packedjson to try lowering memory usage

											
										
										
											2020-06-02 14:22:44 +00:00
+								    if value.notNull: body
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
-												Minor optimizations

											
										
										
											2020-06-01 11:47:43 +00:00
+								template getCursor*(js: JsonNode): string =
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
+								  js{"content", "operation", "cursor", "value"}.getStr
-												Simplify error parser

											
										
										
											2020-06-01 19:53:21 +00:00
+								template getError*(js: JsonNode): Error =
 								  if js.kind != JArray or js.len == 0: null
 								  else: Error(js[0]{"code"}.getInt)
-												Fix compiler warnings

											
										
										
											2021-12-20 02:11:12 +00:00
+								template parseTime(time: string; f: static string; flen: int): DateTime =
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
+								  if time.len != flen: return
-												Fix compiler warnings

											
										
										
											2021-12-20 02:11:12 +00:00
+								  parse(time, f, utc())
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
-												Fix compiler warnings

											
										
										
											2021-12-20 02:11:12 +00:00
+								proc getDateTime*(js: JsonNode): DateTime =
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
+								  parseTime(js.getStr, "yyyy-MM-dd\'T\'HH:mm:ss\'Z\'", 20)
-												Fix compiler warnings

											
										
										
											2021-12-20 02:11:12 +00:00
+								proc getTime*(js: JsonNode): DateTime =
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
+								  parseTime(js.getStr, "ddd MMM dd hh:mm:ss \'+0000\' yyyy", 30)
-												Minor optimizations

											
										
										
											2020-06-01 11:47:43 +00:00
+								proc getId*(id: string): string {.inline.} =
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
+								  let start = id.rfind("-")
 								  if start < 0: return id
 								  id[start + 1 ..< id.len]
-												Minor optimizations

											
										
										
											2020-06-01 11:47:43 +00:00
+								proc getId*(js: JsonNode): int64 {.inline.} =
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
+								  case js.kind
 								  of JString: return parseBiggestInt(js.getStr("0"))
 								  of JInt: return js.getBiggestInt()
 								  else: return 0
-												Fix parsing censored tweets

											
										
										
											2020-11-07 21:48:49 +00:00
+								proc getEntryId*(js: JsonNode): string {.inline.} =
 								  let entry = js{"entryId"}.getStr
 								  if entry.len == 0: return
-												Add missing entry type to entryId parser

											
										
										
											2020-11-07 22:10:29 +00:00
+								  if "tweet" in entry or "sq-I-t" in entry:
-												Fix parsing censored tweets

											
										
										
											2020-11-07 21:48:49 +00:00
+								    return entry.getId
 								  elif "tombstone" in entry:
 								    return js{"content", "item", "content", "tombstone", "tweet", "id"}.getStr
 								  else:
 								    echo "unknown entry: ", entry
 								    return
-												Minor optimizations

											
										
										
											2020-06-01 11:47:43 +00:00
+								template getStrVal*(js: JsonNode; default=""): string =
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
+								  js{"string_value"}.getStr(default)
-												Strip https://pbs.twimg.com from image urls

											
										
										
											2020-06-07 05:55:57 +00:00
+								proc getImageStr*(js: JsonNode): string =
 								  result = js.getStr
 								  result.removePrefix(https)
 								  result.removePrefix(twimg)
 								template getImageVal*(js: JsonNode): string =
 								  js{"image_value", "url"}.getImageStr
-												Support more cards, even the undocumented ones

											
										
										
											2020-06-03 00:33:34 +00:00
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
+								proc getCardUrl*(js: JsonNode; kind: CardKind): string =
 								  result = js{"website_url"}.getStrVal
 								  if kind == promoVideoConvo:
 								    result = js{"thank_you_url"}.getStrVal(result)
-												Support more cards, even the undocumented ones

											
										
										
											2020-06-03 00:33:34 +00:00
+								  if result.startsWith("card://"):
 								    result = ""
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
 								proc getCardDomain*(js: JsonNode; kind: CardKind): string =
 								  result = js{"vanity_url"}.getStrVal(js{"domain"}.getStr)
 								  if kind == promoVideoConvo:
 								    result = js{"thank_you_vanity_url"}.getStrVal(result)
 								proc getCardTitle*(js: JsonNode; kind: CardKind): string =
 								  result = js{"title"}.getStrVal
 								  if kind == promoVideoConvo:
 								    result = js{"thank_you_text"}.getStrVal(result)
-												Support even more obscure card types

											
										
										
											2020-06-10 14:13:40 +00:00
+								  elif kind == liveEvent:
-												Support more cards, even the undocumented ones

											
										
										
											2020-06-03 00:33:34 +00:00
+								    result = js{"event_category"}.getStrVal
-												Support even more obscure card types

											
										
										
											2020-06-10 14:13:40 +00:00
+								  elif kind in {videoDirectMessage, imageDirectMessage}:
 								    result = js{"cta1"}.getStrVal
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
 								proc getBanner*(js: JsonNode): string =
-												Strip https://pbs.twimg.com from image urls

											
										
										
											2020-06-07 05:55:57 +00:00
+								  let url = js{"profile_banner_url"}.getImageStr
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
+								  if url.len > 0:
 								    return url & "/1500x500"
 								  let color = js{"profile_link_color"}.getStr
 								  if color.len > 0:
 								    return '#' & color
-												Rearchitect profile, support pins, Profile -> User

											
										
										
											2022-01-23 06:04:50 +00:00
+								  # use primary color from profile picture color histogram
 								  with p, js{"profile_image_extensions", "mediaColor", "r", "ok", "palette"}:
 								    if p.len > 0:
 								      let pal = p[0]{"rgb"}
 								      result = "#"
 								      result.add toHex(pal{"red"}.getInt, 2)
 								      result.add toHex(pal{"green"}.getInt, 2)
 								      result.add toHex(pal{"blue"}.getInt, 2)
 								      return
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
+								proc getTombstone*(js: JsonNode): string =
-												Fix tombstone parsing

Apparently they just got rid of the "epitaph", oh well

											
										
										
											2020-06-12 06:01:31 +00:00
+								  result = js{"tombstoneInfo", "richText", "text"}.getStr
 								  result.removeSuffix(" Learn more")
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
-												Improve tweet url and hashtag parsing

											
										
										
											2020-11-14 23:01:13 +00:00
+								proc extractSlice(js: JsonNode): Slice[int] =
 								  result = js["indices"][0].getInt ..< js["indices"][1].getInt
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
-												Improve tweet url and hashtag parsing

											
										
										
											2020-11-14 23:01:13 +00:00
+								proc extractUrls(result: var seq[ReplaceSlice]; js: JsonNode;
 								                 textLen: int; hideTwitter = false) =
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
+								  let
-												Improve tweet url and hashtag parsing

											
										
										
											2020-11-14 23:01:13 +00:00
+								    url = js["expanded_url"].getStr
 								    slice = js.extractSlice
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
-												Improve tweet url and hashtag parsing

											
										
										
											2020-11-14 23:01:13 +00:00
+								  if hideTwitter and slice.b.succ >= textLen and url.isTwitterUrl:
 								    if slice.a < textLen:
 								      result.add ReplaceSlice(kind: rkRemove, slice: slice)
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
+								  else:
-												Improve tweet url and hashtag parsing

											
										
										
											2020-11-14 23:01:13 +00:00
+								    result.add ReplaceSlice(kind: rkUrl, url: url,
 								                            display: url.shortLink, slice: slice)
 								proc extractHashtags(result: var seq[ReplaceSlice]; js: JsonNode) =
 								  result.add ReplaceSlice(kind: rkHashtag, slice: js.extractSlice)
 								proc replacedWith(runes: seq[Rune]; repls: openArray[ReplaceSlice];
 								                  textSlice: Slice[int]): string =
 								  template extractLowerBound(i: int; idx): int =
 								    if i > 0: repls[idx].slice.b.succ else: textSlice.a
 								  result = newStringOfCap(runes.len)
 								  for i, rep in repls:
 								    result.add $runes[extractLowerBound(i, i - 1) ..< rep.slice.a]
 								    case rep.kind
 								    of rkHashtag:
 								      let
 								        name = $runes[rep.slice.a.succ .. rep.slice.b]
 								        symbol = $runes[rep.slice.a]
 								      result.add a(symbol & name, href = "/search?q=%23" & name)
 								    of rkMention:
 								      result.add a($runes[rep.slice], href = rep.url, title = rep.display)
 								    of rkUrl:
 								      result.add a(rep.display, href = rep.url)
 								    of rkRemove:
 								      discard
 								  let rest = extractLowerBound(repls.len, ^1) ..< textSlice.b
 								  if rest.a <= rest.b:
 								    result.add $runes[rest]
 								proc deduplicate(s: var seq[ReplaceSlice]) =
 								  var
 								    len = s.len
 								    i = 0
 								  while i < len:
 								    var j = i + 1
 								    while j < len:
 								      if s[i].slice.a == s[j].slice.a:
 								        s.del j
 								        dec len
 								      else:
 								        inc j
 								    inc i
 								proc cmp(x, y: ReplaceSlice): int = cmp(x.slice.a, y.slice.b)
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
-												Rearchitect profile, support pins, Profile -> User

											
										
										
											2022-01-23 06:04:50 +00:00
+								proc expandUserEntities*(user: var User; js: JsonNode) =
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
+								  let
-												Rearchitect profile, support pins, Profile -> User

											
										
										
											2022-01-23 06:04:50 +00:00
+								    orig = user.bio.toRunes
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
+								    ent = ? js{"entities"}
 								  with urls, ent{"url", "urls"}:
-												Rearchitect profile, support pins, Profile -> User

											
										
										
											2022-01-23 06:04:50 +00:00
+								    user.website = urls[0]{"expanded_url"}.getStr
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
-												Improve tweet url and hashtag parsing

											
										
										
											2020-11-14 23:01:13 +00:00
+								  var replacements = newSeq[ReplaceSlice]()
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
+								  with urls, ent{"description", "urls"}:
-												Improve tweet url and hashtag parsing

											
										
										
											2020-11-14 23:01:13 +00:00
+								    for u in urls:
 								      replacements.extractUrls(u, orig.high)
 								  replacements.deduplicate
 								  replacements.sort(cmp)
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
-												Rearchitect profile, support pins, Profile -> User

											
										
										
											2022-01-23 06:04:50 +00:00
+								  user.bio = orig.replacedWith(replacements, 0 .. orig.len)
 								  user.bio = user.bio.replacef(unRegex, unReplace)
 								                     .replacef(htRegex, htReplace)
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
 								proc expandTweetEntities*(tweet: Tweet; js: JsonNode) =
 								  let
-												Improve tweet url and hashtag parsing

											
										
										
											2020-11-14 23:01:13 +00:00
+								    orig = tweet.text.toRunes
-												Switch to packedjson to try lowering memory usage

											
										
										
											2020-06-02 14:22:44 +00:00
+								    textRange = js{"display_text_range"}
-												Improve tweet url and hashtag parsing

											
										
										
											2020-11-14 23:01:13 +00:00
+								    textSlice = textRange{0}.getInt .. textRange{1}.getInt
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
+								    hasQuote = js{"is_quote_status"}.getBool
 								    hasCard = tweet.card.isSome
 								  var replyTo = ""
 								  if tweet.replyId != 0:
 								    with reply, js{"in_reply_to_screen_name"}:
 								      tweet.reply.add reply.getStr
 								      replyTo = reply.getStr
 								  let ent = ? js{"entities"}
-												Improve tweet url and hashtag parsing

											
										
										
											2020-11-14 23:01:13 +00:00
+								  var replacements = newSeq[ReplaceSlice]()
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
+								  with urls, ent{"urls"}:
 								    for u in urls:
-												Improve tweet url and hashtag parsing

											
										
										
											2020-11-14 23:01:13 +00:00
+								      let urlStr = u["url"].getStr
 								      if urlStr.len == 0 or urlStr notin tweet.text:
 								        continue
 								      replacements.extractUrls(u, textSlice.b, hideTwitter = hasQuote)
-												In with the new

											
										
										
											2020-06-01 00:16:24 +00:00
+								      if hasCard and u{"url"}.getStr == get(tweet.card).url:
 								        get(tweet.card).url = u{"expanded_url"}.getStr
 								  with media, ent{"media"}:
-												Improve tweet url and hashtag parsing

											
										
										
											2020-11-14 23:01:13 +00:00
+								    for m in media:
 								      replacements.extractUrls(m, textSlice.b, hideTwitter = true)
 								  if "hashtags" in ent:
 								    for hashtag in ent["hashtags"]:
 								      replacements.extractHashtags(hashtag)
 								  if "symbols" in ent:
 								    for symbol in ent["symbols"]:
 								      replacements.extractHashtags(symbol)
 								  if "user_mentions" in ent:
 								    for mention in ent["user_mentions"]:
 								      let
 								        name = mention{"screen_name"}.getStr
 								        slice = mention.extractSlice
 								        idx = tweet.reply.find(name)
 								      if slice.a >= textSlice.a:
 								        replacements.add ReplaceSlice(kind: rkMention, slice: slice,
 								          url: "/" & name, display: mention["name"].getStr)
 								        if idx > -1 and name != replyTo:
 								          tweet.reply.delete idx
 								      elif idx == -1 and tweet.replyId != 0:
 								        tweet.reply.add name
 								  replacements.deduplicate
 								  replacements.sort(cmp)
 								  tweet.text = orig.replacedWith(replacements, textSlice)
-												Strip trailing newlines from tweets

											
										
										
											2022-01-16 05:18:01 +00:00
+								                   .strip(leading=false)