2019-08-11 19:26:37 +00:00
|
|
|
import xmltree, sequtils, strutils, json
|
2019-06-20 14:16:20 +00:00
|
|
|
|
2019-07-03 09:46:03 +00:00
|
|
|
import types, parserutils, formatters
|
2019-06-20 14:16:20 +00:00
|
|
|
|
2019-08-11 19:26:55 +00:00
|
|
|
proc parseTimelineProfile*(node: XmlNode): Profile =
  ## Builds a `Profile` from the `.ProfileHeaderCard` element found under
  ## `node`.
  ##
  ## Name, username, join date, location, website, bio and the
  ## verified/protected flags are read from the header card; the avatar,
  ## banner and media count are looked up from `node` itself. Afterwards the
  ## stats are filled in from the `.ProfileNav-list` element.
  ##
  ## When no header card is found, `result` is returned untouched.
  let header = node.select(".ProfileHeaderCard")
  if header.isNil:
    return

  # Every header selector shares this class prefix.
  const prefix = ".ProfileHeaderCard-"

  result = Profile(
    fullname: getName(header, prefix & "nameLink"),
    username: getUsername(header, prefix & "screenname"),
    joinDate: getDate(header, prefix & "joinDateText"),
    location: stripText(selectText(header, prefix & "locationText")),
    website: stripText(selectText(header, prefix & "url")),
    bio: getBio(header, prefix & "bio"),
    userpic: getAvatar(node, ".profile-picture img"),
    verified: header.isVerified,
    protected: header.isProtected,
    banner: node.getTimelineBanner,
    media: node.getMediaCount
  )

  getProfileStats(result, node.select(".ProfileNav-list"))
|
|
|
|
|
2019-09-13 20:24:58 +00:00
|
|
|
proc parsePopupProfile*(node: XmlNode; selector=".profile-card"): Profile =
  ## Builds a `Profile` from the element matching `selector` under `node`
  ## (`.profile-card` by default) and then fills in its popup stats.
  ##
  ## The bio is read from `.bio`, with `.ProfileCard-bio` passed to `getBio`
  ## as the fallback selector. When nothing matches `selector`, `result` is
  ## returned untouched.
  let card = node.select(selector)
  if card.isNil:
    return

  result = Profile(
    fullname: getName(card, ".fullname"),
    username: getUsername(card, ".username"),
    bio: getBio(card, ".bio", fallback=".ProfileCard-bio"),
    userpic: getAvatar(card, ".ProfileCard-avatarImage"),
    verified: card.isVerified,
    protected: card.isProtected,
    banner: card.getBanner
  )

  getPopupStats(result, card)
|
2019-06-20 14:16:20 +00:00
|
|
|
|
2019-09-20 23:08:30 +00:00
|
|
|
proc parseListProfile*(profile: XmlNode): Profile =
  ## Builds a `Profile` from a single `profile` element, reading the name,
  ## username, bio, avatar and the verified/protected flags from it.
  Profile(
    fullname: getName(profile, ".fullname"),
    username: getUsername(profile, ".username"),
    bio: getBio(profile, ".bio"),
    userpic: getAvatar(profile, ".avatar"),
    verified: profile.isVerified,
    protected: profile.isProtected
  )
|
|
|
|
|
2019-06-21 00:15:46 +00:00
|
|
|
proc parseIntentProfile*(profile: XmlNode): Profile =
  ## Builds a `Profile` from the `profile` element and then fills in its
  ## intent stats.
  ##
  ## The verified/protected flags are derived from the presence of
  ## `li.verified` / `li.protected` elements. The avatar is looked up inside
  ## the `.profile.summary` element.
  result = Profile(
    fullname: getName(profile, "a.fn.url.alternate-context"),
    username: getUsername(profile, ".nickname"),
    bio: getBio(profile, "p.note"),
    userpic: getAvatar(profile.select(".profile.summary"), "img.photo"),
    verified: not profile.select("li.verified").isNil,
    protected: not profile.select("li.protected").isNil,
    banner: profile.getBanner
  )

  getIntentStats(result, profile)
|
2019-06-21 00:15:46 +00:00
|
|
|
|
|
|
|
proc parseTweetProfile*(profile: XmlNode): Profile =
  ## Builds a `Profile` for a tweet's author. The name and username come from
  ## the `data-name` and `data-screen-name` attributes of `profile`; the
  ## avatar is looked up under `.avatar`.
  Profile(
    fullname: stripText(profile.attr("data-name")),
    username: profile.attr("data-screen-name"),
    userpic: getAvatar(profile, ".avatar"),
    verified: profile.isVerified
  )
|
|
|
|
|
2019-06-24 06:07:36 +00:00
|
|
|
proc parseQuote*(quote: XmlNode): Quote =
  ## Builds a `Quote` from the `quote` element and then attaches its media.
  ##
  ## The quote is always marked `available`. `hasThread` is set when a
  ## `.self-thread-context` element is present. The author's profile carries
  ## only the full name, username and verified flag.
  result = Quote(
    id: quote.attr("data-item-id"),
    text: quote.getQuoteText,
    reply: quote.parseTweetReply,
    hasThread: not quote.select(".self-thread-context").isNil,
    available: true,
    profile: Profile(
      fullname: stripText(quote.selectText(".QuoteTweet-fullname")),
      username: quote.attr("data-screen-name"),
      verified: quote.isVerified
    )
  )

  getQuoteMedia(result, quote)
|
|
|
|
|
2019-06-26 16:51:21 +00:00
|
|
|
proc parseTweet*(node: XmlNode): Tweet =
  ## Builds a `Tweet` from a stream item `node`.
  ##
  ## * A nil `node`, or one without a `.tweet` child, yields an empty
  ##   `Tweet()`.
  ## * A node whose class contains "withheld" yields a `Tweet` holding only
  ##   the tombstone derived from its `.Tombstone-label` text.
  ## * Otherwise the tweet is parsed in full, marked `available`, and its
  ##   media, card, retweet and quote information are attached.
  if node.isNil:
    return Tweet()

  if node.attr("class").contains("withheld"):
    let label = node.selectText(".Tombstone-label")
    return Tweet(tombstone: getTombstone(label))

  let tweet = node.select(".tweet")
  if tweet.isNil:
    return Tweet()

  result = Tweet(
    id: tweet.attr("data-item-id"),
    threadId: tweet.attr("data-conversation-id"),
    text: tweet.getTweetText,
    time: tweet.getTimestamp,
    shortTime: tweet.getShortTime,
    profile: tweet.parseTweetProfile,
    stats: tweet.parseTweetStats,
    reply: tweet.parseTweetReply,
    hasThread: not tweet.select(".content > .self-thread-context").isNil,
    pinned: tweet.attr("class").contains("pinned"),
    available: true
  )

  getTweetMedia(result, tweet)
  getTweetCard(result, tweet)

  # A non-empty retweeter name marks this tweet as a retweet.
  let retweeter = tweet.selectText(".js-retweet-text > a > b")
  if retweeter.len > 0:
    result.retweet = some(Retweet(
      by: retweeter.stripText(),
      id: tweet.attr("data-retweet-id")
    ))

  let quoted = tweet.select(".QuoteTweet-innerContainer")
  if not quoted.isNil:
    result.quote = some(parseQuote(quoted))

  # An "unavailable" tombstone replaces any quote set above with a quote
  # that carries only the tombstone text (looked up from `node`).
  let tombstone = tweet.select(".Tombstone")
  if not tombstone.isNil and "unavailable" in tombstone.innerText():
    let label = node.selectText(".Tombstone-label")
    result.quote = some(Quote(tombstone: getTombstone(label)))
|
2019-06-24 06:07:36 +00:00
|
|
|
|
2019-10-08 18:47:45 +00:00
|
|
|
proc parseChain*(nodes: XmlNode): Chain =
  ## Parses the non-text children of `nodes` into a `Chain`.
  ##
  ## Children are classified by their lower-cased `class` attribute:
  ## * containing "tombstone", "unavailable" or "withheld": an empty
  ##   `Tweet()` is appended to `content`;
  ## * containing "morereplies": `more` is set from `getMoreReplies`;
  ## * anything else is parsed with `parseTweet` and appended to `content`.
  ##
  ## A nil `nodes` returns `result` untouched.
  if nodes.isNil:
    return

  result = Chain()

  for element in nodes.filterIt(it.kind != xnText):
    let
      classes = element.attr("class").toLower()
      isPlaceholder =
        ["tombstone", "unavailable", "withheld"].anyIt(it in classes)

    if isPlaceholder:
      result.content.add Tweet()
    elif "morereplies" in classes:
      result.more = getMoreReplies(element)
    else:
      result.content.add parseTweet(element)
|
2019-06-20 14:16:20 +00:00
|
|
|
|
2019-09-24 13:39:04 +00:00
|
|
|
proc parseConversation*(node: XmlNode; after: string): Conversation =
  ## Builds a `Conversation` from a page rooted at `node`.
  ##
  ## Without a `.permalink-tweet-container`, only `tweet` is set, parsed
  ## from `.permalink-tweet-withheld`. Otherwise the main tweet, the chain
  ## before it (`.in-reply-to .stream-items`) and the replies are parsed.
  ##
  ## `replies.beginning` is true when `after` is empty. A non-empty
  ## "show more threads" cursor overrides `replies.minId` and forces
  ## `replies.hasMore` to true.
  let mainTweet = node.select(".permalink-tweet-container")
  if mainTweet.isNil:
    let withheld = node.select(".permalink-tweet-withheld")
    return Conversation(tweet: parseTweet(withheld))

  result = Conversation(
    tweet: parseTweet(mainTweet),
    before: parseChain(node.select(".in-reply-to .stream-items")),
    replies: Result[Chain](
      minId: node.selectAttr(".replies-to .stream-container",
                             "data-min-position"),
      hasMore: not node.select(".stream-footer .has-more-items").isNil,
      beginning: after.len == 0
    )
  )

  let cursor = node.selectAttr(
    ".ThreadedConversation-showMoreThreads button", "data-cursor")
  if cursor.len > 0:
    result.replies.minId = cursor
    result.replies.hasMore = true

  let replyList = node.select(".replies-to .stream-items")
  if replyList.isNil:
    return

  for idx, reply in replyList.filterIt(it.kind != xnText):
    let
      classes = reply.attr("class").toLower()
      thread = reply.select(".stream-items")

    if idx == 0 and "self" in classes:
      # A leading "self" item becomes the `after` chain.
      result.after = parseChain(thread)
    else:
      # "lone" replies are parsed from the reply element itself, all others
      # from their nested `.stream-items`.
      let source = if "lone" in classes: reply else: thread
      result.replies.content.add parseChain(source)
|
2019-06-24 03:14:14 +00:00
|
|
|
|
2019-08-11 19:26:55 +00:00
|
|
|
proc parseTimeline*(node: XmlNode; after: string): Timeline =
  ## Builds a `Timeline` from `node`.
  ##
  ## The tweets come from the chain parsed out of `.stream > .stream-items`.
  ## `minId` / `maxId` are read from the `data-min-position` /
  ## `data-max-position` attributes of `node`. `hasMore` reflects the
  ## presence of `.has-more-items`, and `beginning` is true when `after` is
  ## empty. A nil `node` yields an empty `Timeline()`.
  if node.isNil:
    return Timeline()

  let tweets = parseChain(node.select(".stream > .stream-items")).content

  result = Timeline(
    content: tweets,
    minId: node.attr("data-min-position"),
    maxId: node.attr("data-max-position"),
    hasMore: not node.select(".has-more-items").isNil,
    beginning: after.len == 0
  )
|
|
|
|
|
2019-08-06 17:02:38 +00:00
|
|
|
proc parseVideo*(node: JsonNode; tweetId: string): Video =
  ## Builds a `Video` from the JSON `node`, using its `track` object.
  ##
  ## * `contentType == "media_entity"`: the playback type is `mp4` when the
  ##   track's `playbackType` contains "mp4", otherwise `m3u8`; availability
  ##   and reason come from `mediaAvailability`.
  ## * `contentType == "vmap"`: the playback type is `vmap`, the duration
  ##   defaults to 0 when absent, and the video is marked available.
  ## * Any other content type is reported with `echo` and leaves `result` at
  ##   its default.
  ##
  ## In every case `videoId` is set to `tweetId` and `thumb` to the
  ## `posterImage` of `node`.
  let
    track = node{"track"}
    contentType = track["contentType"].to(string)
    playback = track["playbackType"].to(string)

  if contentType == "media_entity":
    let availability = track{"mediaAvailability"}
    result = Video(
      playbackType: (if "mp4" in playback: mp4 else: m3u8),
      contentId: track["contentId"].to(string),
      durationMs: track["durationMs"].to(int),
      views: track["viewCount"].to(string),
      url: track["playbackUrl"].to(string),
      available: availability["status"].to(string) == "available",
      reason: availability["reason"].to(string)
    )
  elif contentType == "vmap":
    result = Video(
      playbackType: vmap,
      durationMs: track.getOrDefault("durationMs").getInt(),
      url: track["vmapUrl"].to(string),
      available: true
    )
  else:
    echo "Can't parse video of type ", contentType

  result.videoId = tweetId
  result.thumb = node["posterImage"].to(string)
|
2019-06-29 12:11:23 +00:00
|
|
|
|
|
|
|
proc parsePoll*(node: XmlNode): Poll =
  ## Builds a `Poll` from `node`.
  ##
  ## * `votes` is the first space-separated token of the stripped
  ##   `.PollXChoice-footer--total` text.
  ## * `status` is the `.PollXChoice-footer--time` text.
  ## * For every `.PollXChoice-choice`, the non-text children of its
  ##   `.PollXChoice-choice--text` element are inspected: a child without a
  ##   class contributes its text to `options`; a child whose class contains
  ##   "progress" contributes its text, minus the last character, parsed as
  ##   an integer to `values`.
  ## * `leader` is the index of the first strictly greatest value above
  ##   zero; it keeps its default when there is none.
  let
    choiceNodes = node.selectAll(".PollXChoice-choice")
    total = node.selectText(".PollXChoice-footer--total")

  result.votes = total.strip().split(" ")[0]
  result.status = node.selectText(".PollXChoice-footer--time")

  for choiceNode in choiceNodes:
    let textNode = choiceNode.select(".PollXChoice-choice--text")
    for span in textNode.filterIt(it.kind != xnText):
      let spanClass = span.attr("class")
      if spanClass.len == 0:
        result.options.add span.innerText()
      elif "progress" in spanClass:
        # Drop the trailing character before parsing the number.
        let label = span.innerText()
        result.values.add parseInt(label[0 .. ^2])

  var best = 0
  for idx, value in result.values:
    if value > best:
      best = value
      result.leader = idx
|
2019-07-04 02:18:32 +00:00
|
|
|
|
|
|
|
proc parsePhotoRail*(node: XmlNode): seq[GalleryPhoto] =
  ## Collects one `GalleryPhoto` per `.tweet-media-img-placeholder` element
  ## under `node`, taking the URL and tweet id from its `data-image-url` and
  ## `data-tweet-id` attributes.
  for placeholder in node.selectAll(".tweet-media-img-placeholder"):
    # The colour is the `background-color` attribute with any "style: "
    # substring removed.
    let colour = placeholder.attr("background-color").replace("style: ", "")
    result.add GalleryPhoto(
      url: placeholder.attr("data-image-url"),
      tweetId: placeholder.attr("data-tweet-id"),
      color: colour
    )
|
2019-07-11 17:22:23 +00:00
|
|
|
|
|
|
|
proc parseCard*(card: var Card; node: XmlNode) =
  ## Fills `card` in place from the card markup rooted at `node`.
  ##
  ## * `title`, `text` and `dest` are always overwritten from the matching
  ##   elements.
  ## * `url` is only filled in when it is still empty, from the `href` of
  ##   the first `a` element; it is left empty when there is no such
  ##   element.
  ## * `image` is set only when a `.tcu-imageWrapper img` element exists.
  ## * For `liveEvent` cards the parsed title becomes the text and the title
  ##   is replaced by the `.TwitterCard-attribution--category` text.
  card.title = node.selectText("h2.TwitterCard-title")
  card.text = node.selectText("p.tcu-resetMargin")
  card.dest = node.selectText("span.SummaryCard-destination")

  if card.url.len == 0:
    # `select` can come back nil (see the image lookup below), so guard the
    # attribute access instead of dereferencing a missing link.
    let link = node.select("a")
    if link != nil:
      card.url = link.attr("href")

  let image = node.select(".tcu-imageWrapper img")
  if image != nil:
    # workaround for issue 11713
    card.image = some image.attr("data-src").replace("gname", "g&name")

  if card.kind == liveEvent:
    card.text = card.title
    card.title = node.selectText(".TwitterCard-attribution--category")
|