Make channel extractor more robust

This commit is contained in:
Omar Roth 2018-10-21 21:44:20 -05:00
parent ef95dc2380
commit ed3d9ce540

View file

@@ -189,15 +189,15 @@ end
def get_about_info(ucid) def get_about_info(ucid)
client = make_client(YT_URL) client = make_client(YT_URL)
about = client.get("/user/#{ucid}/about?disable_polymer=1") about = client.get("/user/#{ucid}/about?disable_polymer=1&gl=US&hl=en")
about = XML.parse_html(about.body) about = XML.parse_html(about.body)
if !about.xpath_node(%q(//span[@class="qualified-channel-title-text"]/a)) if !about.xpath_node(%q(//span[contains(@class,"qualified-channel-title-text")]/a))
about = client.get("/channel/#{ucid}/about?disable_polymer=1") about = client.get("/channel/#{ucid}/about?disable_polymer=1&gl=US&hl=en")
about = XML.parse_html(about.body) about = XML.parse_html(about.body)
end end
if !about.xpath_node(%q(//span[@class="qualified-channel-title-text"]/a)) if !about.xpath_node(%q(//span[contains(@class,"qualified-channel-title-text")]/a))
raise "User does not exist." raise "User does not exist."
end end
@@ -207,7 +207,7 @@ def get_about_info(ucid)
end end
sub_count ||= 0 sub_count ||= 0
author = about.xpath_node(%q(//span[@class="qualified-channel-title-text"]/a)).not_nil!.content author = about.xpath_node(%q(//span[contains(@class,"qualified-channel-title-text")]/a)).not_nil!.content
ucid = about.xpath_node(%q(//link[@rel="canonical"])).not_nil!["href"].split("/")[-1] ucid = about.xpath_node(%q(//link[@rel="canonical"])).not_nil!["href"].split("/")[-1]
# Auto-generated channels # Auto-generated channels