From 8d88093eb78bb94dd9b9ff47f12e5f5f4864646a Mon Sep 17 00:00:00 2001
From: Mauricio Colli
Date: Thu, 10 Aug 2017 19:59:41 -0300
Subject: [PATCH] Fix getRealIdFromShared occasionally failing

---
 .../youtube/YoutubeStreamUrlIdHandler.java | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamUrlIdHandler.java b/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamUrlIdHandler.java
index 18f22ed1..65c73768 100644
--- a/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamUrlIdHandler.java
+++ b/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamUrlIdHandler.java
@@ -1,5 +1,8 @@
 package org.schabi.newpipe.extractor.services.youtube;
 
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
 import org.schabi.newpipe.extractor.Downloader;
 import org.schabi.newpipe.extractor.NewPipe;
 import org.schabi.newpipe.extractor.UrlIdHandler;
@@ -125,8 +128,18 @@ public class YoutubeStreamUrlIdHandler implements UrlIdHandler {
         } catch (IOException | ReCaptchaException e) {
             throw new ParsingException("Unable to resolve shared link", e);
         }
-        // is this bad? is this fragile?:
-        String realId = Parser.matchGroup1("rel=\"shortlink\" href=\"https://youtu.be/" + ID_PATTERN, content);
+        Document document = Jsoup.parse(content);
+        String urlWithRealId;
+
+        Element element = document.select("link[rel=\"canonical\"]").first();
+        if (element != null) {
+            urlWithRealId = element.attr("abs:href");
+        } else {
+            urlWithRealId = document.select("meta[property=\"og:url\"]").first()
+                    .attr("abs:content");
+        }
+
+        String realId = Parser.matchGroup1(ID_PATTERN, urlWithRealId);
         if (sharedId.equals(realId)) {
             throw new ParsingException("Got same id for as shared info_id: " + sharedId);
         }