From 1ed89aad3eff72231bfec6c0d8d1fcf6b8c2c8e8 Mon Sep 17 00:00:00 2001 From: Stypox Date: Mon, 6 Jan 2020 20:45:57 +0100 Subject: [PATCH 1/7] Use more often playerResponse in yt stream extractor This enhances performance and should make the extractor more reliable since it gets its info from a stable JSON structure that shouldn't be subject to many changes. Fallback HTML methods have been kept. In case of error, the thrown exception contains the data about the playerResponse failure, which should be clearer than an NPE caused by not-found HTML tags. --- .../extractors/YoutubeStreamExtractor.java | 179 ++++++++++-------- 1 file changed, 100 insertions(+), 79 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index c05004ed..ef1edad3 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -106,19 +106,21 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override public String getName() throws ParsingException { assertPageFetched(); - String name = getStringFromMetaData("title"); - if(name == null) { - // Fallback to HTML method + try { + return playerResponse.getObject("videoDetails").getString("title"); + + } catch (Exception e) { + // fallback HTML method + String name = null; try { name = doc.select("meta[name=title]").attr(CONTENT); - } catch (Exception e) { - throw new ParsingException("Could not get the title", e); + } catch (Exception ignored) {} + + if (name == null) { + throw new ParsingException("Could not get name", e); } + return name; } - if(name == null || name.isEmpty()) { - throw new ParsingException("Could not get the title"); - } - return name; } @Override @@ -128,9 
+130,17 @@ public class YoutubeStreamExtractor extends StreamExtractor { } try { - return doc.select("meta[itemprop=datePublished]").attr(CONTENT); - } catch (Exception e) {//todo: add fallback method - throw new ParsingException("Could not get upload date", e); + return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate"); + } catch (Exception e) { + String uploadDate = null; + try { + uploadDate = doc.select("meta[itemprop=datePublished]").attr(CONTENT); + } catch (Exception ignored) {} + + if (uploadDate == null) { + throw new ParsingException("Could not get upload date", e); + } + return uploadDate; } } @@ -149,24 +159,23 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override public String getThumbnailUrl() throws ParsingException { assertPageFetched(); - // Try to get high resolution thumbnail first, if it fails, use low res from the player instead try { - return doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href"); - } catch (Exception ignored) { - // Try other method... - } + JsonArray thumbnails = playerResponse.getObject("videoDetails").getObject("thumbnail").getArray("thumbnails"); + // the last thumbnail is the one with the highest resolution + return thumbnails.getObject(thumbnails.size()-1).getString("url"); - try { - if (playerArgs != null && playerArgs.isString("thumbnail_url")) return playerArgs.getString("thumbnail_url"); - } catch (Exception ignored) { - // Try other method... 
- } - - try { - return videoInfoPage.get("thumbnail_url"); } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); + String url = null; + try { + url = doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href"); + } catch (Exception ignored) {} + + if (url == null) { + throw new ParsingException("Could not get thumbnail url", e); + } + return url; } + } @Nonnull @@ -174,9 +183,15 @@ public class YoutubeStreamExtractor extends StreamExtractor { public String getDescription() throws ParsingException { assertPageFetched(); try { + // first try to get html-formatted description return parseHtmlAndGetFullLinks(doc.select("p[id=\"eow-description\"]").first().html()); } catch (Exception e) { - throw new ParsingException("Could not get the description", e); + try { + // fallback to raw non-html description + return playerResponse.getObject("videoDetails").getString("shortDescription"); + } catch (Exception ignored) { + throw new ParsingException("Could not get the description", e); + } } } @@ -269,25 +284,22 @@ public class YoutubeStreamExtractor extends StreamExtractor { public long getLength() throws ParsingException { assertPageFetched(); - // try getting duration from playerargs - try { - String durationMs = playerResponse - .getObject("streamingData") - .getArray("formats") - .getObject(0) - .getString("approxDurationMs"); - return Long.parseLong(durationMs)/1000; - } catch (Exception e) { - } - - //try getting value from age gated video try { String duration = playerResponse .getObject("videoDetails") .getString("lengthSeconds"); return Long.parseLong(duration); } catch (Exception e) { - throw new ParsingException("Every methode to get the duration has failed: ", e); + try { + String durationMs = playerResponse + .getObject("streamingData") + .getArray("formats") + .getObject(0) + .getString("approxDurationMs"); + return Math.round(Long.parseLong(durationMs)/1000.0f); + } catch (Exception ignored) { + throw new 
ParsingException("Could not get duration", e); + } } } @@ -307,11 +319,15 @@ public class YoutubeStreamExtractor extends StreamExtractor { try { if (getStreamType().equals(StreamType.LIVE_STREAM)) { return getLiveStreamWatchingCount(); + } else { + return Long.parseLong(playerResponse.getObject("videoDetails").getString("viewCount")); + } + } catch (Exception e) { + try { + return Long.parseLong(doc.select("meta[itemprop=interactionCount]").attr(CONTENT)); + } catch (Exception ignored) { + throw new ParsingException("Could not get view count", e); } - - return Long.parseLong(doc.select("meta[itemprop=interactionCount]").attr(CONTENT)); - } catch (Exception e) {//todo: find fallback method - throw new ParsingException("Could not get number of views", e); } } @@ -373,7 +389,10 @@ public class YoutubeStreamExtractor extends StreamExtractor { try { likesString = button.select("span.yt-uix-button-content").first().text(); } catch (NullPointerException e) { - //if this kicks in our button has no content and therefore likes/dislikes are disabled + //if this kicks in our button has no content and therefore ratings must be disabled + if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) { + throw new ParsingException("Ratings are enabled even though the like button is missing", e); + } return -1; } return Integer.parseInt(Utils.removeNonDigitCharacters(likesString)); @@ -393,7 +412,10 @@ public class YoutubeStreamExtractor extends StreamExtractor { try { dislikesString = button.select("span.yt-uix-button-content").first().text(); } catch (NullPointerException e) { - //if this kicks in our button has no content and therefore likes/dislikes are disabled + //if this kicks in our button has no content and therefore ratings must be disabled + if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) { + throw new ParsingException("Ratings are enabled even though the dislike button is missing", e); + } return -1; } return 
Integer.parseInt(Utils.removeNonDigitCharacters(dislikesString)); @@ -409,60 +431,59 @@ public class YoutubeStreamExtractor extends StreamExtractor { public String getUploaderUrl() throws ParsingException { assertPageFetched(); try { - return doc.select("div[class=\"yt-user-info\"]").first().children() - .select("a").first().attr("abs:href"); + return "https://www.youtube.com/channel/" + + playerResponse.getObject("videoDetails").getString("channelId"); } catch (Exception e) { - throw new ParsingException("Could not get channel link", e); - } - } + String uploaderUrl = null; + try { + uploaderUrl = doc.select("div[class=\"yt-user-info\"]").first().children() + .select("a").first().attr("abs:href"); + } catch (Exception ignored) {} - - @Nullable - private String getStringFromMetaData(String field) { - assertPageFetched(); - String value = null; - if(playerArgs != null) { - // This can not fail - value = playerArgs.getString(field); + if (uploaderUrl == null) { + throw new ParsingException("Could not get channel link", e); + } + return uploaderUrl; } - if(value == null) { - // This can not fail too - value = videoInfoPage.get(field); - } - return value; } @Nonnull @Override public String getUploaderName() throws ParsingException { assertPageFetched(); - String name = getStringFromMetaData("author"); - - if(name == null) { + try { + return playerResponse.getObject("videoDetails").getString("author"); + } catch (Exception e) { + String name = null; try { - // Fallback to HTML method name = doc.select("div.yt-user-info").first().text(); - } catch (Exception e) { - throw new ParsingException("Could not get uploader name", e); + } catch (Exception ignored) {} + + if (name == null) { + throw new ParsingException("Could not get uploader name"); } + return name; } - if(name == null || name.isEmpty()) { - throw new ParsingException("Could not get uploader name"); - } - return name; } @Nonnull @Override public String getUploaderAvatarUrl() throws ParsingException { 
assertPageFetched(); + + String uploaderAvatarUrl = null; try { - return doc.select("a[class*=\"yt-user-photo\"]").first() + uploaderAvatarUrl = doc.select("a[class*=\"yt-user-photo\"]").first() .select("img").first() .attr("abs:data-thumb"); } catch (Exception e) {//todo: add fallback method - throw new ParsingException("Could not get uploader thumbnail URL.", e); + throw new ParsingException("Could not get uploader avatar url", e); } + + if (uploaderAvatarUrl == null) { + throw new ParsingException("Could not get uploader avatar url"); + } + return uploaderAvatarUrl; } @Nonnull @@ -590,12 +611,12 @@ public class YoutubeStreamExtractor extends StreamExtractor { public StreamType getStreamType() throws ParsingException { assertPageFetched(); try { - if (playerArgs != null && (playerArgs.has("ps") && playerArgs.get("ps").toString().equals("live") || - (!playerResponse.getObject("streamingData").has(FORMATS)))) { + if (!playerResponse.getObject("streamingData").has(FORMATS) || + (playerArgs != null && playerArgs.has("ps") && playerArgs.get("ps").toString().equals("live"))) { return StreamType.LIVE_STREAM; } } catch (Exception e) { - throw new ParsingException("Could not get hls manifest url", e); + throw new ParsingException("Could not get stream type", e); } return StreamType.VIDEO_STREAM; } From 1689037b0115d440bce5ee5e612ab646cafa496f Mon Sep 17 00:00:00 2001 From: Stypox Date: Mon, 6 Jan 2020 20:49:08 +0100 Subject: [PATCH 2/7] Fix getLength() tests now that the value is rounded (not floor-ed) In yt stream extractor --- .../youtube/stream/YoutubeStreamExtractorAgeRestrictedTest.java | 2 +- .../youtube/stream/YoutubeStreamExtractorDefaultTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorAgeRestrictedTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorAgeRestrictedTest.java index 
992a8cd9..b9da6d17 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorAgeRestrictedTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorAgeRestrictedTest.java @@ -75,7 +75,7 @@ public class YoutubeStreamExtractorAgeRestrictedTest { @Test public void testGetLength() throws ParsingException { - assertEquals(1789, extractor.getLength()); + assertEquals(1790, extractor.getLength()); } @Test diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java index e715144a..14b78501 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java @@ -101,7 +101,7 @@ public class YoutubeStreamExtractorDefaultTest { @Test public void testGetLength() throws ParsingException { - assertEquals(366, extractor.getLength()); + assertEquals(367, extractor.getLength()); } @Test From d5ca02f3f69690b1bab065639db139747a4997b4 Mon Sep 17 00:00:00 2001 From: Stypox Date: Mon, 6 Jan 2020 21:41:15 +0100 Subject: [PATCH 3/7] Fix test failing because music channel could be Vevo In yt stream extractor test. The Vevo and the normal music channel are equivalent, so Youtube picks one of them at random, and in playerResponse the channel id is Vevo's one. 
--- .../youtube/stream/YoutubeStreamExtractorDefaultTest.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java index 14b78501..8dd35ce7 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java @@ -124,7 +124,11 @@ public class YoutubeStreamExtractorDefaultTest { @Test public void testGetUploaderUrl() throws ParsingException { - assertEquals("https://www.youtube.com/channel/UCsRM0YB_dabtEPGPTKo-gcw", extractor.getUploaderUrl()); + String url = extractor.getUploaderUrl(); + if (!url.equals("https://www.youtube.com/channel/UCsRM0YB_dabtEPGPTKo-gcw") && + !url.equals("https://www.youtube.com/channel/UComP_epzeKzvBX156r6pm1Q")) { + fail("Uploader url is neither the music channel one nor the Vevo one"); + } } @Test From 1bb6cdee225a8544bdc87311523f4abb859efcd6 Mon Sep 17 00:00:00 2001 From: Stypox Date: Mon, 6 Jan 2020 21:43:24 +0100 Subject: [PATCH 4/7] Enable commented-out test for yt stream extractor --- .../youtube/stream/YoutubeStreamExtractorControversialTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorControversialTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorControversialTest.java index 7faec805..5a063a93 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorControversialTest.java +++ 
b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorControversialTest.java @@ -65,7 +65,7 @@ public class YoutubeStreamExtractorControversialTest { @Test public void testGetDescription() throws ParsingException { assertNotNull(extractor.getDescription()); -// assertFalse(extractor.getDescription().isEmpty()); + assertFalse(extractor.getDescription().isEmpty()); } @Test From 7ba04836ebe15268e12c5055d0919d61a8dfe1f4 Mon Sep 17 00:00:00 2001 From: Stypox Date: Mon, 6 Jan 2020 23:57:08 +0100 Subject: [PATCH 5/7] Add tests for ratings in yt stream extractor tests --- .../YoutubeStreamExtractorDefaultTest.java | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java index 8dd35ce7..5f21df55 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java @@ -187,6 +187,18 @@ public class YoutubeStreamExtractorDefaultTest { // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null assertTrue(extractor.getSubtitles(MediaFormat.TTML).isEmpty()); } + + @Test + public void testGetLikeCount() throws ParsingException { + long likeCount = extractor.getLikeCount(); + assertTrue("" + likeCount, likeCount >= 15000000); + } + + @Test + public void testGetDislikeCount() throws ParsingException { + long dislikeCount = extractor.getDislikeCount(); + assertTrue("" + dislikeCount, dislikeCount >= 818000); + } } public static class DescriptionTestPewdiepie { @@ -249,6 +261,29 @@ public class YoutubeStreamExtractorDefaultTest { } } + public static class RatingsDisabledTest { + private static 
YoutubeStreamExtractor extractor; + + @BeforeClass + public static void setUp() throws Exception { + NewPipe.init(DownloaderTestImpl.getInstance()); + extractor = (YoutubeStreamExtractor) YouTube + .getStreamExtractor("https://www.youtube.com/watch?v=HRKu0cvrr_o"); + extractor.fetchPage(); + } + + @Test + public void testGetLikeCount() throws ParsingException { + assertEquals(-1, extractor.getLikeCount()); + } + + @Test + public void testGetDislikeCount() throws ParsingException { + assertEquals(-1, extractor.getDislikeCount()); + } + + } + public static class FramesTest { private static YoutubeStreamExtractor extractor; From 1e1100ef76c2a9cb917759374bc0bc2dddb53c47 Mon Sep 17 00:00:00 2001 From: Stypox Date: Mon, 20 Jan 2020 22:52:36 +0100 Subject: [PATCH 6/7] Update extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java Co-Authored-By: Tobias Groza --- .../services/youtube/extractors/YoutubeStreamExtractor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index ef1edad3..45874906 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -162,7 +162,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { try { JsonArray thumbnails = playerResponse.getObject("videoDetails").getObject("thumbnail").getArray("thumbnails"); // the last thumbnail is the one with the highest resolution - return thumbnails.getObject(thumbnails.size()-1).getString("url"); + return thumbnails.getObject(thumbnails.size() - 1).getString("url"); } catch (Exception e) { String url = null; From 
1cfdc4547a41a70caa6b368b7267b68d04531e53 Mon Sep 17 00:00:00 2001 From: Stypox Date: Mon, 20 Jan 2020 22:52:48 +0100 Subject: [PATCH 7/7] Update extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java Co-Authored-By: Tobias Groza --- .../services/youtube/extractors/YoutubeStreamExtractor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 45874906..d0e0ff79 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -296,7 +296,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { .getArray("formats") .getObject(0) .getString("approxDurationMs"); - return Math.round(Long.parseLong(durationMs)/1000.0f); + return Math.round(Long.parseLong(durationMs) / 1000f); } catch (Exception ignored) { throw new ParsingException("Could not get duration", e); }