From 1f1bbaad57cbe2ee4071c61f983a0a59f06f12ee Mon Sep 17 00:00:00 2001 From: John Zhen Mo Date: Thu, 1 Feb 2018 13:27:14 -0800 Subject: [PATCH] -Fixed subtitles extraction to use method from youtube-dl. -Expose subtitles during extraction. -Make subtitle lists return nonnull empty collections instead of null. --- .../schabi/newpipe/extractor/Subtitles.java | 4 +- .../soundcloud/SoundcloudStreamExtractor.java | 8 +- .../youtube/YoutubeStreamExtractor.java | 97 ++++++++++--------- .../extractor/stream/StreamExtractor.java | 5 +- .../newpipe/extractor/stream/StreamInfo.java | 16 +++ .../YoutubeStreamExtractorDefaultTest.java | 5 +- .../YoutubeStreamExtractorRestrictedTest.java | 4 +- 7 files changed, 82 insertions(+), 57 deletions(-) diff --git a/src/main/java/org/schabi/newpipe/extractor/Subtitles.java b/src/main/java/org/schabi/newpipe/extractor/Subtitles.java index 9427bf52..bb5fd919 100644 --- a/src/main/java/org/schabi/newpipe/extractor/Subtitles.java +++ b/src/main/java/org/schabi/newpipe/extractor/Subtitles.java @@ -2,7 +2,9 @@ package org.schabi.newpipe.extractor; import org.schabi.newpipe.extractor.stream.SubtitlesFormat; -public class Subtitles { +import java.io.Serializable; + +public class Subtitles implements Serializable { private final SubtitlesFormat format; private final String languageCode; private final String URL; diff --git a/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractor.java b/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractor.java index c2dc61dc..027ee699 100644 --- a/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractor.java +++ b/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractor.java @@ -167,15 +167,15 @@ public class SoundcloudStreamExtractor extends StreamExtractor { } @Override - @Nullable + @Nonnull public List getSubtitlesDefault() throws IOException, ExtractionException { - return null; + return Collections.emptyList(); } @Override - @Nullable + @Nonnull public List getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException { - return null; + return Collections.emptyList(); } @Override diff --git a/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java b/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java index 8376d77c..85e43221 100644 --- a/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java +++ b/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java @@ -7,6 +7,7 @@ import com.grack.nanojson.JsonParserException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; import org.mozilla.javascript.Context; import org.mozilla.javascript.Function; import org.mozilla.javascript.ScriptableObject; @@ -26,8 +27,6 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.io.IOException; import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; /* * Created by Christian Schabesberger on 06.08.15. @@ -74,6 +73,12 @@ public class YoutubeStreamExtractor extends StreamExtractor { } } + public class SubtitlesException extends ContentNotAvailableException { + SubtitlesException(String message, Throwable cause) { + super(message, cause); + } + } + /*//////////////////////////////////////////////////////////////////////////*/ private Document doc; @@ -81,6 +86,8 @@ public class YoutubeStreamExtractor extends StreamExtractor { private JsonObject playerArgs; @Nonnull private final Map videoInfoPage = new HashMap<>(); + @Nonnull + private List availableSubtitles = new ArrayList<>(); private boolean isAgeRestricted; @@ -419,54 +426,20 @@ public class YoutubeStreamExtractor extends StreamExtractor { } @Override - @Nullable + @Nonnull public List getSubtitlesDefault() throws IOException, ExtractionException { - return getSubtitles(SubtitlesFormat.TTML); + return getSubtitles(SubtitlesFormat.VTT); } @Override - @Nullable + @Nonnull public List getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException { assertPageFetched(); - if(isAgeRestricted) { - // If the video is age restricted getPlayerConfig will fail - return null; + List subtitles = new ArrayList<>(); + for (final Subtitles subtitle : availableSubtitles) { + if (subtitle.getFileType() == format) subtitles.add(subtitle); } - // TODO: This should be done in onFetchPage() - JsonObject playerConfig = getPlayerConfig(getPageHtml(NewPipe.getDownloader())); - String playerResponse = playerConfig.getObject("args").getString("player_response"); - - JsonObject captions; - try { - // Captions does not exist, return null - if (!JsonParser.object().from(playerResponse).has("captions")) return null; - - captions = JsonParser.object().from(playerResponse).getObject("captions"); - } catch (JsonParserException e) { - // Failed to parse subtitles - return null; - } - JsonArray captionsArray = captions.getObject("playerCaptionsTracklistRenderer").getArray("captionTracks"); - - int captionsSize = captionsArray.size(); - // Should not happen, if there is the "captions" object, it should always has some captions in it - if(captionsSize == 0) return null; - - List result = new ArrayList<>(); - for (int x = 0; x < captionsSize; x++) { - String baseUrl = captionsArray.getObject(x).getString("baseUrl"); - - String extension = format.getExtension(); - - String URL = baseUrl.replaceAll("&fmt=[^&]*", "&fmt=" + extension); - String captionsLangCode = captionsArray.getObject(x).getString("vssId"); - boolean isAutoGenerated = captionsLangCode.startsWith("a."); - String languageCode = captionsLangCode.replaceFirst((isAutoGenerated) ? "a." : ".", ""); - - result.add(new Subtitles(format, languageCode, URL, isAutoGenerated)); - } - - return result; + return subtitles; } @Override @@ -580,6 +553,10 @@ public class YoutubeStreamExtractor extends StreamExtractor { if (decryptionCode.isEmpty()) { decryptionCode = loadDecryptionCode(playerUrl); } + + if (availableSubtitles.isEmpty()) { + availableSubtitles.addAll(getAvailableSubtitles(getId())); + } } private JsonObject getPlayerConfig(String pageContent) throws ParsingException { @@ -732,6 +709,25 @@ public class YoutubeStreamExtractor extends StreamExtractor { return result == null ? "" : result.toString(); } + private List getAvailableSubtitles(final String id) throws SubtitlesException { + try { + final String listingUrl = getVideoSubtitlesListingUrl(id); + final String pageContent = NewPipe.getDownloader().download(listingUrl); + final Document listing = Jsoup.parse(pageContent, listingUrl); + final Elements tracks = listing.select("track"); + + List subtitles = new ArrayList<>(tracks.size() * 5); + for (final Element track : tracks) { + final String languageCode = track.attr("lang_code"); + subtitles.add(getVideoSubtitlesUrl(id, languageCode, SubtitlesFormat.TTML)); + subtitles.add(getVideoSubtitlesUrl(id, languageCode, SubtitlesFormat.VTT)); + // todo: add transcripts, they are currently omitted since they are incompatible with ExoPlayer + } + return subtitles; + } catch (IOException | ReCaptchaException e) { + throw new SubtitlesException("Unable to download subtitles listing", e); + } + } /*////////////////////////////////////////////////////////////////////////// // Data Class //////////////////////////////////////////////////////////////////////////*/ @@ -751,12 +747,25 @@ public class YoutubeStreamExtractor extends StreamExtractor { //////////////////////////////////////////////////////////////////////////*/ @Nonnull - private String getVideoInfoUrl(final String id, final String sts) { + private static String getVideoInfoUrl(final String id, final String sts) { return "https://www.youtube.com/get_video_info?" + "video_id=" + id + "&eurl=https://youtube.googleapis.com/v/" + id + "&sts=" + sts + "&ps=default&gl=US&hl=en"; } + @Nonnull + private static String getVideoSubtitlesListingUrl(final String id) { + return "https://video.google.com/timedtext?type=list&v=" + id; + } + + @Nonnull + private static Subtitles getVideoSubtitlesUrl(final String id, final String locale, final SubtitlesFormat format) { + final String url = "https://www.youtube.com/api/timedtext?lang=" + locale + + "&fmt=" + format.getExtension() + "&name=&v=" + id; + // These are all non-generated + return new Subtitles(format, locale, url, false); + } + private Map getItags(String encodedUrlMapKey, ItagItem.ItagType itagTypeWanted) throws ParsingException { Map urlAndItags = new LinkedHashMap<>(); diff --git a/src/main/java/org/schabi/newpipe/extractor/stream/StreamExtractor.java b/src/main/java/org/schabi/newpipe/extractor/stream/StreamExtractor.java index 61df951f..bdf8179d 100644 --- a/src/main/java/org/schabi/newpipe/extractor/stream/StreamExtractor.java +++ b/src/main/java/org/schabi/newpipe/extractor/stream/StreamExtractor.java @@ -132,10 +132,9 @@ public abstract class StreamExtractor extends Extractor { public abstract List getVideoStreams() throws IOException, ExtractionException; public abstract List getVideoOnlyStreams() throws IOException, ExtractionException; - @Nullable + @Nonnull public abstract List getSubtitlesDefault() throws IOException, ExtractionException; - - @Nullable + @Nonnull public abstract List getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException; public abstract StreamType getStreamType() throws ParsingException; diff --git a/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java b/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java index be0da42b..27df6d33 100644 --- a/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java +++ b/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java @@ -138,6 +138,10 @@ public class StreamInfo extends Info { return start_position; } + public List getSubtitles() { + return subtitles; + } + public void setStreamType(StreamType stream_type) { this.stream_type = stream_type; } @@ -214,6 +218,10 @@ public class StreamInfo extends Info { this.start_position = start_position; } + public void setSubtitles(List subtitles) { + this.subtitles = subtitles; + } + public static class StreamExtractException extends ExtractionException { StreamExtractException(String message) { super(message); @@ -313,6 +321,12 @@ public class StreamInfo extends Info { streamInfo.addError(new ExtractionException("Couldn't get video only streams", e)); } + try { + streamInfo.setSubtitles(extractor.getSubtitlesDefault()); + } catch (Exception e) { + streamInfo.addError(new ExtractionException("Couldn't get subtitles", e)); + } + // Lists can be null if a exception was thrown during extraction if (streamInfo.getVideoStreams() == null) streamInfo.setVideoStreams(Collections.emptyList()); if (streamInfo.getVideoOnlyStreams()== null) streamInfo.setVideoOnlyStreams(Collections.emptyList()); @@ -444,4 +458,6 @@ public class StreamInfo extends Info { public List related_streams; //in seconds. some metadata is not passed using a StreamInfo object! public long start_position = 0; + + public List subtitles; } diff --git a/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java b/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java index 7facda01..d18b43d2 100644 --- a/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java +++ b/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java @@ -3,7 +3,6 @@ package org.schabi.newpipe.extractor.services.youtube; import org.junit.BeforeClass; import org.junit.Test; import org.schabi.newpipe.Downloader; -import org.schabi.newpipe.extractor.ExtractorAsserts; import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; @@ -148,12 +147,12 @@ public class YoutubeStreamExtractorDefaultTest { @Test public void testGetSubtitlesListDefault() throws IOException, ExtractionException { // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null - assertTrue(extractor.getSubtitlesDefault() == null); + assertTrue(extractor.getSubtitlesDefault().isEmpty()); } @Test public void testGetSubtitlesList() throws IOException, ExtractionException { // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null - assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT) == null); + assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT).isEmpty()); } } diff --git a/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorRestrictedTest.java b/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorRestrictedTest.java index 622b1e86..0589a9cd 100644 --- a/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorRestrictedTest.java +++ b/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorRestrictedTest.java @@ -122,12 +122,12 @@ public class YoutubeStreamExtractorRestrictedTest { @Test public void testGetSubtitlesListDefault() throws IOException, ExtractionException { // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null - assertNull(extractor.getSubtitlesDefault()); + assertTrue(extractor.getSubtitlesDefault().isEmpty()); } @Test public void testGetSubtitlesList() throws IOException, ExtractionException { // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null - assertNull(extractor.getSubtitles(SubtitlesFormat.VTT)); + assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT).isEmpty()); } }