- Fixed subtitles extraction to use the method from youtube-dl.
- Exposed subtitles during extraction.
- Made subtitle lists return non-null empty collections instead of null.
This commit is contained in:
parent
0061131d39
commit
1f1bbaad57
7 changed files with 82 additions and 57 deletions
|
@ -2,7 +2,9 @@ package org.schabi.newpipe.extractor;
|
||||||
|
|
||||||
import org.schabi.newpipe.extractor.stream.SubtitlesFormat;
|
import org.schabi.newpipe.extractor.stream.SubtitlesFormat;
|
||||||
|
|
||||||
public class Subtitles {
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
public class Subtitles implements Serializable {
|
||||||
private final SubtitlesFormat format;
|
private final SubtitlesFormat format;
|
||||||
private final String languageCode;
|
private final String languageCode;
|
||||||
private final String URL;
|
private final String URL;
|
||||||
|
|
|
@ -167,15 +167,15 @@ public class SoundcloudStreamExtractor extends StreamExtractor {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@Nullable
|
@Nonnull
|
||||||
public List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException {
|
public List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException {
|
||||||
return null;
|
return Collections.emptyList();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@Nullable
|
@Nonnull
|
||||||
public List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException {
|
public List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException {
|
||||||
return null;
|
return Collections.emptyList();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -7,6 +7,7 @@ import com.grack.nanojson.JsonParserException;
|
||||||
import org.jsoup.Jsoup;
|
import org.jsoup.Jsoup;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
|
import org.jsoup.select.Elements;
|
||||||
import org.mozilla.javascript.Context;
|
import org.mozilla.javascript.Context;
|
||||||
import org.mozilla.javascript.Function;
|
import org.mozilla.javascript.Function;
|
||||||
import org.mozilla.javascript.ScriptableObject;
|
import org.mozilla.javascript.ScriptableObject;
|
||||||
|
@ -26,8 +27,6 @@ import javax.annotation.Nonnull;
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.regex.Matcher;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Created by Christian Schabesberger on 06.08.15.
|
* Created by Christian Schabesberger on 06.08.15.
|
||||||
|
@ -74,6 +73,12 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public class SubtitlesException extends ContentNotAvailableException {
|
||||||
|
SubtitlesException(String message, Throwable cause) {
|
||||||
|
super(message, cause);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*//////////////////////////////////////////////////////////////////////////*/
|
/*//////////////////////////////////////////////////////////////////////////*/
|
||||||
|
|
||||||
private Document doc;
|
private Document doc;
|
||||||
|
@ -81,6 +86,8 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
private JsonObject playerArgs;
|
private JsonObject playerArgs;
|
||||||
@Nonnull
|
@Nonnull
|
||||||
private final Map<String, String> videoInfoPage = new HashMap<>();
|
private final Map<String, String> videoInfoPage = new HashMap<>();
|
||||||
|
@Nonnull
|
||||||
|
private List<Subtitles> availableSubtitles = new ArrayList<>();
|
||||||
|
|
||||||
private boolean isAgeRestricted;
|
private boolean isAgeRestricted;
|
||||||
|
|
||||||
|
@ -419,54 +426,20 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@Nullable
|
@Nonnull
|
||||||
public List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException {
|
public List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException {
|
||||||
return getSubtitles(SubtitlesFormat.TTML);
|
return getSubtitles(SubtitlesFormat.VTT);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@Nullable
|
@Nonnull
|
||||||
public List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException {
|
public List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
if(isAgeRestricted) {
|
List<Subtitles> subtitles = new ArrayList<>();
|
||||||
// If the video is age restricted getPlayerConfig will fail
|
for (final Subtitles subtitle : availableSubtitles) {
|
||||||
return null;
|
if (subtitle.getFileType() == format) subtitles.add(subtitle);
|
||||||
}
|
}
|
||||||
// TODO: This should be done in onFetchPage()
|
return subtitles;
|
||||||
JsonObject playerConfig = getPlayerConfig(getPageHtml(NewPipe.getDownloader()));
|
|
||||||
String playerResponse = playerConfig.getObject("args").getString("player_response");
|
|
||||||
|
|
||||||
JsonObject captions;
|
|
||||||
try {
|
|
||||||
// Captions does not exist, return null
|
|
||||||
if (!JsonParser.object().from(playerResponse).has("captions")) return null;
|
|
||||||
|
|
||||||
captions = JsonParser.object().from(playerResponse).getObject("captions");
|
|
||||||
} catch (JsonParserException e) {
|
|
||||||
// Failed to parse subtitles
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
JsonArray captionsArray = captions.getObject("playerCaptionsTracklistRenderer").getArray("captionTracks");
|
|
||||||
|
|
||||||
int captionsSize = captionsArray.size();
|
|
||||||
// Should not happen, if there is the "captions" object, it should always has some captions in it
|
|
||||||
if(captionsSize == 0) return null;
|
|
||||||
|
|
||||||
List<Subtitles> result = new ArrayList<>();
|
|
||||||
for (int x = 0; x < captionsSize; x++) {
|
|
||||||
String baseUrl = captionsArray.getObject(x).getString("baseUrl");
|
|
||||||
|
|
||||||
String extension = format.getExtension();
|
|
||||||
|
|
||||||
String URL = baseUrl.replaceAll("&fmt=[^&]*", "&fmt=" + extension);
|
|
||||||
String captionsLangCode = captionsArray.getObject(x).getString("vssId");
|
|
||||||
boolean isAutoGenerated = captionsLangCode.startsWith("a.");
|
|
||||||
String languageCode = captionsLangCode.replaceFirst((isAutoGenerated) ? "a." : ".", "");
|
|
||||||
|
|
||||||
result.add(new Subtitles(format, languageCode, URL, isAutoGenerated));
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -580,6 +553,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
if (decryptionCode.isEmpty()) {
|
if (decryptionCode.isEmpty()) {
|
||||||
decryptionCode = loadDecryptionCode(playerUrl);
|
decryptionCode = loadDecryptionCode(playerUrl);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (availableSubtitles.isEmpty()) {
|
||||||
|
availableSubtitles.addAll(getAvailableSubtitles(getId()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private JsonObject getPlayerConfig(String pageContent) throws ParsingException {
|
private JsonObject getPlayerConfig(String pageContent) throws ParsingException {
|
||||||
|
@ -732,6 +709,25 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
return result == null ? "" : result.toString();
|
return result == null ? "" : result.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private List<Subtitles> getAvailableSubtitles(final String id) throws SubtitlesException {
|
||||||
|
try {
|
||||||
|
final String listingUrl = getVideoSubtitlesListingUrl(id);
|
||||||
|
final String pageContent = NewPipe.getDownloader().download(listingUrl);
|
||||||
|
final Document listing = Jsoup.parse(pageContent, listingUrl);
|
||||||
|
final Elements tracks = listing.select("track");
|
||||||
|
|
||||||
|
List<Subtitles> subtitles = new ArrayList<>(tracks.size() * 5);
|
||||||
|
for (final Element track : tracks) {
|
||||||
|
final String languageCode = track.attr("lang_code");
|
||||||
|
subtitles.add(getVideoSubtitlesUrl(id, languageCode, SubtitlesFormat.TTML));
|
||||||
|
subtitles.add(getVideoSubtitlesUrl(id, languageCode, SubtitlesFormat.VTT));
|
||||||
|
// todo: add transcripts, they are currently omitted since they are incompatible with ExoPlayer
|
||||||
|
}
|
||||||
|
return subtitles;
|
||||||
|
} catch (IOException | ReCaptchaException e) {
|
||||||
|
throw new SubtitlesException("Unable to download subtitles listing", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
/*//////////////////////////////////////////////////////////////////////////
|
/*//////////////////////////////////////////////////////////////////////////
|
||||||
// Data Class
|
// Data Class
|
||||||
//////////////////////////////////////////////////////////////////////////*/
|
//////////////////////////////////////////////////////////////////////////*/
|
||||||
|
@ -751,12 +747,25 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
//////////////////////////////////////////////////////////////////////////*/
|
//////////////////////////////////////////////////////////////////////////*/
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
private String getVideoInfoUrl(final String id, final String sts) {
|
private static String getVideoInfoUrl(final String id, final String sts) {
|
||||||
return "https://www.youtube.com/get_video_info?" + "video_id=" + id +
|
return "https://www.youtube.com/get_video_info?" + "video_id=" + id +
|
||||||
"&eurl=https://youtube.googleapis.com/v/" + id +
|
"&eurl=https://youtube.googleapis.com/v/" + id +
|
||||||
"&sts=" + sts + "&ps=default&gl=US&hl=en";
|
"&sts=" + sts + "&ps=default&gl=US&hl=en";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Nonnull
|
||||||
|
private static String getVideoSubtitlesListingUrl(final String id) {
|
||||||
|
return "https://video.google.com/timedtext?type=list&v=" + id;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Nonnull
|
||||||
|
private static Subtitles getVideoSubtitlesUrl(final String id, final String locale, final SubtitlesFormat format) {
|
||||||
|
final String url = "https://www.youtube.com/api/timedtext?lang=" + locale +
|
||||||
|
"&fmt=" + format.getExtension() + "&name=&v=" + id;
|
||||||
|
// These are all non-generated
|
||||||
|
return new Subtitles(format, locale, url, false);
|
||||||
|
}
|
||||||
|
|
||||||
private Map<String, ItagItem> getItags(String encodedUrlMapKey, ItagItem.ItagType itagTypeWanted) throws ParsingException {
|
private Map<String, ItagItem> getItags(String encodedUrlMapKey, ItagItem.ItagType itagTypeWanted) throws ParsingException {
|
||||||
Map<String, ItagItem> urlAndItags = new LinkedHashMap<>();
|
Map<String, ItagItem> urlAndItags = new LinkedHashMap<>();
|
||||||
|
|
||||||
|
|
|
@ -132,10 +132,9 @@ public abstract class StreamExtractor extends Extractor {
|
||||||
public abstract List<VideoStream> getVideoStreams() throws IOException, ExtractionException;
|
public abstract List<VideoStream> getVideoStreams() throws IOException, ExtractionException;
|
||||||
public abstract List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionException;
|
public abstract List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionException;
|
||||||
|
|
||||||
@Nullable
|
@Nonnull
|
||||||
public abstract List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException;
|
public abstract List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException;
|
||||||
|
@Nonnull
|
||||||
@Nullable
|
|
||||||
public abstract List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException;
|
public abstract List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException;
|
||||||
|
|
||||||
public abstract StreamType getStreamType() throws ParsingException;
|
public abstract StreamType getStreamType() throws ParsingException;
|
||||||
|
|
|
@ -138,6 +138,10 @@ public class StreamInfo extends Info {
|
||||||
return start_position;
|
return start_position;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<Subtitles> getSubtitles() {
|
||||||
|
return subtitles;
|
||||||
|
}
|
||||||
|
|
||||||
public void setStreamType(StreamType stream_type) {
|
public void setStreamType(StreamType stream_type) {
|
||||||
this.stream_type = stream_type;
|
this.stream_type = stream_type;
|
||||||
}
|
}
|
||||||
|
@ -214,6 +218,10 @@ public class StreamInfo extends Info {
|
||||||
this.start_position = start_position;
|
this.start_position = start_position;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setSubtitles(List<Subtitles> subtitles) {
|
||||||
|
this.subtitles = subtitles;
|
||||||
|
}
|
||||||
|
|
||||||
public static class StreamExtractException extends ExtractionException {
|
public static class StreamExtractException extends ExtractionException {
|
||||||
StreamExtractException(String message) {
|
StreamExtractException(String message) {
|
||||||
super(message);
|
super(message);
|
||||||
|
@ -313,6 +321,12 @@ public class StreamInfo extends Info {
|
||||||
streamInfo.addError(new ExtractionException("Couldn't get video only streams", e));
|
streamInfo.addError(new ExtractionException("Couldn't get video only streams", e));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
streamInfo.setSubtitles(extractor.getSubtitlesDefault());
|
||||||
|
} catch (Exception e) {
|
||||||
|
streamInfo.addError(new ExtractionException("Couldn't get subtitles", e));
|
||||||
|
}
|
||||||
|
|
||||||
// Lists can be null if an exception was thrown during extraction
|
// Lists can be null if an exception was thrown during extraction
|
||||||
if (streamInfo.getVideoStreams() == null) streamInfo.setVideoStreams(Collections.<VideoStream>emptyList());
|
if (streamInfo.getVideoStreams() == null) streamInfo.setVideoStreams(Collections.<VideoStream>emptyList());
|
||||||
if (streamInfo.getVideoOnlyStreams()== null) streamInfo.setVideoOnlyStreams(Collections.<VideoStream>emptyList());
|
if (streamInfo.getVideoOnlyStreams()== null) streamInfo.setVideoOnlyStreams(Collections.<VideoStream>emptyList());
|
||||||
|
@ -444,4 +458,6 @@ public class StreamInfo extends Info {
|
||||||
public List<InfoItem> related_streams;
|
public List<InfoItem> related_streams;
|
||||||
//in seconds. some metadata is not passed using a StreamInfo object!
|
//in seconds. some metadata is not passed using a StreamInfo object!
|
||||||
public long start_position = 0;
|
public long start_position = 0;
|
||||||
|
|
||||||
|
public List<Subtitles> subtitles;
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,7 +3,6 @@ package org.schabi.newpipe.extractor.services.youtube;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.schabi.newpipe.Downloader;
|
import org.schabi.newpipe.Downloader;
|
||||||
import org.schabi.newpipe.extractor.ExtractorAsserts;
|
|
||||||
import org.schabi.newpipe.extractor.NewPipe;
|
import org.schabi.newpipe.extractor.NewPipe;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
|
@ -148,12 +147,12 @@ public class YoutubeStreamExtractorDefaultTest {
|
||||||
@Test
|
@Test
|
||||||
public void testGetSubtitlesListDefault() throws IOException, ExtractionException {
|
public void testGetSubtitlesListDefault() throws IOException, ExtractionException {
|
||||||
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null
|
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null
|
||||||
assertTrue(extractor.getSubtitlesDefault() == null);
|
assertTrue(extractor.getSubtitlesDefault().isEmpty());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetSubtitlesList() throws IOException, ExtractionException {
|
public void testGetSubtitlesList() throws IOException, ExtractionException {
|
||||||
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null
|
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null
|
||||||
assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT) == null);
|
assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT).isEmpty());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -122,12 +122,12 @@ public class YoutubeStreamExtractorRestrictedTest {
|
||||||
@Test
|
@Test
|
||||||
public void testGetSubtitlesListDefault() throws IOException, ExtractionException {
|
public void testGetSubtitlesListDefault() throws IOException, ExtractionException {
|
||||||
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null
|
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null
|
||||||
assertNull(extractor.getSubtitlesDefault());
|
assertTrue(extractor.getSubtitlesDefault().isEmpty());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetSubtitlesList() throws IOException, ExtractionException {
|
public void testGetSubtitlesList() throws IOException, ExtractionException {
|
||||||
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null
|
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null
|
||||||
assertNull(extractor.getSubtitles(SubtitlesFormat.VTT));
|
assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT).isEmpty());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue