-Fix subtitles extraction to use the method from youtube-dl.

-Expose subtitles during extraction.
-Make subtitle lists return non-null empty collections instead of null.
This commit is contained in:
John Zhen Mo 2018-02-01 13:27:14 -08:00
parent 0061131d39
commit 1f1bbaad57
7 changed files with 82 additions and 57 deletions

View file

@ -2,7 +2,9 @@ package org.schabi.newpipe.extractor;
import org.schabi.newpipe.extractor.stream.SubtitlesFormat;
public class Subtitles {
import java.io.Serializable;
public class Subtitles implements Serializable {
private final SubtitlesFormat format;
private final String languageCode;
private final String URL;

View file

@ -167,15 +167,15 @@ public class SoundcloudStreamExtractor extends StreamExtractor {
}
@Override
@Nullable
@Nonnull
public List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException {
return null;
return Collections.emptyList();
}
@Override
@Nullable
@Nonnull
public List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException {
return null;
return Collections.emptyList();
}
@Override

View file

@ -7,6 +7,7 @@ import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function;
import org.mozilla.javascript.ScriptableObject;
@ -26,8 +27,6 @@ import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/*
* Created by Christian Schabesberger on 06.08.15.
@ -74,6 +73,12 @@ public class YoutubeStreamExtractor extends StreamExtractor {
}
}
public class SubtitlesException extends ContentNotAvailableException {
SubtitlesException(String message, Throwable cause) {
super(message, cause);
}
}
/*//////////////////////////////////////////////////////////////////////////*/
private Document doc;
@ -81,6 +86,8 @@ public class YoutubeStreamExtractor extends StreamExtractor {
private JsonObject playerArgs;
@Nonnull
private final Map<String, String> videoInfoPage = new HashMap<>();
@Nonnull
private List<Subtitles> availableSubtitles = new ArrayList<>();
private boolean isAgeRestricted;
@ -419,54 +426,20 @@ public class YoutubeStreamExtractor extends StreamExtractor {
}
@Override
@Nullable
@Nonnull
public List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException {
return getSubtitles(SubtitlesFormat.TTML);
return getSubtitles(SubtitlesFormat.VTT);
}
@Override
@Nullable
@Nonnull
public List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException {
assertPageFetched();
if(isAgeRestricted) {
// If the video is age restricted getPlayerConfig will fail
return null;
List<Subtitles> subtitles = new ArrayList<>();
for (final Subtitles subtitle : availableSubtitles) {
if (subtitle.getFileType() == format) subtitles.add(subtitle);
}
// TODO: This should be done in onFetchPage()
JsonObject playerConfig = getPlayerConfig(getPageHtml(NewPipe.getDownloader()));
String playerResponse = playerConfig.getObject("args").getString("player_response");
JsonObject captions;
try {
// Captions does not exist, return null
if (!JsonParser.object().from(playerResponse).has("captions")) return null;
captions = JsonParser.object().from(playerResponse).getObject("captions");
} catch (JsonParserException e) {
// Failed to parse subtitles
return null;
}
JsonArray captionsArray = captions.getObject("playerCaptionsTracklistRenderer").getArray("captionTracks");
int captionsSize = captionsArray.size();
// Should not happen, if there is the "captions" object, it should always has some captions in it
if(captionsSize == 0) return null;
List<Subtitles> result = new ArrayList<>();
for (int x = 0; x < captionsSize; x++) {
String baseUrl = captionsArray.getObject(x).getString("baseUrl");
String extension = format.getExtension();
String URL = baseUrl.replaceAll("&fmt=[^&]*", "&fmt=" + extension);
String captionsLangCode = captionsArray.getObject(x).getString("vssId");
boolean isAutoGenerated = captionsLangCode.startsWith("a.");
String languageCode = captionsLangCode.replaceFirst((isAutoGenerated) ? "a." : ".", "");
result.add(new Subtitles(format, languageCode, URL, isAutoGenerated));
}
return result;
return subtitles;
}
@Override
@ -580,6 +553,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
if (decryptionCode.isEmpty()) {
decryptionCode = loadDecryptionCode(playerUrl);
}
if (availableSubtitles.isEmpty()) {
availableSubtitles.addAll(getAvailableSubtitles(getId()));
}
}
private JsonObject getPlayerConfig(String pageContent) throws ParsingException {
@ -732,6 +709,25 @@ public class YoutubeStreamExtractor extends StreamExtractor {
return result == null ? "" : result.toString();
}
/**
 * Downloads the subtitle track listing of a video and builds the subtitle
 * entries that can be requested for it.
 *
 * @param id the video id used to build the listing URL and the subtitle URLs
 * @return one TTML and one VTT entry for every listed track that carries a
 *         language code; empty if the listing contains no such track
 * @throws SubtitlesException if the listing could not be downloaded
 */
private List<Subtitles> getAvailableSubtitles(final String id) throws SubtitlesException {
    try {
        final String listingUrl = getVideoSubtitlesListingUrl(id);
        final String pageContent = NewPipe.getDownloader().download(listingUrl);
        final Document listing = Jsoup.parse(pageContent, listingUrl);
        final Elements tracks = listing.select("track");

        // Exactly two entries (TTML and VTT) are added per track
        final List<Subtitles> subtitles = new ArrayList<>(tracks.size() * 2);
        for (final Element track : tracks) {
            final String languageCode = track.attr("lang_code");
            // attr() yields "" when the attribute is missing; such a track would
            // produce a subtitle URL with an empty "lang" parameter, so skip it
            if (languageCode.isEmpty()) {
                continue;
            }
            subtitles.add(getVideoSubtitlesUrl(id, languageCode, SubtitlesFormat.TTML));
            subtitles.add(getVideoSubtitlesUrl(id, languageCode, SubtitlesFormat.VTT));
            // todo: add transcripts, they are currently omitted since they are incompatible with ExoPlayer
        }
        return subtitles;
    } catch (IOException | ReCaptchaException e) {
        throw new SubtitlesException("Unable to download subtitles listing", e);
    }
}
/*//////////////////////////////////////////////////////////////////////////
// Data Class
//////////////////////////////////////////////////////////////////////////*/
@ -751,12 +747,25 @@ public class YoutubeStreamExtractor extends StreamExtractor {
//////////////////////////////////////////////////////////////////////////*/
@Nonnull
private String getVideoInfoUrl(final String id, final String sts) {
private static String getVideoInfoUrl(final String id, final String sts) {
return "https://www.youtube.com/get_video_info?" + "video_id=" + id +
"&eurl=https://youtube.googleapis.com/v/" + id +
"&sts=" + sts + "&ps=default&gl=US&hl=en";
}
/**
 * Builds the URL of the subtitle track listing for the given video.
 *
 * @param id the video id appended as the {@code v} query parameter
 * @return the listing URL
 */
@Nonnull
private static String getVideoSubtitlesListingUrl(final String id) {
    final String listingEndpoint = "https://video.google.com/timedtext?type=list&v=";
    return listingEndpoint + id;
}
/**
 * Creates the {@link Subtitles} entry for one language and format of a video.
 *
 * @param id     the video id placed in the {@code v} query parameter
 * @param locale the language code placed in the {@code lang} query parameter
 * @param format the subtitle format; its extension becomes the {@code fmt} parameter
 * @return a subtitles entry pointing at the built URL, flagged as not auto-generated
 */
@Nonnull
private static Subtitles getVideoSubtitlesUrl(final String id, final String locale, final SubtitlesFormat format) {
    final StringBuilder address = new StringBuilder("https://www.youtube.com/api/timedtext?lang=");
    address.append(locale)
            .append("&fmt=").append(format.getExtension())
            .append("&name=&v=").append(id);
    // Entries built here are always marked as non-generated
    return new Subtitles(format, locale, address.toString(), false);
}
private Map<String, ItagItem> getItags(String encodedUrlMapKey, ItagItem.ItagType itagTypeWanted) throws ParsingException {
Map<String, ItagItem> urlAndItags = new LinkedHashMap<>();

View file

@ -132,10 +132,9 @@ public abstract class StreamExtractor extends Extractor {
public abstract List<VideoStream> getVideoStreams() throws IOException, ExtractionException;
public abstract List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionException;
@Nullable
@Nonnull
public abstract List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException;
@Nullable
@Nonnull
public abstract List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException;
public abstract StreamType getStreamType() throws ParsingException;

View file

@ -138,6 +138,10 @@ public class StreamInfo extends Info {
return start_position;
}
/**
 * Returns the subtitles stored for this stream.
 *
 * @return the stored subtitles, or an empty list if none were ever set (the
 *         field is left unset when subtitle extraction fails); never {@code null}
 */
public List<Subtitles> getSubtitles() {
    return subtitles == null ? Collections.<Subtitles>emptyList() : subtitles;
}
public void setStreamType(StreamType stream_type) {
this.stream_type = stream_type;
}
@ -214,6 +218,10 @@ public class StreamInfo extends Info {
this.start_position = start_position;
}
/**
 * Stores the given subtitles list on this stream info.
 *
 * @param subtitles the list to store; kept by reference, not copied
 */
public void setSubtitles(List<Subtitles> subtitles) {
this.subtitles = subtitles;
}
public static class StreamExtractException extends ExtractionException {
StreamExtractException(String message) {
super(message);
@ -313,6 +321,12 @@ public class StreamInfo extends Info {
streamInfo.addError(new ExtractionException("Couldn't get video only streams", e));
}
try {
streamInfo.setSubtitles(extractor.getSubtitlesDefault());
} catch (Exception e) {
streamInfo.addError(new ExtractionException("Couldn't get subtitles", e));
}
// Lists can be null if an exception was thrown during extraction
if (streamInfo.getVideoStreams() == null) streamInfo.setVideoStreams(Collections.<VideoStream>emptyList());
if (streamInfo.getVideoOnlyStreams()== null) streamInfo.setVideoOnlyStreams(Collections.<VideoStream>emptyList());
@ -444,4 +458,6 @@ public class StreamInfo extends Info {
public List<InfoItem> related_streams;
//in seconds. some metadata is not passed using a StreamInfo object!
public long start_position = 0;
public List<Subtitles> subtitles;
}

View file

@ -3,7 +3,6 @@ package org.schabi.newpipe.extractor.services.youtube;
import org.junit.BeforeClass;
import org.junit.Test;
import org.schabi.newpipe.Downloader;
import org.schabi.newpipe.extractor.ExtractorAsserts;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
@ -148,12 +147,12 @@ public class YoutubeStreamExtractorDefaultTest {
@Test
public void testGetSubtitlesListDefault() throws IOException, ExtractionException {
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => empty list
assertTrue(extractor.getSubtitlesDefault() == null);
assertTrue(extractor.getSubtitlesDefault().isEmpty());
}
@Test
public void testGetSubtitlesList() throws IOException, ExtractionException {
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => empty list
assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT) == null);
assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT).isEmpty());
}
}

View file

@ -122,12 +122,12 @@ public class YoutubeStreamExtractorRestrictedTest {
@Test
public void testGetSubtitlesListDefault() throws IOException, ExtractionException {
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => empty list
assertNull(extractor.getSubtitlesDefault());
assertTrue(extractor.getSubtitlesDefault().isEmpty());
}
@Test
public void testGetSubtitlesList() throws IOException, ExtractionException {
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => empty list
assertNull(extractor.getSubtitles(SubtitlesFormat.VTT));
assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT).isEmpty());
}
}