- Fix subtitles extraction to use the method from youtube-dl.
- Expose subtitles during extraction.
- Make subtitle lists return non-null empty collections instead of null.
This commit is contained in:
parent
0061131d39
commit
1f1bbaad57
7 changed files with 82 additions and 57 deletions
|
@ -2,7 +2,9 @@ package org.schabi.newpipe.extractor;
|
|||
|
||||
import org.schabi.newpipe.extractor.stream.SubtitlesFormat;
|
||||
|
||||
public class Subtitles {
|
||||
import java.io.Serializable;
|
||||
|
||||
public class Subtitles implements Serializable {
|
||||
private final SubtitlesFormat format;
|
||||
private final String languageCode;
|
||||
private final String URL;
|
||||
|
|
|
@ -167,15 +167,15 @@ public class SoundcloudStreamExtractor extends StreamExtractor {
|
|||
}
|
||||
|
||||
@Override
|
||||
@Nullable
|
||||
@Nonnull
|
||||
public List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException {
|
||||
return null;
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
@Override
|
||||
@Nullable
|
||||
@Nonnull
|
||||
public List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException {
|
||||
return null;
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -7,6 +7,7 @@ import com.grack.nanojson.JsonParserException;
|
|||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.mozilla.javascript.Context;
|
||||
import org.mozilla.javascript.Function;
|
||||
import org.mozilla.javascript.ScriptableObject;
|
||||
|
@ -26,8 +27,6 @@ import javax.annotation.Nonnull;
|
|||
import javax.annotation.Nullable;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/*
|
||||
* Created by Christian Schabesberger on 06.08.15.
|
||||
|
@ -74,6 +73,12 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
}
|
||||
}
|
||||
|
||||
public class SubtitlesException extends ContentNotAvailableException {
|
||||
SubtitlesException(String message, Throwable cause) {
|
||||
super(message, cause);
|
||||
}
|
||||
}
|
||||
|
||||
/*//////////////////////////////////////////////////////////////////////////*/
|
||||
|
||||
private Document doc;
|
||||
|
@ -81,6 +86,8 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
private JsonObject playerArgs;
|
||||
@Nonnull
|
||||
private final Map<String, String> videoInfoPage = new HashMap<>();
|
||||
@Nonnull
|
||||
private List<Subtitles> availableSubtitles = new ArrayList<>();
|
||||
|
||||
private boolean isAgeRestricted;
|
||||
|
||||
|
@ -419,54 +426,20 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
}
|
||||
|
||||
@Override
|
||||
@Nullable
|
||||
@Nonnull
|
||||
public List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException {
|
||||
return getSubtitles(SubtitlesFormat.TTML);
|
||||
return getSubtitles(SubtitlesFormat.VTT);
|
||||
}
|
||||
|
||||
@Override
|
||||
@Nullable
|
||||
@Nonnull
|
||||
public List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException {
|
||||
assertPageFetched();
|
||||
if(isAgeRestricted) {
|
||||
// If the video is age restricted getPlayerConfig will fail
|
||||
return null;
|
||||
List<Subtitles> subtitles = new ArrayList<>();
|
||||
for (final Subtitles subtitle : availableSubtitles) {
|
||||
if (subtitle.getFileType() == format) subtitles.add(subtitle);
|
||||
}
|
||||
// TODO: This should be done in onFetchPage()
|
||||
JsonObject playerConfig = getPlayerConfig(getPageHtml(NewPipe.getDownloader()));
|
||||
String playerResponse = playerConfig.getObject("args").getString("player_response");
|
||||
|
||||
JsonObject captions;
|
||||
try {
|
||||
// Captions does not exist, return null
|
||||
if (!JsonParser.object().from(playerResponse).has("captions")) return null;
|
||||
|
||||
captions = JsonParser.object().from(playerResponse).getObject("captions");
|
||||
} catch (JsonParserException e) {
|
||||
// Failed to parse subtitles
|
||||
return null;
|
||||
}
|
||||
JsonArray captionsArray = captions.getObject("playerCaptionsTracklistRenderer").getArray("captionTracks");
|
||||
|
||||
int captionsSize = captionsArray.size();
|
||||
// Should not happen, if there is the "captions" object, it should always has some captions in it
|
||||
if(captionsSize == 0) return null;
|
||||
|
||||
List<Subtitles> result = new ArrayList<>();
|
||||
for (int x = 0; x < captionsSize; x++) {
|
||||
String baseUrl = captionsArray.getObject(x).getString("baseUrl");
|
||||
|
||||
String extension = format.getExtension();
|
||||
|
||||
String URL = baseUrl.replaceAll("&fmt=[^&]*", "&fmt=" + extension);
|
||||
String captionsLangCode = captionsArray.getObject(x).getString("vssId");
|
||||
boolean isAutoGenerated = captionsLangCode.startsWith("a.");
|
||||
String languageCode = captionsLangCode.replaceFirst((isAutoGenerated) ? "a." : ".", "");
|
||||
|
||||
result.add(new Subtitles(format, languageCode, URL, isAutoGenerated));
|
||||
}
|
||||
|
||||
return result;
|
||||
return subtitles;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -580,6 +553,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
if (decryptionCode.isEmpty()) {
|
||||
decryptionCode = loadDecryptionCode(playerUrl);
|
||||
}
|
||||
|
||||
if (availableSubtitles.isEmpty()) {
|
||||
availableSubtitles.addAll(getAvailableSubtitles(getId()));
|
||||
}
|
||||
}
|
||||
|
||||
private JsonObject getPlayerConfig(String pageContent) throws ParsingException {
|
||||
|
@ -732,6 +709,25 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
return result == null ? "" : result.toString();
|
||||
}
|
||||
|
||||
private List<Subtitles> getAvailableSubtitles(final String id) throws SubtitlesException {
|
||||
try {
|
||||
final String listingUrl = getVideoSubtitlesListingUrl(id);
|
||||
final String pageContent = NewPipe.getDownloader().download(listingUrl);
|
||||
final Document listing = Jsoup.parse(pageContent, listingUrl);
|
||||
final Elements tracks = listing.select("track");
|
||||
|
||||
List<Subtitles> subtitles = new ArrayList<>(tracks.size() * 5);
|
||||
for (final Element track : tracks) {
|
||||
final String languageCode = track.attr("lang_code");
|
||||
subtitles.add(getVideoSubtitlesUrl(id, languageCode, SubtitlesFormat.TTML));
|
||||
subtitles.add(getVideoSubtitlesUrl(id, languageCode, SubtitlesFormat.VTT));
|
||||
// todo: add transcripts, they are currently omitted since they are incompatible with ExoPlayer
|
||||
}
|
||||
return subtitles;
|
||||
} catch (IOException | ReCaptchaException e) {
|
||||
throw new SubtitlesException("Unable to download subtitles listing", e);
|
||||
}
|
||||
}
|
||||
/*//////////////////////////////////////////////////////////////////////////
|
||||
// Data Class
|
||||
//////////////////////////////////////////////////////////////////////////*/
|
||||
|
@ -751,12 +747,25 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
//////////////////////////////////////////////////////////////////////////*/
|
||||
|
||||
@Nonnull
|
||||
private String getVideoInfoUrl(final String id, final String sts) {
|
||||
private static String getVideoInfoUrl(final String id, final String sts) {
|
||||
return "https://www.youtube.com/get_video_info?" + "video_id=" + id +
|
||||
"&eurl=https://youtube.googleapis.com/v/" + id +
|
||||
"&sts=" + sts + "&ps=default&gl=US&hl=en";
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
private static String getVideoSubtitlesListingUrl(final String id) {
|
||||
return "https://video.google.com/timedtext?type=list&v=" + id;
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
private static Subtitles getVideoSubtitlesUrl(final String id, final String locale, final SubtitlesFormat format) {
|
||||
final String url = "https://www.youtube.com/api/timedtext?lang=" + locale +
|
||||
"&fmt=" + format.getExtension() + "&name=&v=" + id;
|
||||
// These are all non-generated
|
||||
return new Subtitles(format, locale, url, false);
|
||||
}
|
||||
|
||||
private Map<String, ItagItem> getItags(String encodedUrlMapKey, ItagItem.ItagType itagTypeWanted) throws ParsingException {
|
||||
Map<String, ItagItem> urlAndItags = new LinkedHashMap<>();
|
||||
|
||||
|
|
|
@ -132,10 +132,9 @@ public abstract class StreamExtractor extends Extractor {
|
|||
public abstract List<VideoStream> getVideoStreams() throws IOException, ExtractionException;
|
||||
public abstract List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionException;
|
||||
|
||||
@Nullable
|
||||
@Nonnull
|
||||
public abstract List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException;
|
||||
|
||||
@Nullable
|
||||
@Nonnull
|
||||
public abstract List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException;
|
||||
|
||||
public abstract StreamType getStreamType() throws ParsingException;
|
||||
|
|
|
@ -138,6 +138,10 @@ public class StreamInfo extends Info {
|
|||
return start_position;
|
||||
}
|
||||
|
||||
public List<Subtitles> getSubtitles() {
|
||||
return subtitles;
|
||||
}
|
||||
|
||||
public void setStreamType(StreamType stream_type) {
|
||||
this.stream_type = stream_type;
|
||||
}
|
||||
|
@ -214,6 +218,10 @@ public class StreamInfo extends Info {
|
|||
this.start_position = start_position;
|
||||
}
|
||||
|
||||
public void setSubtitles(List<Subtitles> subtitles) {
|
||||
this.subtitles = subtitles;
|
||||
}
|
||||
|
||||
public static class StreamExtractException extends ExtractionException {
|
||||
StreamExtractException(String message) {
|
||||
super(message);
|
||||
|
@ -313,6 +321,12 @@ public class StreamInfo extends Info {
|
|||
streamInfo.addError(new ExtractionException("Couldn't get video only streams", e));
|
||||
}
|
||||
|
||||
try {
|
||||
streamInfo.setSubtitles(extractor.getSubtitlesDefault());
|
||||
} catch (Exception e) {
|
||||
streamInfo.addError(new ExtractionException("Couldn't get subtitles", e));
|
||||
}
|
||||
|
||||
// Lists can be null if a exception was thrown during extraction
|
||||
if (streamInfo.getVideoStreams() == null) streamInfo.setVideoStreams(Collections.<VideoStream>emptyList());
|
||||
if (streamInfo.getVideoOnlyStreams()== null) streamInfo.setVideoOnlyStreams(Collections.<VideoStream>emptyList());
|
||||
|
@ -444,4 +458,6 @@ public class StreamInfo extends Info {
|
|||
public List<InfoItem> related_streams;
|
||||
//in seconds. some metadata is not passed using a StreamInfo object!
|
||||
public long start_position = 0;
|
||||
|
||||
public List<Subtitles> subtitles;
|
||||
}
|
||||
|
|
|
@ -3,7 +3,6 @@ package org.schabi.newpipe.extractor.services.youtube;
|
|||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
import org.schabi.newpipe.Downloader;
|
||||
import org.schabi.newpipe.extractor.ExtractorAsserts;
|
||||
import org.schabi.newpipe.extractor.NewPipe;
|
||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
|
@ -148,12 +147,12 @@ public class YoutubeStreamExtractorDefaultTest {
|
|||
@Test
|
||||
public void testGetSubtitlesListDefault() throws IOException, ExtractionException {
|
||||
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => empty list
|
||||
assertTrue(extractor.getSubtitlesDefault() == null);
|
||||
assertTrue(extractor.getSubtitlesDefault().isEmpty());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetSubtitlesList() throws IOException, ExtractionException {
|
||||
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => empty list
|
||||
assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT) == null);
|
||||
assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT).isEmpty());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -122,12 +122,12 @@ public class YoutubeStreamExtractorRestrictedTest {
|
|||
@Test
|
||||
public void testGetSubtitlesListDefault() throws IOException, ExtractionException {
|
||||
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => empty list
|
||||
assertNull(extractor.getSubtitlesDefault());
|
||||
assertTrue(extractor.getSubtitlesDefault().isEmpty());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetSubtitlesList() throws IOException, ExtractionException {
|
||||
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => empty list
|
||||
assertNull(extractor.getSubtitles(SubtitlesFormat.VTT));
|
||||
assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT).isEmpty());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue