- Fixed subtitles extraction to use the method from youtube-dl.
- Exposed subtitles during extraction.
- Made subtitle lists return non-null empty collections instead of null.
This commit is contained in:
		
							parent
							
								
									0061131d39
								
							
						
					
					
						commit
						1f1bbaad57
					
				
					 7 changed files with 82 additions and 57 deletions
				
			
		|  | @ -2,7 +2,9 @@ package org.schabi.newpipe.extractor; | ||||||
| 
 | 
 | ||||||
| import org.schabi.newpipe.extractor.stream.SubtitlesFormat; | import org.schabi.newpipe.extractor.stream.SubtitlesFormat; | ||||||
| 
 | 
 | ||||||
| public class Subtitles { | import java.io.Serializable; | ||||||
|  | 
 | ||||||
|  | public class Subtitles implements Serializable { | ||||||
|     private final SubtitlesFormat format; |     private final SubtitlesFormat format; | ||||||
|     private final String languageCode; |     private final String languageCode; | ||||||
|     private final String URL; |     private final String URL; | ||||||
|  |  | ||||||
|  | @ -167,15 +167,15 @@ public class SoundcloudStreamExtractor extends StreamExtractor { | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     @Override |     @Override | ||||||
|     @Nullable |     @Nonnull | ||||||
|     public List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException { |     public List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException { | ||||||
|         return null; |         return Collections.emptyList(); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     @Override |     @Override | ||||||
|     @Nullable |     @Nonnull | ||||||
|     public List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException { |     public List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException { | ||||||
|         return null; |         return Collections.emptyList(); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     @Override |     @Override | ||||||
|  |  | ||||||
|  | @ -7,6 +7,7 @@ import com.grack.nanojson.JsonParserException; | ||||||
| import org.jsoup.Jsoup; | import org.jsoup.Jsoup; | ||||||
| import org.jsoup.nodes.Document; | import org.jsoup.nodes.Document; | ||||||
| import org.jsoup.nodes.Element; | import org.jsoup.nodes.Element; | ||||||
|  | import org.jsoup.select.Elements; | ||||||
| import org.mozilla.javascript.Context; | import org.mozilla.javascript.Context; | ||||||
| import org.mozilla.javascript.Function; | import org.mozilla.javascript.Function; | ||||||
| import org.mozilla.javascript.ScriptableObject; | import org.mozilla.javascript.ScriptableObject; | ||||||
|  | @ -26,8 +27,6 @@ import javax.annotation.Nonnull; | ||||||
| import javax.annotation.Nullable; | import javax.annotation.Nullable; | ||||||
| import java.io.IOException; | import java.io.IOException; | ||||||
| import java.util.*; | import java.util.*; | ||||||
| import java.util.regex.Matcher; |  | ||||||
| import java.util.regex.Pattern; |  | ||||||
| 
 | 
 | ||||||
| /* | /* | ||||||
|  * Created by Christian Schabesberger on 06.08.15. |  * Created by Christian Schabesberger on 06.08.15. | ||||||
|  | @ -74,6 +73,12 @@ public class YoutubeStreamExtractor extends StreamExtractor { | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     public class SubtitlesException extends ContentNotAvailableException { | ||||||
|  |         SubtitlesException(String message, Throwable cause) { | ||||||
|  |             super(message, cause); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     /*//////////////////////////////////////////////////////////////////////////*/ |     /*//////////////////////////////////////////////////////////////////////////*/ | ||||||
| 
 | 
 | ||||||
|     private Document doc; |     private Document doc; | ||||||
|  | @ -81,6 +86,8 @@ public class YoutubeStreamExtractor extends StreamExtractor { | ||||||
|     private JsonObject playerArgs; |     private JsonObject playerArgs; | ||||||
|     @Nonnull |     @Nonnull | ||||||
|     private final Map<String, String> videoInfoPage = new HashMap<>(); |     private final Map<String, String> videoInfoPage = new HashMap<>(); | ||||||
|  |     @Nonnull | ||||||
|  |     private List<Subtitles> availableSubtitles = new ArrayList<>(); | ||||||
| 
 | 
 | ||||||
|     private boolean isAgeRestricted; |     private boolean isAgeRestricted; | ||||||
| 
 | 
 | ||||||
|  | @ -419,54 +426,20 @@ public class YoutubeStreamExtractor extends StreamExtractor { | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     @Override |     @Override | ||||||
|     @Nullable |     @Nonnull | ||||||
|     public List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException { |     public List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException { | ||||||
|         return getSubtitles(SubtitlesFormat.TTML); |         return getSubtitles(SubtitlesFormat.VTT); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     @Override |     @Override | ||||||
|     @Nullable |     @Nonnull | ||||||
|     public List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException { |     public List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException { | ||||||
|         assertPageFetched(); |         assertPageFetched(); | ||||||
|         if(isAgeRestricted) { |         List<Subtitles> subtitles = new ArrayList<>(); | ||||||
|             // If the video is age restricted getPlayerConfig will fail |         for (final Subtitles subtitle : availableSubtitles) { | ||||||
|             return null; |             if (subtitle.getFileType() == format) subtitles.add(subtitle); | ||||||
|         } |         } | ||||||
|         // TODO: This should be done in onFetchPage() |         return subtitles; | ||||||
|         JsonObject playerConfig = getPlayerConfig(getPageHtml(NewPipe.getDownloader())); |  | ||||||
|         String playerResponse = playerConfig.getObject("args").getString("player_response"); |  | ||||||
| 
 |  | ||||||
|         JsonObject captions; |  | ||||||
|         try { |  | ||||||
|             // Captions does not exist, return null |  | ||||||
|             if (!JsonParser.object().from(playerResponse).has("captions")) return null; |  | ||||||
| 
 |  | ||||||
|             captions = JsonParser.object().from(playerResponse).getObject("captions"); |  | ||||||
|         } catch (JsonParserException e) { |  | ||||||
|             // Failed to parse subtitles |  | ||||||
|             return null; |  | ||||||
|         } |  | ||||||
|         JsonArray captionsArray = captions.getObject("playerCaptionsTracklistRenderer").getArray("captionTracks"); |  | ||||||
| 
 |  | ||||||
|         int captionsSize = captionsArray.size(); |  | ||||||
|         // Should not happen, if there is the "captions" object, it should always has some captions in it |  | ||||||
|         if(captionsSize == 0) return null; |  | ||||||
| 
 |  | ||||||
|         List<Subtitles> result = new ArrayList<>(); |  | ||||||
|         for (int x = 0; x < captionsSize; x++) { |  | ||||||
|             String baseUrl = captionsArray.getObject(x).getString("baseUrl"); |  | ||||||
| 
 |  | ||||||
|             String extension = format.getExtension(); |  | ||||||
| 
 |  | ||||||
|             String URL = baseUrl.replaceAll("&fmt=[^&]*", "&fmt=" + extension); |  | ||||||
|             String captionsLangCode = captionsArray.getObject(x).getString("vssId"); |  | ||||||
|             boolean isAutoGenerated = captionsLangCode.startsWith("a."); |  | ||||||
|             String languageCode = captionsLangCode.replaceFirst((isAutoGenerated) ? "a." : ".", ""); |  | ||||||
| 
 |  | ||||||
|             result.add(new Subtitles(format, languageCode, URL, isAutoGenerated)); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         return result; |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     @Override |     @Override | ||||||
|  | @ -580,6 +553,10 @@ public class YoutubeStreamExtractor extends StreamExtractor { | ||||||
|         if (decryptionCode.isEmpty()) { |         if (decryptionCode.isEmpty()) { | ||||||
|             decryptionCode = loadDecryptionCode(playerUrl); |             decryptionCode = loadDecryptionCode(playerUrl); | ||||||
|         } |         } | ||||||
|  | 
 | ||||||
|  |         if (availableSubtitles.isEmpty()) { | ||||||
|  |             availableSubtitles.addAll(getAvailableSubtitles(getId())); | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     private JsonObject getPlayerConfig(String pageContent) throws ParsingException { |     private JsonObject getPlayerConfig(String pageContent) throws ParsingException { | ||||||
|  | @ -732,6 +709,25 @@ public class YoutubeStreamExtractor extends StreamExtractor { | ||||||
|         return result == null ? "" : result.toString(); |         return result == null ? "" : result.toString(); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     private List<Subtitles> getAvailableSubtitles(final String id) throws SubtitlesException { | ||||||
|  |         try { | ||||||
|  |             final String listingUrl = getVideoSubtitlesListingUrl(id); | ||||||
|  |             final String pageContent = NewPipe.getDownloader().download(listingUrl); | ||||||
|  |             final Document listing = Jsoup.parse(pageContent, listingUrl); | ||||||
|  |             final Elements tracks = listing.select("track"); | ||||||
|  | 
 | ||||||
|  |             List<Subtitles> subtitles = new ArrayList<>(tracks.size() * 5); | ||||||
|  |             for (final Element track : tracks) { | ||||||
|  |                 final String languageCode = track.attr("lang_code"); | ||||||
|  |                 subtitles.add(getVideoSubtitlesUrl(id, languageCode, SubtitlesFormat.TTML)); | ||||||
|  |                 subtitles.add(getVideoSubtitlesUrl(id, languageCode, SubtitlesFormat.VTT)); | ||||||
|  |                 // todo: add transcripts, they are currently omitted since they are incompatible with ExoPlayer | ||||||
|  |             } | ||||||
|  |             return subtitles; | ||||||
|  |         } catch (IOException | ReCaptchaException e) { | ||||||
|  |             throw new SubtitlesException("Unable to download subtitles listing", e); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|     /*////////////////////////////////////////////////////////////////////////// |     /*////////////////////////////////////////////////////////////////////////// | ||||||
|     // Data Class |     // Data Class | ||||||
|     //////////////////////////////////////////////////////////////////////////*/ |     //////////////////////////////////////////////////////////////////////////*/ | ||||||
|  | @ -751,12 +747,25 @@ public class YoutubeStreamExtractor extends StreamExtractor { | ||||||
|     //////////////////////////////////////////////////////////////////////////*/ |     //////////////////////////////////////////////////////////////////////////*/ | ||||||
| 
 | 
 | ||||||
|     @Nonnull |     @Nonnull | ||||||
|     private String getVideoInfoUrl(final String id, final String sts) { |     private static String getVideoInfoUrl(final String id, final String sts) { | ||||||
|         return "https://www.youtube.com/get_video_info?" + "video_id=" + id + |         return "https://www.youtube.com/get_video_info?" + "video_id=" + id + | ||||||
|                 "&eurl=https://youtube.googleapis.com/v/" + id + |                 "&eurl=https://youtube.googleapis.com/v/" + id + | ||||||
|                 "&sts=" + sts + "&ps=default&gl=US&hl=en"; |                 "&sts=" + sts + "&ps=default&gl=US&hl=en"; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     @Nonnull | ||||||
|  |     private static String getVideoSubtitlesListingUrl(final String id) { | ||||||
|  |         return "https://video.google.com/timedtext?type=list&v=" + id; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     @Nonnull | ||||||
|  |     private static Subtitles getVideoSubtitlesUrl(final String id, final String locale, final SubtitlesFormat format) { | ||||||
|  |         final String url = "https://www.youtube.com/api/timedtext?lang=" + locale + | ||||||
|  |                 "&fmt=" + format.getExtension() + "&name=&v=" + id; | ||||||
|  |         // These are all non-generated | ||||||
|  |         return new Subtitles(format, locale, url, false); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     private Map<String, ItagItem> getItags(String encodedUrlMapKey, ItagItem.ItagType itagTypeWanted) throws ParsingException { |     private Map<String, ItagItem> getItags(String encodedUrlMapKey, ItagItem.ItagType itagTypeWanted) throws ParsingException { | ||||||
|         Map<String, ItagItem> urlAndItags = new LinkedHashMap<>(); |         Map<String, ItagItem> urlAndItags = new LinkedHashMap<>(); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -132,10 +132,9 @@ public abstract class StreamExtractor extends Extractor { | ||||||
|     public abstract List<VideoStream> getVideoStreams() throws IOException, ExtractionException; |     public abstract List<VideoStream> getVideoStreams() throws IOException, ExtractionException; | ||||||
|     public abstract List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionException; |     public abstract List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionException; | ||||||
| 
 | 
 | ||||||
|     @Nullable |     @Nonnull | ||||||
|     public abstract List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException; |     public abstract List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException; | ||||||
| 
 |     @Nonnull | ||||||
|     @Nullable |  | ||||||
|     public abstract List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException; |     public abstract List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException; | ||||||
| 
 | 
 | ||||||
|     public abstract StreamType getStreamType() throws ParsingException; |     public abstract StreamType getStreamType() throws ParsingException; | ||||||
|  |  | ||||||
|  | @ -138,6 +138,10 @@ public class StreamInfo extends Info { | ||||||
|         return start_position; |         return start_position; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     public List<Subtitles> getSubtitles() { | ||||||
|  |         return subtitles; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     public void setStreamType(StreamType stream_type) { |     public void setStreamType(StreamType stream_type) { | ||||||
|         this.stream_type = stream_type; |         this.stream_type = stream_type; | ||||||
|     } |     } | ||||||
|  | @ -214,6 +218,10 @@ public class StreamInfo extends Info { | ||||||
|         this.start_position = start_position; |         this.start_position = start_position; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     public void setSubtitles(List<Subtitles> subtitles) { | ||||||
|  |         this.subtitles = subtitles; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     public static class StreamExtractException extends ExtractionException { |     public static class StreamExtractException extends ExtractionException { | ||||||
|         StreamExtractException(String message) { |         StreamExtractException(String message) { | ||||||
|             super(message); |             super(message); | ||||||
|  | @ -313,6 +321,12 @@ public class StreamInfo extends Info { | ||||||
|             streamInfo.addError(new ExtractionException("Couldn't get video only streams", e)); |             streamInfo.addError(new ExtractionException("Couldn't get video only streams", e)); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  |         try { | ||||||
|  |             streamInfo.setSubtitles(extractor.getSubtitlesDefault()); | ||||||
|  |         } catch (Exception e) { | ||||||
|  |             streamInfo.addError(new ExtractionException("Couldn't get subtitles", e)); | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|         // Lists can be null if a exception was thrown during extraction |         // Lists can be null if a exception was thrown during extraction | ||||||
|         if (streamInfo.getVideoStreams() == null) streamInfo.setVideoStreams(Collections.<VideoStream>emptyList()); |         if (streamInfo.getVideoStreams() == null) streamInfo.setVideoStreams(Collections.<VideoStream>emptyList()); | ||||||
|         if (streamInfo.getVideoOnlyStreams()== null) streamInfo.setVideoOnlyStreams(Collections.<VideoStream>emptyList()); |         if (streamInfo.getVideoOnlyStreams()== null) streamInfo.setVideoOnlyStreams(Collections.<VideoStream>emptyList()); | ||||||
|  | @ -444,4 +458,6 @@ public class StreamInfo extends Info { | ||||||
|     public List<InfoItem> related_streams; |     public List<InfoItem> related_streams; | ||||||
|     //in seconds. some metadata is not passed using a StreamInfo object! |     //in seconds. some metadata is not passed using a StreamInfo object! | ||||||
|     public long start_position = 0; |     public long start_position = 0; | ||||||
|  | 
 | ||||||
|  |     public List<Subtitles> subtitles; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -3,7 +3,6 @@ package org.schabi.newpipe.extractor.services.youtube; | ||||||
| import org.junit.BeforeClass; | import org.junit.BeforeClass; | ||||||
| import org.junit.Test; | import org.junit.Test; | ||||||
| import org.schabi.newpipe.Downloader; | import org.schabi.newpipe.Downloader; | ||||||
| import org.schabi.newpipe.extractor.ExtractorAsserts; |  | ||||||
| import org.schabi.newpipe.extractor.NewPipe; | import org.schabi.newpipe.extractor.NewPipe; | ||||||
| import org.schabi.newpipe.extractor.exceptions.ExtractionException; | import org.schabi.newpipe.extractor.exceptions.ExtractionException; | ||||||
| import org.schabi.newpipe.extractor.exceptions.ParsingException; | import org.schabi.newpipe.extractor.exceptions.ParsingException; | ||||||
|  | @ -148,12 +147,12 @@ public class YoutubeStreamExtractorDefaultTest { | ||||||
|     @Test |     @Test | ||||||
|     public void testGetSubtitlesListDefault() throws IOException, ExtractionException { |     public void testGetSubtitlesListDefault() throws IOException, ExtractionException { | ||||||
|         // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null |         // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null | ||||||
|         assertTrue(extractor.getSubtitlesDefault() == null); |         assertTrue(extractor.getSubtitlesDefault().isEmpty()); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     @Test |     @Test | ||||||
|     public void testGetSubtitlesList() throws IOException, ExtractionException { |     public void testGetSubtitlesList() throws IOException, ExtractionException { | ||||||
|         // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null |         // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null | ||||||
|         assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT) == null); |         assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT).isEmpty()); | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -122,12 +122,12 @@ public class YoutubeStreamExtractorRestrictedTest { | ||||||
|     @Test |     @Test | ||||||
|     public void testGetSubtitlesListDefault() throws IOException, ExtractionException { |     public void testGetSubtitlesListDefault() throws IOException, ExtractionException { | ||||||
|         // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null |         // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null | ||||||
|         assertNull(extractor.getSubtitlesDefault()); |         assertTrue(extractor.getSubtitlesDefault().isEmpty()); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     @Test |     @Test | ||||||
|     public void testGetSubtitlesList() throws IOException, ExtractionException { |     public void testGetSubtitlesList() throws IOException, ExtractionException { | ||||||
|         // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null |         // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null | ||||||
|         assertNull(extractor.getSubtitles(SubtitlesFormat.VTT)); |         assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT).isEmpty()); | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue