diff --git a/NewPipeExtractor.iml b/NewPipeExtractor.iml index fa027b00..cdb9deeb 100644 --- a/NewPipeExtractor.iml +++ b/NewPipeExtractor.iml @@ -1,30 +1,13 @@ - - - - - - - - - - + + - - - - + + - - - - - \ No newline at end of file diff --git a/src/main/java/org/schabi/newpipe/extractor/Subtitles.java b/src/main/java/org/schabi/newpipe/extractor/Subtitles.java new file mode 100644 index 00000000..0542b385 --- /dev/null +++ b/src/main/java/org/schabi/newpipe/extractor/Subtitles.java @@ -0,0 +1,30 @@ +package org.schabi.newpipe.extractor; + +import org.schabi.newpipe.extractor.stream.SubtitlesFormat; + +public class Subtitles { + private SubtitlesFormat format; + private String languageCode, URL; + private boolean autoGenerated; + + public Subtitles(SubtitlesFormat format, String languageCode, String URL, boolean autoGenerated) { + this.format = format; + this.languageCode = languageCode; + this.URL = URL; + this.autoGenerated = autoGenerated; + } + + public SubtitlesFormat getFileType() { return format; } + + public String getLanguageCode() { + return languageCode; + } + + public String getURL() { + return URL; + } + + public boolean isAutoGenerated() { + return autoGenerated; + } +} diff --git a/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractor.java b/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractor.java index 0624c2ef..90404282 100644 --- a/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractor.java +++ b/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractor.java @@ -3,19 +3,16 @@ package org.schabi.newpipe.extractor.services.soundcloud; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; -import org.schabi.newpipe.extractor.Downloader; -import org.schabi.newpipe.extractor.MediaFormat; -import org.schabi.newpipe.extractor.NewPipe; -import 
org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.*; import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.stream.*; import org.schabi.newpipe.extractor.utils.Parser; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; +import java.util.*; public class SoundcloudStreamExtractor extends StreamExtractor { private JsonObject track; @@ -150,6 +147,16 @@ public class SoundcloudStreamExtractor extends StreamExtractor { return null; } + @Override + public List getSubtitlesDefault() throws IOException, ExtractionException { + return null; + } + + @Override + public List getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException { + return null; + } + @Override public StreamType getStreamType() { return StreamType.AUDIO_STREAM; diff --git a/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java b/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java index 9036dfcd..40e10226 100644 --- a/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java +++ b/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java @@ -1,7 +1,9 @@ package org.schabi.newpipe.extractor.services.youtube; +import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; +import com.grack.nanojson.JsonParserException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -11,6 +13,7 @@ import org.mozilla.javascript.ScriptableObject; import org.schabi.newpipe.extractor.Downloader; import org.schabi.newpipe.extractor.NewPipe; import 
org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.Subtitles; import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; @@ -20,10 +23,7 @@ import org.schabi.newpipe.extractor.utils.Parser; import org.schabi.newpipe.extractor.utils.Utils; import java.io.IOException; -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -379,6 +379,49 @@ public class YoutubeStreamExtractor extends StreamExtractor { return videoOnlyStreams; } + @Override + public List getSubtitlesDefault() throws IOException, ExtractionException { + return getSubtitles(SubtitlesFormat.TTML); + } + + @Override + public List getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException { + JsonObject playerConfig = getPlayerConfig(getPageHtml()); + String playerResponse = playerConfig.getObject("args").getString("player_response"); + + JsonObject captions; + try { + // No "captions" object in the player response, return null + if (!JsonParser.object().from(playerResponse).has("captions")) return null; + + captions = JsonParser.object().from(playerResponse).getObject("captions"); + } catch (JsonParserException e) { + // Failed to parse subtitles + return null; + } + JsonArray captionsArray = captions.getObject("playerCaptionsTracklistRenderer").getArray("captionTracks"); + + int captionsSize = captionsArray.size(); + // Should not happen: if the "captions" object is present, it should always have some captions in it + if(captionsSize == 0) return null; + + List result = new ArrayList<>(); + for (int x = 0; x < captionsSize; x++) { + String baseUrl = captionsArray.getObject(x).getString("baseUrl"); + + String extension = format.getExtension(); + + String URL = 
baseUrl.replaceAll("&fmt=[^&]*", "&fmt=" + extension); + String captionsLangCode = captionsArray.getObject(x).getString("vssId"); + boolean isAutoGenerated = captionsLangCode.startsWith("a."); + String languageCode = captionsLangCode.replaceFirst((isAutoGenerated) ? "a." : ".", ""); + + result.add(new Subtitles(format, languageCode, URL, isAutoGenerated)); + } + + return result; + } + @Override public StreamType getStreamType() throws ParsingException { //todo: if implementing livestream support this value should be generated dynamically @@ -456,13 +499,24 @@ public class YoutubeStreamExtractor extends StreamExtractor { private static volatile String decryptionCode = ""; + private static String pageHtml = null; + + private String getPageHtml() throws IOException, ExtractionException{ + if (pageHtml == null) { + Downloader dl = NewPipe.getDownloader(); + pageHtml = dl.download(getCleanUrl()); + } + return pageHtml; + } + @Override public void fetchPage() throws IOException, ExtractionException { Downloader dl = NewPipe.getDownloader(); - String pageContent = dl.download(getCleanUrl()); + String pageContent = getPageHtml(); doc = Jsoup.parse(pageContent, getCleanUrl()); + String playerUrl; // Check if the video is age restricted if (pageContent.contains(". 
*/ +import com.grack.nanojson.JsonParserException; import org.schabi.newpipe.extractor.Extractor; import org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.Subtitles; import org.schabi.newpipe.extractor.UrlIdHandler; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.utils.Parser; import java.io.IOException; @@ -109,6 +112,8 @@ public abstract class StreamExtractor extends Extractor { public abstract List getAudioStreams() throws IOException, ExtractionException; public abstract List getVideoStreams() throws IOException, ExtractionException; public abstract List getVideoOnlyStreams() throws IOException, ExtractionException; + public abstract List getSubtitlesDefault() throws IOException, ExtractionException; + public abstract List getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException; public abstract StreamType getStreamType() throws ParsingException; public abstract StreamInfoItem getNextVideo() throws IOException, ExtractionException; diff --git a/src/main/java/org/schabi/newpipe/extractor/stream/SubtitlesFormat.java b/src/main/java/org/schabi/newpipe/extractor/stream/SubtitlesFormat.java new file mode 100644 index 00000000..a4545480 --- /dev/null +++ b/src/main/java/org/schabi/newpipe/extractor/stream/SubtitlesFormat.java @@ -0,0 +1,27 @@ +package org.schabi.newpipe.extractor.stream; + +import org.schabi.newpipe.extractor.Subtitles; + +public enum SubtitlesFormat { + // YouTube subtitles formats + // TRANSCRIPT(3) is default YT format based on TTML, + // but unlike VTT or TTML, it is NOT W3 standard + // TRANSCRIPT subtitles are NOT supported by ExoPlayer, only VTT and TTML + VTT (0x0, "vtt"), + TTML (0x1, "ttml"), + TRANSCRIPT1 (0x2, "srv1"), + TRANSCRIPT2 (0x3, "srv2"), + TRANSCRIPT3 (0x4, "srv3"); + + private int id; + 
private String extension; + + SubtitlesFormat(int id, String extension) { + this.id = id; + this.extension = extension; + } + + public String getExtension() { + return extension; + } +} diff --git a/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java b/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java index 2db76246..9920c7fb 100644 --- a/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java +++ b/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java @@ -54,7 +54,6 @@ public class Parser { if (foundMatch) { return mat.group(group); } else { - //Log.e(TAG, "failed to find pattern \""+pattern+"\" inside of \""+input+"\""); if (input.length() > 1024) { throw new RegexException("failed to find pattern \"" + pattern); } else { @@ -64,12 +63,9 @@ public class Parser { } public static boolean isMatch(String pattern, String input) { - try { - matchGroup1(pattern, input); - return true; - } catch (RegexException e) { - return false; - } + Pattern pat = Pattern.compile(pattern); + Matcher mat = pat.matcher(input); + return mat.find(); } public static Map compatParseMap(final String input) throws UnsupportedEncodingException { diff --git a/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractorDefaultTest.java b/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractorDefaultTest.java index 2d76bdd3..fe23298b 100644 --- a/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractorDefaultTest.java +++ b/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractorDefaultTest.java @@ -1,16 +1,20 @@ package org.schabi.newpipe.extractor.services.soundcloud; +import com.grack.nanojson.JsonParserException; import org.junit.Before; import org.junit.Test; import org.schabi.newpipe.Downloader; import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.Subtitles; import 
org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.stream.StreamExtractor; import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector; import org.schabi.newpipe.extractor.stream.StreamType; +import org.schabi.newpipe.extractor.stream.SubtitlesFormat; import java.io.IOException; +import java.util.List; import static org.junit.Assert.*; import static org.schabi.newpipe.extractor.ServiceList.SoundCloud; @@ -101,4 +105,16 @@ public class SoundcloudStreamExtractorDefaultTest { assertFalse(relatedVideos.getItemList().isEmpty()); assertTrue(relatedVideos.getErrors().isEmpty()); } + + @Test + public void testGetSubtitlesListDefault() throws IOException, ExtractionException { + // The SoundCloud extractor does not provide subtitles => null + assertTrue(extractor.getSubtitlesDefault() == null); + } + + @Test + public void testGetSubtitlesList() throws IOException, ExtractionException { + // The SoundCloud extractor does not provide subtitles => null + assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT) == null); + } } diff --git a/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java b/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java index 63dfaed0..6e15b499 100644 --- a/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java +++ b/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java @@ -1,17 +1,16 @@ package org.schabi.newpipe.extractor.services.youtube; +import com.grack.nanojson.JsonParserException; import org.junit.Before; import org.junit.Test; import org.schabi.newpipe.Downloader; import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import 
org.schabi.newpipe.extractor.exceptions.ParsingException; -import org.schabi.newpipe.extractor.stream.StreamExtractor; -import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector; -import org.schabi.newpipe.extractor.stream.StreamType; -import org.schabi.newpipe.extractor.stream.VideoStream; +import org.schabi.newpipe.extractor.stream.*; import java.io.IOException; +import java.util.HashMap; import static org.junit.Assert.*; import static org.schabi.newpipe.extractor.ServiceList.YouTube; @@ -148,4 +147,16 @@ public class YoutubeStreamExtractorDefaultTest { } assertTrue(relatedVideos.getErrors().isEmpty()); } + + @Test + public void testGetSubtitlesListDefault() throws IOException, ExtractionException { + // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null + assertTrue(extractor.getSubtitlesDefault() == null); + } + + @Test + public void testGetSubtitlesList() throws IOException, ExtractionException { + // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null + assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT) == null); + } } diff --git a/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorRestrictedTest.java b/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorRestrictedTest.java index fadd13df..65a11ebe 100644 --- a/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorRestrictedTest.java +++ b/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorRestrictedTest.java @@ -1,5 +1,6 @@ package org.schabi.newpipe.extractor.services.youtube; +import com.grack.nanojson.JsonParserException; import org.junit.Before; import org.junit.Test; import org.schabi.newpipe.Downloader; @@ -7,6 +8,7 @@ import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; import 
org.schabi.newpipe.extractor.stream.StreamExtractor; +import org.schabi.newpipe.extractor.stream.SubtitlesFormat; import org.schabi.newpipe.extractor.stream.VideoStream; import java.io.IOException; @@ -103,4 +105,17 @@ public class YoutubeStreamExtractorRestrictedTest { 0 <= s.format && s.format <= 4); } } + + + @Test + public void testGetSubtitlesListDefault() throws IOException, ExtractionException { + // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null + assertTrue(extractor.getSubtitlesDefault() == null); + } + + @Test + public void testGetSubtitlesList() throws IOException, ExtractionException { + // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null + assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT) == null); + } }