Added method to download subtitles as VTT

This commit is contained in:
tonakriz 2017-11-23 00:10:12 +01:00
parent 6031695343
commit 6e3651fdf5
7 changed files with 43 additions and 41 deletions

View file

@ -10,6 +10,7 @@ import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.stream.*; import org.schabi.newpipe.extractor.stream.*;
import org.schabi.newpipe.extractor.utils.Parser; import org.schabi.newpipe.extractor.utils.Parser;
@ -194,10 +195,15 @@ public class SoundcloudStreamExtractor extends StreamExtractor {
} }
@Override @Override
public HashMap<String, String[]> getSubtitles() throws IOException, ExtractionException, JsonParserException { public HashMap<String, String[]> getSubtitlesList() throws IOException, ExtractionException, JsonParserException {
return new HashMap<>(); return new HashMap<>();
} }
/**
 * Subtitle download stub for this extractor.
 *
 * The argument is never read and no request is made; the result is always
 * the empty string.
 *
 * @param URL subtitle location (ignored by this implementation)
 * @return an empty string, never {@code null}
 * @throws IOException declared by the overridden method; not thrown here
 * @throws ReCaptchaException declared by the overridden method; not thrown here
 */
@Override
public String downloadSubtitles(String URL) throws IOException, ReCaptchaException {
    // Nothing to fetch: hand back an empty body.
    final String emptyBody = "";
    return emptyBody;
}
@Override @Override
public StreamType getStreamType() { public StreamType getStreamType() {
return StreamType.AUDIO_STREAM; return StreamType.AUDIO_STREAM;

View file

@ -420,34 +420,8 @@ public class YoutubeStreamExtractor extends StreamExtractor {
return videoOnlyStreams; return videoOnlyStreams;
} }
/**
* Example output:
* {
* #language code#: [
* [0]"captions URL",
* [1]"language Name"
* ],
* "a.en": { // a.#language code# == auto generated
* [0]"https://youtube.com/api/timedtext..."
* [1]"English (Auto-generated)"
* },
* ".en": { // .#language code# == normal (not auto generated)
* [0]"https://youtube.com/api/timedtext..."
* [1]"English"
* }
* }
*
* Example usage:
* 1) Get list of keys in the Map if there are any
* 2) Get
*
* @return Map(String, StringArray[2])
* @throws IOException - Thrown when parsing HTML page
* @throws ExtractionException - Thrown when parsing HTML
* @throws JsonParserException - Thrown when parsing JSON from the web page
*/
@Override @Override
public HashMap<String, String[]> getSubtitles() throws IOException, ExtractionException, JsonParserException { public HashMap<String, String[]> getSubtitlesList() throws IOException, ExtractionException, JsonParserException {
HashMap<String, String[]> result = new HashMap<>(); HashMap<String, String[]> result = new HashMap<>();
JsonObject playerConfig = getPlayerConfig(getPageHtml()); JsonObject playerConfig = getPlayerConfig(getPageHtml());
@ -473,6 +447,14 @@ public class YoutubeStreamExtractor extends StreamExtractor {
return result; return result;
} }
/**
 * Downloads the subtitle track behind the given URL, requesting it in WebVTT format.
 *
 * Any {@code &fmt=...} parameter already present in the URL is removed and
 * {@code fmt=vtt} is appended, so the format is requested even when the
 * incoming URL did not specify one.
 *
 * @param URL subtitle URL, e.g. one taken from {@link #getSubtitlesList()}
 * @return whatever the downloader returns for the rewritten URL
 *         (presumably the response body as text; verify against Downloader)
 * @throws IOException propagated from the downloader
 * @throws ReCaptchaException propagated from the downloader
 */
@Override
public String downloadSubtitles(String URL) throws IOException, ReCaptchaException {
    Downloader dl = NewPipe.getDownloader();
    // Drop an existing format selector first so exactly one fmt parameter is sent.
    String withoutFormat = URL.replaceAll("&fmt=[^&]*", "");
    // Start the query string if the URL does not have one yet.
    String separator = withoutFormat.indexOf('?') >= 0 ? "&" : "?";
    // WebVTT ('vtt') is requested here; Timed Text Markup Language ('ttml') could be used instead.
    String urlAsVtt = withoutFormat + separator + "fmt=vtt";
    return dl.download(urlAsVtt);
}
@Override @Override
public StreamType getStreamType() throws ParsingException { public StreamType getStreamType() throws ParsingException {
//todo: if implementing livestream support this value should be generated dynamically //todo: if implementing livestream support this value should be generated dynamically

View file

@ -26,6 +26,7 @@ import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.UrlIdHandler; import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap; import java.util.HashMap;
@ -66,7 +67,8 @@ public abstract class StreamExtractor extends Extractor {
public abstract List<AudioStream> getAudioStreams() throws IOException, ExtractionException; public abstract List<AudioStream> getAudioStreams() throws IOException, ExtractionException;
public abstract List<VideoStream> getVideoStreams() throws IOException, ExtractionException; public abstract List<VideoStream> getVideoStreams() throws IOException, ExtractionException;
public abstract List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionException; public abstract List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionException;
public abstract HashMap<String, String[]> getSubtitles() throws IOException, ExtractionException, JsonParserException; public abstract HashMap<String, String[]> getSubtitlesList() throws IOException, ExtractionException, JsonParserException;
public abstract String downloadSubtitles(String URL) throws IOException, ReCaptchaException;
public abstract StreamType getStreamType() throws ParsingException; public abstract StreamType getStreamType() throws ParsingException;
public abstract StreamInfoItem getNextVideo() throws IOException, ExtractionException; public abstract StreamInfoItem getNextVideo() throws IOException, ExtractionException;

View file

@ -54,7 +54,6 @@ public class Parser {
if (foundMatch) { if (foundMatch) {
return mat.group(group); return mat.group(group);
} else { } else {
//Log.e(TAG, "failed to find pattern \""+pattern+"\" inside of \""+input+"\"");
if (input.length() > 1024) { if (input.length() > 1024) {
throw new RegexException("failed to find pattern \"" + pattern); throw new RegexException("failed to find pattern \"" + pattern);
} else { } else {
@ -64,12 +63,9 @@ public class Parser {
} }
public static boolean isMatch(String pattern, String input) { public static boolean isMatch(String pattern, String input) {
try { Pattern pat = Pattern.compile(pattern);
matchGroup1(pattern, input); Matcher mat = pat.matcher(input);
return true; return mat.find();
} catch (RegexException e) {
return false;
}
} }
public static Map<String, String> compatParseMap(final String input) throws UnsupportedEncodingException { public static Map<String, String> compatParseMap(final String input) throws UnsupportedEncodingException {

View file

@ -105,6 +105,6 @@ public class SoundcloudStreamExtractorDefaultTest {
@Test @Test
public void testGetSubtitles() throws IOException, ExtractionException, JsonParserException { public void testGetSubtitles() throws IOException, ExtractionException, JsonParserException {
assertTrue(extractor.getSubtitles() != null); assertTrue(extractor.getSubtitlesList() != null);
} }
} }

View file

@ -152,7 +152,18 @@ public class YoutubeStreamExtractorDefaultTest {
} }
@Test @Test
public void testGetSubtitles() throws IOException, ExtractionException, JsonParserException { public void testGetSubtitlesList() throws IOException, ExtractionException, JsonParserException {
assertTrue(extractor.getSubtitles() != null); assertTrue(extractor.getSubtitlesList() != null);
}
@Test
public void testDownloadSubtitles() throws Exception {
try {
extractor.downloadSubtitles(extractor.getSubtitlesList().get("en")[0]);
// Video has no subtitles!
assert false;
} catch (Exception e) {
assert true;
}
} }
} }

View file

@ -106,7 +106,12 @@ public class YoutubeStreamExtractorRestrictedTest {
} }
@Test @Test
public void testGetSubtitles() throws IOException, ExtractionException, JsonParserException { public void testGetSubtitlesList() throws IOException, ExtractionException, JsonParserException {
assertTrue(extractor.getSubtitles() != null); assertTrue(extractor.getSubtitlesList() != null);
}
@Test
public void testDownloadSubtitles() throws Exception {
assertTrue(extractor.downloadSubtitles("https://youtu.be/FmG385_uUys?t=174") != null);
} }
} }