ADD basic playlist support + youtube playlist support

2017-03-12 16:15:51 +01:00 · 2017-03-12 16:15:51 +01:00 · e8794d92b5
commit e8794d92b5
parent f63b7e8f00
9 changed files with 433 additions and 0 deletions
--- a/StreamingService.java
+++ b/StreamingService.java
@ -52,8 +52,11 @@ public abstract class StreamingService {
    public abstract SearchEngine getSearchEngineInstance();
    public abstract UrlIdHandler getStreamUrlIdHandlerInstance();
    public abstract UrlIdHandler getChannelUrlIdHandlerInstance();
+    /**
+     * Returns the handler that recognises and normalises playlist URLs of this service.
+     * {@link #getLinkTypeByUrl(String)} asks it whether a URL is a playlist link.
+     */
+    public abstract UrlIdHandler getPlayListUrlIdHandlerInstance();
    public abstract ChannelExtractor getChannelExtractorInstance(String url, int page)
            throws ExtractionException, IOException;
+    /**
+     * Creates an extractor for one page of the playlist behind {@code url}.
+     *
+     * @param url  link to the playlist
+     * @param page number of the page to load
+     * @return the extractor for the requested page
+     * @throws ExtractionException if the implementation cannot build the extractor from the page data
+     * @throws IOException         if the implementation fails while fetching the page
+     */
+    public abstract PlayListExtractor getPlayListExtractorInstance(String url, int page)
+            throws ExtractionException, IOException;
    public abstract SuggestionExtractor getSuggestionExtractorInstance();

    public final int getServiceId() {
@ -66,11 +69,14 @@ public abstract class StreamingService {
    public final LinkType getLinkTypeByUrl(String url) {
        UrlIdHandler sH = getStreamUrlIdHandlerInstance();
        UrlIdHandler cH = getChannelUrlIdHandlerInstance();
+        UrlIdHandler pH = getPlayListUrlIdHandlerInstance();

        if(sH.acceptUrl(url)) {
            return LinkType.STREAM;
        } else if(cH.acceptUrl(url)) {
            return LinkType.CHANNEL;
+        } else if (pH.acceptUrl(url)) {
+            return LinkType.PLAYLIST;
        } else {
            return LinkType.NONE;
        }
--- a/playlist/PlayListExtractor.java
+++ b/playlist/PlayListExtractor.java
@ -0,0 +1,41 @@
+package org.schabi.newpipe.extractor.playlist;
+
+import org.schabi.newpipe.extractor.UrlIdHandler;
+import org.schabi.newpipe.extractor.exceptions.ExtractionException;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.stream_info.StreamInfoItemCollector;
+
+import java.io.IOException;
+
+/**
+ * Base class for service specific playlist extractors.
+ *
+ * It stores the playlist url, the requested page, the service id and the url handler,
+ * and owns the {@link StreamInfoItemCollector} that implementations fill in
+ * {@link #getStreams()}.
+ */
+public abstract class PlayListExtractor {
+
+    private UrlIdHandler urlIdHandler;
+    private String url;
+    private int page = -1;
+    private int serviceId;
+    private StreamInfoItemCollector previewInfoCollector;
+
+    /**
+     * @param urlIdHandler handler handed to the stream item collector and exposed via {@link #getUrlIdHandler()}
+     * @param url          url of the playlist, stored as given
+     * @param page         page number requested by the caller
+     * @param serviceId    id of the service this extractor belongs to
+     */
+    public PlayListExtractor(UrlIdHandler urlIdHandler, String url, int page, int serviceId)
+            throws ExtractionException, IOException {
+        this.urlIdHandler = urlIdHandler;
+        this.url = url;
+        this.page = page;
+        this.serviceId = serviceId;
+        this.previewInfoCollector = new StreamInfoItemCollector(urlIdHandler, serviceId);
+    }
+
+    /** @return the url passed to the constructor */
+    public String getUrl() {
+        return url;
+    }
+
+    /** @return the url handler passed to the constructor */
+    public UrlIdHandler getUrlIdHandler() {
+        return urlIdHandler;
+    }
+
+    /** @return the id of the service this extractor was created for */
+    public int getServiceId() {
+        return serviceId;
+    }
+
+    /** @return the collector created in the constructor; always the same instance */
+    public StreamInfoItemCollector getStreamPreviewInfoCollector() {
+        return previewInfoCollector;
+    }
+
+    /** @return the display name of the playlist */
+    public abstract String getName() throws ParsingException;
+
+    /** @return url of the avatar image shown for the playlist */
+    public abstract String getAvatarUrl() throws ParsingException;
+
+    /** @return url of the banner image shown for the playlist */
+    public abstract String getBannerUrl() throws ParsingException;
+
+    /** @return a collector holding the streams found on the current page */
+    public abstract StreamInfoItemCollector getStreams() throws ParsingException;
+
+    /** @return whether another page can be requested after the current one */
+    public abstract boolean hasNextPage() throws ParsingException;
+}
--- a/playlist/PlayListInfo.java
+++ b/playlist/PlayListInfo.java
@ -0,0 +1,51 @@
+package org.schabi.newpipe.extractor.playlist;
+
+import org.schabi.newpipe.extractor.InfoItem;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.stream_info.StreamInfoItemCollector;
+
+import java.util.List;
+import java.util.Vector;
+
+/**
+ * Plain data holder describing one page of a playlist.
+ *
+ * Instances are built by {@link #getInfo(PlayListExtractor)}. Problems with optional
+ * data do not abort the extraction; they are recorded in {@link #errors}.
+ */
+public class PlayListInfo {
+
+    /** Id of the service the playlist comes from, -1 while unset. */
+    public int service_id = -1;
+    /** Name of the playlist. */
+    public String playList_name = "";
+    /** Url of the avatar image, empty when it could not be extracted. */
+    public String avatar_url = "";
+    /** Url of the banner image, empty when it could not be extracted. */
+    public String banner_url = "";
+    /** Streams of the extracted page; stays {@code null} when the stream extraction failed. */
+    public List<InfoItem> related_streams = null;
+    /** Whether the extractor reported a further page. */
+    public boolean hasNextPage = false;
+
+    /** Non fatal problems collected while the info was built. */
+    public List<Throwable> errors = new Vector<>();
+
+    /**
+     * Records a non fatal problem.
+     *
+     * @param e the exception to append to {@link #errors}
+     */
+    public void addException(Exception e) {
+        errors.add(e);
+    }
+
+    /**
+     * Builds a {@link PlayListInfo} from the given extractor.
+     *
+     * Name and paging state are mandatory: a failure there is propagated to the caller.
+     * Avatar, banner and streams are optional: failures are stored in {@link #errors}.
+     *
+     * @param extractor the extractor to read from
+     * @return the filled info object
+     * @throws ParsingException if the name or the paging state cannot be extracted
+     */
+    public static PlayListInfo getInfo(PlayListExtractor extractor) throws ParsingException {
+        PlayListInfo info = new PlayListInfo();
+
+        // previously never assigned, so every info reported the placeholder id -1
+        info.service_id = extractor.getServiceId();
+        info.playList_name = extractor.getName();
+        info.hasNextPage = extractor.hasNextPage();
+
+        try {
+            info.avatar_url = extractor.getAvatarUrl();
+        } catch (Exception e) {
+            info.errors.add(e);
+        }
+        try {
+            info.banner_url = extractor.getBannerUrl();
+        } catch (Exception e) {
+            info.errors.add(e);
+        }
+        try {
+            StreamInfoItemCollector c = extractor.getStreams();
+            info.related_streams = c.getItemList();
+            // per item problems gathered by the collector are surfaced as well
+            info.errors.addAll(c.getErrors());
+        } catch(Exception e) {
+            info.errors.add(e);
+        }
+
+        return info;
+    }
+}
--- a/playlist/PlayListInfoItem.java
+++ b/playlist/PlayListInfoItem.java
@ -0,0 +1,21 @@
+package org.schabi.newpipe.extractor.playlist;
+
+import org.schabi.newpipe.extractor.InfoItem;
+
+/**
+ * {@link InfoItem} describing a single playlist.
+ */
+public class PlayListInfoItem implements InfoItem {
+
+    /** Id of the service the playlist belongs to, -1 while unset. */
+    public int serviceId = -1;
+    /** Name of the playlist; returned by {@link #getTitle()}. */
+    public String name = "";
+    /** Url of the playlist thumbnail. */
+    public String thumbnailUrl = "";
+    /** Url of the playlist page; returned by {@link #getLink()}. */
+    public String webPageUrl = "";
+
+    /** @return always {@link InfoType#PLAYLIST} */
+    public InfoType infoType() {
+        return InfoType.PLAYLIST;
+    }
+
+    /** @return the value of {@link #name} */
+    public String getTitle() {
+        return name;
+    }
+
+    /** @return the value of {@link #webPageUrl} */
+    public String getLink() {
+        return webPageUrl;
+    }
+}
--- a/playlist/PlayListInfoItemCollector.java
+++ b/playlist/PlayListInfoItemCollector.java
@ -0,0 +1,33 @@
+package org.schabi.newpipe.extractor.playlist;
+
+import org.schabi.newpipe.extractor.InfoItemCollector;
+import org.schabi.newpipe.extractor.channel.ChannelInfoItemExtractor;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+
+/**
+ * Collector turning {@link PlayListInfoItemExtractor}s into {@link PlayListInfoItem}s.
+ */
+public class PlayListInfoItemCollector extends InfoItemCollector {
+
+    /**
+     * @param serviceId id of the service, forwarded to the parent collector
+     */
+    public PlayListInfoItemCollector(int serviceId) {
+        super(serviceId);
+    }
+
+    /**
+     * Reads one playlist item from the given extractor.
+     *
+     * Name and page url are mandatory, so a failure there is propagated. A failing
+     * thumbnail is only recorded as an error and leaves the default value in place.
+     *
+     * @param extractor source of the item data
+     * @return the extracted item
+     * @throws ParsingException if the name or the page url cannot be read
+     */
+    public PlayListInfoItem extract(PlayListInfoItemExtractor extractor) throws ParsingException {
+        final PlayListInfoItem item = new PlayListInfoItem();
+        item.serviceId = getServiceId();
+
+        item.name = extractor.getPlayListName();
+        item.webPageUrl = extractor.getWebPageUrl();
+
+        try {
+            item.thumbnailUrl = extractor.getThumbnailUrl();
+        } catch (Exception thumbnailProblem) {
+            addError(thumbnailProblem);
+        }
+
+        return item;
+    }
+
+    /**
+     * Extracts an item and adds it to this collector. Any exception raised while
+     * doing so is recorded as an error instead of being thrown.
+     *
+     * @param extractor source of the item data
+     */
+    public void commit(PlayListInfoItemExtractor extractor) throws ParsingException {
+        try {
+            final PlayListInfoItem item = extract(extractor);
+            addItem(item);
+        } catch (Exception problem) {
+            addError(problem);
+        }
+    }
+}
--- a/playlist/PlayListInfoItemExtractor.java
+++ b/playlist/PlayListInfoItemExtractor.java
@ -0,0 +1,9 @@
+package org.schabi.newpipe.extractor.playlist;
+
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+
+/**
+ * Supplies the raw data of a single playlist entry to {@link PlayListInfoItemCollector}.
+ */
+public interface PlayListInfoItemExtractor {
+
+    /** @return the name of the playlist */
+    String getPlayListName() throws ParsingException;
+
+    /** @return the url of the playlist page */
+    String getWebPageUrl() throws ParsingException;
+
+    /** @return the url of the playlist thumbnail */
+    String getThumbnailUrl() throws ParsingException;
+}
--- a/services/youtube/YoutubePlayListExtractor.java
+++ b/services/youtube/YoutubePlayListExtractor.java
@ -0,0 +1,223 @@
+package org.schabi.newpipe.extractor.services.youtube;
+
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.schabi.newpipe.extractor.AbstractStreamInfo;
+import org.schabi.newpipe.extractor.Downloader;
+import org.schabi.newpipe.extractor.NewPipe;
+import org.schabi.newpipe.extractor.Parser;
+import org.schabi.newpipe.extractor.UrlIdHandler;
+import org.schabi.newpipe.extractor.exceptions.ExtractionException;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.playlist.PlayListExtractor;
+import org.schabi.newpipe.extractor.stream_info.StreamInfoItemCollector;
+import org.schabi.newpipe.extractor.stream_info.StreamInfoItemExtractor;
+
+import java.io.IOException;
+
+public class YoutubePlayListExtractor extends PlayListExtractor {
+
+    // Log tag; not referenced anywhere in this class.
+    private String TAG = YoutubePlayListExtractor.class.toString();
+
+    // Parsed document of the page this instance was created for.
+    private Document doc = null;
+
+    // True when this instance was built from an ajax continuation page (page != 0).
+    private boolean isAjaxPage = false;
+
+    // The following fields are static on purpose of the paging scheme: the constructor
+    // for a page other than 0 downloads nextPageUrl before assigning it, so it relies on
+    // the value left behind by a previously constructed extractor. Likewise the getters
+    // only recompute name/avatarUrl/bannerUrl on a non-ajax page and otherwise return
+    // the value stored by an earlier instance.
+    // NOTE(review): being static, this state is shared by every extractor instance, so
+    // interleaving the extraction of two playlists mixes their values.
+    private static String name = "";
+    // Neither read nor written in this class.
+    private static String feedUrl = "";
+    private static String avatarUrl = "";
+    private static String bannerUrl = "";
+    private static String nextPageUrl = "";
+
+    /**
+     * Downloads and parses the requested page of a playlist.
+     *
+     * Page 0 is fetched from the cleaned playlist url. Every other page is fetched from
+     * the static {@code nextPageUrl} stored by an earlier extractor and arrives as json
+     * containing html fragments.
+     *
+     * @param urlIdHandler handler used to clean {@code url}
+     * @param url          url of the playlist
+     * @param page         0 for the regular playlist page, anything else for the continuation
+     * @param serviceId    id of the owning service
+     * @throws IOException         declared for the download calls
+     * @throws ExtractionException declared for the url cleanup; a {@link ParsingException}
+     *                             is thrown when the continuation json is malformed
+     */
+    public YoutubePlayListExtractor(UrlIdHandler urlIdHandler,
+                                    String url, int page, int serviceId) throws IOException, ExtractionException {
+        super(urlIdHandler, url, page, serviceId);
+        final Downloader dl = NewPipe.getDownloader();
+        url = urlIdHandler.cleanUrl(url);
+
+        if (page != 0) {
+            // continuation: the url was stored by the extractor of the previous page
+            final String rawAjax = dl.download(nextPageUrl);
+            try {
+                final JSONObject json = new JSONObject(rawAjax);
+                // wrap the rows so getStreams() finds the same tbody as on the first page
+                final String wrappedRows = "<table><tbody id=\"pl-load-more-destination\">" + json.getString("content_html") + "</tbody></table>";
+                doc = Jsoup.parse(wrappedRows, nextPageUrl);
+
+                final String loadMoreWidget = json.getString("load_more_widget_html");
+                if (loadMoreWidget.isEmpty()) {
+                    // no widget means this was the last page
+                    nextPageUrl = "";
+                } else {
+                    nextPageUrl = getNextPageUrl(Jsoup.parse(loadMoreWidget, nextPageUrl));
+                }
+            } catch (JSONException e) {
+                throw new ParsingException("Could not parse json data for next page", e);
+            }
+            isAjaxPage = true;
+        } else {
+            doc = Jsoup.parse(dl.download(url), url);
+            nextPageUrl = getNextPageUrl(doc);
+            isAjaxPage = false;
+        }
+    }
+
+    /**
+     * Returns the playlist name in the form "owner - title".
+     *
+     * The name is only parsed on a non-ajax page; on ajax pages the value stored by an
+     * earlier call is returned.
+     *
+     * @throws ParsingException if the name cannot be read from the page; the original
+     *                          failure is attached as cause
+     */
+    @Override
+    public String getName() throws ParsingException {
+        try {
+            if (!isAjaxPage) {
+                name = doc.select("span[class=\"qualified-channel-title-text\"]").first()
+                        .select("a").first().text() + " - " +
+                        doc.select("meta[name=title]").first().attr("content");
+            }
+            return name;
+        } catch (Exception e) {
+            // pass the cause on, otherwise the reason (e.g. a missing element) is lost
+            throw new ParsingException("Could not get playlist name", e);
+        }
+    }
+
+    /**
+     * Returns the url of the avatar image.
+     *
+     * The url is only parsed on a non-ajax page; protocol relative urls ("//...") are
+     * completed with "https:". On ajax pages the previously stored value is returned.
+     *
+     * @throws ParsingException if the avatar cannot be read from the page; the original
+     *                          failure is attached as cause
+     */
+    @Override
+    public String getAvatarUrl() throws ParsingException {
+        try {
+            if(!isAjaxPage) {
+                avatarUrl = doc.select("div[id=gh-banner] img[class=channel-header-profile-image]").first().attr("src");
+                if(avatarUrl.startsWith("//")) {
+                    avatarUrl = "https:" + avatarUrl;
+                }
+            }
+            return avatarUrl;
+        } catch(Exception e) {
+            // pass the cause on, otherwise the reason (e.g. a missing element) is lost
+            throw new ParsingException("Could not get playlist Avatar", e);
+        }
+    }
+
+    /**
+     * Returns the url of the banner image, taken from the css inside the banner element.
+     *
+     * The url is only parsed on a non-ajax page; on ajax pages the previously stored
+     * value is returned.
+     *
+     * @return the banner url, or {@code null} when the css points to s.ytimg.com
+     * @throws ParsingException if the banner cannot be read from the page; the original
+     *                          failure is attached as cause
+     */
+    @Override
+    public String getBannerUrl() throws ParsingException {
+        try {
+            if(!isAjaxPage) {
+                Element el = doc.select("div[id=\"gh-banner\"] style").first();
+                String cssContent = el.html();
+                String url = "https:" + Parser.matchGroup1("url\\((.*)\\)", cssContent);
+                if (url.contains("s.ytimg.com")) {
+                    bannerUrl = null;
+                } else {
+                    // the greedy group may run past the url, so cut at the first ");"
+                    // (a match without ");" makes substring throw and ends up in the catch below)
+                    bannerUrl = url.substring(0, url.indexOf(");"));
+                }
+            }
+            return bannerUrl;
+        } catch(Exception e) {
+            // pass the cause on, otherwise the reason (e.g. a missing element) is lost
+            throw new ParsingException("Could not get playlist Banner", e);
+        }
+    }
+
+    /**
+     * Collects the videos listed on the current page.
+     *
+     * Every child of the {@code pl-load-more-destination} table body is committed to the
+     * collector owned by this extractor as one stream item.
+     *
+     * @return the collector returned by {@code getStreamPreviewInfoCollector()}
+     * @throws ParsingException if the page does not contain the playlist table body
+     */
+    @Override
+    public StreamInfoItemCollector getStreams() throws ParsingException {
+        StreamInfoItemCollector collector = getStreamPreviewInfoCollector();
+        Element tbody = doc.select("tbody[id=\"pl-load-more-destination\"]").first();
+        if(tbody == null) {
+            // first() yields null for an empty selection; fail with a clear message
+            // instead of a bare NullPointerException further down
+            throw new ParsingException("Could not find the playlist video table");
+        }
+        final YoutubeStreamUrlIdHandler youtubeStreamUrlIdHandler = YoutubeStreamUrlIdHandler.getInstance();
+        for(final Element li : tbody.children()) {
+            collector.commit(new StreamInfoItemExtractor() {
+                @Override
+                public AbstractStreamInfo.StreamType getStreamType() throws ParsingException {
+                    return AbstractStreamInfo.StreamType.VIDEO_STREAM;
+                }
+
+                @Override
+                public String getWebPageUrl() throws ParsingException {
+                    try {
+                        return youtubeStreamUrlIdHandler.getUrl(li.attr("data-video-id"));
+                    } catch (Exception e) {
+                        throw new ParsingException("Could not get web page url for the video", e);
+                    }
+                }
+
+                @Override
+                public String getTitle() throws ParsingException {
+                    try {
+                        return li.attr("data-title");
+                    } catch (Exception e) {
+                        throw new ParsingException("Could not get title", e);
+                    }
+                }
+
+                @Override
+                public int getDuration() throws ParsingException {
+                    try {
+                        return YoutubeParsingHelper.parseDurationString(
+                                li.select("div[class=\"timestamp\"] span").first().text().trim());
+                    } catch(Exception e) {
+                        if(isLiveStream(li)) {
+                            // -1 for no duration
+                            return -1;
+                        } else {
+                            throw new ParsingException("Could not get Duration: " + getTitle(), e);
+                        }
+                    }
+                }
+
+                @Override
+                public String getUploader() throws ParsingException {
+                    return li.select("div[class=pl-video-owner] a").text();
+                }
+
+                // the upload date is not extracted from the playlist rows
+                @Override
+                public String getUploadDate() throws ParsingException {
+                    return "";
+                }
+
+                // the view count is not extracted from the playlist rows
+                @Override
+                public long getViewCount() throws ParsingException {
+                    return -1;
+                }
+
+                @Override
+                public String getThumbnailUrl() throws ParsingException {
+                    try {
+                        return "https://i.ytimg.com/vi/" + youtubeStreamUrlIdHandler.getId(getWebPageUrl()) + "/hqdefault.jpg";
+                    } catch (Exception e) {
+                        throw new ParsingException("Could not get thumbnail url", e);
+                    }
+                }
+
+                @Override
+                public boolean isAd() throws ParsingException {
+                    return false;
+                }
+
+                // An item counts as live when it carries a live badge, or when it has
+                // neither a badge nor a video-time element.
+                // NOTE(review): getDuration() reads the time from div.timestamp while this
+                // check looks for span video-time; confirm playlist rows ever contain the
+                // latter, otherwise every unparsable duration is reported as live.
+                private boolean isLiveStream(Element item) {
+                    Element liveBadge = item.select("span[class*=\"yt-badge-live\"]").first();
+                    if(liveBadge != null) {
+                        return true;
+                    }
+                    // sometimes live streams don't have badges but still are live streams;
+                    // if the video time is not available we most likely have an offline live stream
+                    return item.select("span[class*=\"video-time\"]").first() == null;
+                }
+            });
+        }
+
+        return collector;
+    }
+
+    /**
+     * @return true when a non-empty url for the following page is stored
+     */
+    @Override
+    public boolean hasNextPage() throws ParsingException {
+        if (nextPageUrl == null) {
+            return false;
+        }
+        return !nextPageUrl.isEmpty();
+    }
+
+    /**
+     * Looks up the "load more" button in the given document and builds the absolute
+     * url of the following page from it.
+     *
+     * @param d document to search
+     * @return the url of the next page, or an empty string when there is no button
+     * @throws ParsingException if the lookup fails unexpectedly
+     */
+    private String getNextPageUrl(Document d) throws ParsingException {
+        try {
+            final Element loadMore = d.select("button[class*=\"yt-uix-load-more\"]").first();
+            if (loadMore == null) {
+                // some lists are so short that they simply have no second/next page
+                return "";
+            }
+            return "https://www.youtube.com" + loadMore.attr("data-uix-load-more-href");
+        } catch (Exception ex) {
+            throw new ParsingException("could not load next page url", ex);
+        }
+    }
+}
--- a/services/youtube/YoutubePlayListUrlIdHandler.java
+++ b/services/youtube/YoutubePlayListUrlIdHandler.java
@ -0,0 +1,38 @@
+package org.schabi.newpipe.extractor.services.youtube;
+
+import android.net.UrlQuerySanitizer;
+
+import org.schabi.newpipe.extractor.Parser;
+import org.schabi.newpipe.extractor.UrlIdHandler;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+
+/**
+ * {@link UrlIdHandler} for YouTube playlist links, i.e. urls carrying a {@code list=}
+ * parameter.
+ */
+public class YoutubePlayListUrlIdHandler implements UrlIdHandler {
+
+    // Playlist ids do not all share one length, so a fixed width of 34 rejected shorter
+    // ids (although acceptUrl() had accepted the url) and silently cut longer ones.
+    // Accept any run of at least ten id characters instead.
+    private static final String ID_PATTERN = "([\\-a-zA-Z0-9_]{10,})";
+
+    /**
+     * @param listId id of the playlist
+     * @return the canonical playlist url for the given id
+     */
+    @Override
+    public String getUrl(String listId) {
+        return "https://www.youtube.com/playlist?list=" + listId;
+    }
+
+    /**
+     * Extracts the playlist id from the {@code list=} parameter of a url.
+     *
+     * @param url url containing a {@code list=} parameter
+     * @return the playlist id
+     * @throws ParsingException if no id can be found in the url
+     */
+    @Override
+    public String getId(String url) throws ParsingException {
+        try {
+            return Parser.matchGroup1("list=" + ID_PATTERN, url);
+        } catch (final Exception exception) {
+            throw new ParsingException("Error could not parse url :" + exception.getMessage(), exception);
+        }
+    }
+
+    /**
+     * Reduces any accepted url to the canonical playlist url.
+     *
+     * @throws ParsingException if the url does not contain a playlist id
+     */
+    @Override
+    public String cleanUrl(String complexUrl) throws ParsingException {
+        return getUrl(getId(complexUrl));
+    }
+
+    /**
+     * @param videoUrl url to test, may be {@code null}
+     * @return true for non-empty urls that mention "youtube" or "youtu.be" and contain "list="
+     */
+    @Override
+    public boolean acceptUrl(String videoUrl) {
+        final boolean hasNotEmptyUrl = videoUrl != null && !videoUrl.isEmpty();
+        final boolean isYoutubeDomain = hasNotEmptyUrl && (videoUrl.contains("youtube") || videoUrl.contains("youtu.be"));
+        return isYoutubeDomain && videoUrl.contains("list=");
+    }
+}
--- a/services/youtube/YoutubeService.java
+++ b/services/youtube/YoutubeService.java
@ -69,12 +69,23 @@ public class YoutubeService extends StreamingService {
        return new YoutubeChannelUrlIdHandler();
    }

+
+    /**
+     * @return a new {@link YoutubePlayListUrlIdHandler}
+     */
+    @Override
+    public UrlIdHandler getPlayListUrlIdHandlerInstance() {
+        return new YoutubePlayListUrlIdHandler();
+    }
+
    @Override
    public ChannelExtractor getChannelExtractorInstance(String url, int page)
        throws ExtractionException, IOException {
        return new YoutubeChannelExtractor(getChannelUrlIdHandlerInstance(), url, page, getServiceId());
    }

+    /**
+     * Creates a {@link YoutubePlayListExtractor} for the given playlist url and page,
+     * using a fresh playlist url handler and the id of this service.
+     *
+     * @param url  url of the playlist
+     * @param page page to load
+     * @return the extractor for the requested page
+     * @throws ExtractionException if the extractor constructor reports one
+     * @throws IOException         if the extractor constructor reports one
+     */
+    @Override
+    public PlayListExtractor getPlayListExtractorInstance(String url, int page)
+        throws ExtractionException, IOException {
+        return new YoutubePlayListExtractor(getPlayListUrlIdHandlerInstance(), url, page, getServiceId());
+    }
+
    @Override
    public SuggestionExtractor getSuggestionExtractorInstance() {
        return new YoutubeSuggestionExtractor(getServiceId());