Merge branch 'master' of https://github.com/BlenderViking/NewPipeExtractor into play
This commit is contained in:
commit
6ab3dc876e
9 changed files with 435 additions and 0 deletions
|
@ -2,6 +2,7 @@ package org.schabi.newpipe.extractor;
|
|||
|
||||
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
|
||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||
import org.schabi.newpipe.extractor.playlist.PlayListExtractor;
|
||||
import org.schabi.newpipe.extractor.search.SearchEngine;
|
||||
import org.schabi.newpipe.extractor.stream_info.StreamExtractor;
|
||||
|
||||
|
@ -52,8 +53,11 @@ public abstract class StreamingService {
|
|||
/** Returns the {@link SearchEngine} of this service. */
public abstract SearchEngine getSearchEngineInstance();
|
||||
/** Returns the handler whose {@code acceptUrl} decides whether a url is a stream url. */
public abstract UrlIdHandler getStreamUrlIdHandlerInstance();
|
||||
/** Returns the handler whose {@code acceptUrl} decides whether a url is a channel url. */
public abstract UrlIdHandler getChannelUrlIdHandlerInstance();
|
||||
/** Returns the handler whose {@code acceptUrl} decides whether a url is a playlist url. */
public abstract UrlIdHandler getPlayListUrlIdHandlerInstance();
|
||||
/**
 * Returns a {@link ChannelExtractor} for the given url and page.
 *
 * @throws ExtractionException declared for implementations; conditions are service specific
 * @throws IOException         declared for implementations; conditions are service specific
 */
public abstract ChannelExtractor getChannelExtractorInstance(String url, int page)
|
||||
throws ExtractionException, IOException;
|
||||
/**
 * Returns a {@link PlayListExtractor} for the given url and page.
 *
 * @throws ExtractionException declared for implementations; conditions are service specific
 * @throws IOException         declared for implementations; conditions are service specific
 */
public abstract PlayListExtractor getPlayListExtractorInstance(String url, int page)
|
||||
throws ExtractionException, IOException;
|
||||
/** Returns the {@link SuggestionExtractor} of this service. */
public abstract SuggestionExtractor getSuggestionExtractorInstance();
|
||||
|
||||
public final int getServiceId() {
|
||||
|
@ -66,11 +70,14 @@ public abstract class StreamingService {
|
|||
public final LinkType getLinkTypeByUrl(String url) {
|
||||
UrlIdHandler sH = getStreamUrlIdHandlerInstance();
|
||||
UrlIdHandler cH = getChannelUrlIdHandlerInstance();
|
||||
UrlIdHandler pH = getPlayListUrlIdHandlerInstance();
|
||||
|
||||
if(sH.acceptUrl(url)) {
|
||||
return LinkType.STREAM;
|
||||
} else if(cH.acceptUrl(url)) {
|
||||
return LinkType.CHANNEL;
|
||||
} else if (pH.acceptUrl(url)) {
|
||||
return LinkType.PLAYLIST;
|
||||
} else {
|
||||
return LinkType.NONE;
|
||||
}
|
||||
|
|
41
playlist/PlayListExtractor.java
Normal file
41
playlist/PlayListExtractor.java
Normal file
|
@ -0,0 +1,41 @@
|
|||
package org.schabi.newpipe.extractor.playlist;
|
||||
|
||||
import org.schabi.newpipe.extractor.UrlIdHandler;
|
||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.stream_info.StreamInfoItemCollector;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public abstract class PlayListExtractor {
|
||||
|
||||
private int serviceId;
|
||||
private String url;
|
||||
private UrlIdHandler urlIdHandler;
|
||||
private StreamInfoItemCollector previewInfoCollector;
|
||||
private int page = -1;
|
||||
|
||||
public PlayListExtractor(UrlIdHandler urlIdHandler, String url, int page, int serviceId)
|
||||
throws ExtractionException, IOException {
|
||||
this.url = url;
|
||||
this.page = page;
|
||||
this.serviceId = serviceId;
|
||||
this.urlIdHandler = urlIdHandler;
|
||||
previewInfoCollector = new StreamInfoItemCollector(urlIdHandler, serviceId);
|
||||
}
|
||||
|
||||
public String getUrl() { return url; }
|
||||
public UrlIdHandler getUrlIdHandler() { return urlIdHandler; }
|
||||
public StreamInfoItemCollector getStreamPreviewInfoCollector() {
|
||||
return previewInfoCollector;
|
||||
}
|
||||
|
||||
public abstract String getName() throws ParsingException;
|
||||
public abstract String getAvatarUrl() throws ParsingException;
|
||||
public abstract String getBannerUrl() throws ParsingException;
|
||||
public abstract StreamInfoItemCollector getStreams() throws ParsingException;
|
||||
public abstract boolean hasNextPage() throws ParsingException;
|
||||
public int getServiceId() {
|
||||
return serviceId;
|
||||
}
|
||||
}
|
51
playlist/PlayListInfo.java
Normal file
51
playlist/PlayListInfo.java
Normal file
|
@ -0,0 +1,51 @@
|
|||
package org.schabi.newpipe.extractor.playlist;
|
||||
|
||||
import org.schabi.newpipe.extractor.InfoItem;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.stream_info.StreamInfoItemCollector;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Vector;
|
||||
|
||||
public class PlayListInfo {
|
||||
|
||||
public void addException(Exception e) {
|
||||
errors.add(e);
|
||||
}
|
||||
|
||||
public static PlayListInfo getInfo(PlayListExtractor extractor) throws ParsingException {
|
||||
PlayListInfo info = new PlayListInfo();
|
||||
|
||||
info.playList_name = extractor.getName();
|
||||
info.hasNextPage = extractor.hasNextPage();
|
||||
|
||||
try {
|
||||
info.avatar_url = extractor.getAvatarUrl();
|
||||
} catch (Exception e) {
|
||||
info.errors.add(e);
|
||||
}
|
||||
try {
|
||||
info.banner_url = extractor.getBannerUrl();
|
||||
} catch (Exception e) {
|
||||
info.errors.add(e);
|
||||
}
|
||||
try {
|
||||
StreamInfoItemCollector c = extractor.getStreams();
|
||||
info.related_streams = c.getItemList();
|
||||
info.errors.addAll(c.getErrors());
|
||||
} catch(Exception e) {
|
||||
info.errors.add(e);
|
||||
}
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
public int service_id = -1;
|
||||
public String playList_name = "";
|
||||
public String avatar_url = "";
|
||||
public String banner_url = "";
|
||||
public List<InfoItem> related_streams = null;
|
||||
public boolean hasNextPage = false;
|
||||
|
||||
public List<Throwable> errors = new Vector<>();
|
||||
}
|
21
playlist/PlayListInfoItem.java
Normal file
21
playlist/PlayListInfoItem.java
Normal file
|
@ -0,0 +1,21 @@
|
|||
package org.schabi.newpipe.extractor.playlist;
|
||||
|
||||
import org.schabi.newpipe.extractor.InfoItem;
|
||||
|
||||
public class PlayListInfoItem implements InfoItem {
|
||||
|
||||
public int serviceId = -1;
|
||||
public String name = "";
|
||||
public String thumbnailUrl = "";
|
||||
public String webPageUrl = "";
|
||||
|
||||
public InfoType infoType() {
|
||||
return InfoType.PLAYLIST;
|
||||
}
|
||||
public String getTitle() {
|
||||
return name;
|
||||
}
|
||||
public String getLink() {
|
||||
return webPageUrl;
|
||||
}
|
||||
}
|
33
playlist/PlayListInfoItemCollector.java
Normal file
33
playlist/PlayListInfoItemCollector.java
Normal file
|
@ -0,0 +1,33 @@
|
|||
package org.schabi.newpipe.extractor.playlist;
|
||||
|
||||
import org.schabi.newpipe.extractor.InfoItemCollector;
|
||||
import org.schabi.newpipe.extractor.channel.ChannelInfoItemExtractor;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
|
||||
public class PlayListInfoItemCollector extends InfoItemCollector {
|
||||
public PlayListInfoItemCollector(int serviceId) {
|
||||
super(serviceId);
|
||||
}
|
||||
|
||||
public PlayListInfoItem extract(PlayListInfoItemExtractor extractor) throws ParsingException {
|
||||
final PlayListInfoItem resultItem = new PlayListInfoItem();
|
||||
|
||||
resultItem.name = extractor.getPlayListName();
|
||||
resultItem.serviceId = getServiceId();
|
||||
resultItem.webPageUrl = extractor.getWebPageUrl();
|
||||
try {
|
||||
resultItem.thumbnailUrl = extractor.getThumbnailUrl();
|
||||
} catch (Exception e) {
|
||||
addError(e);
|
||||
}
|
||||
return resultItem;
|
||||
}
|
||||
|
||||
public void commit(PlayListInfoItemExtractor extractor) throws ParsingException {
|
||||
try {
|
||||
addItem(extract(extractor));
|
||||
} catch (Exception e) {
|
||||
addError(e);
|
||||
}
|
||||
}
|
||||
}
|
9
playlist/PlayListInfoItemExtractor.java
Normal file
9
playlist/PlayListInfoItemExtractor.java
Normal file
|
@ -0,0 +1,9 @@
|
|||
package org.schabi.newpipe.extractor.playlist;
|
||||
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
|
||||
/**
 * Source of the values of a single playlist item. Every getter may throw a
 * {@link ParsingException} when its value cannot be obtained.
 */
public interface PlayListInfoItemExtractor {
|
||||
/** @return the url of the playlist's thumbnail */
String getThumbnailUrl() throws ParsingException;
|
||||
/** @return the name of the playlist */
String getPlayListName() throws ParsingException;
|
||||
/** @return the url of the playlist's web page */
String getWebPageUrl() throws ParsingException;
|
||||
}
|
223
services/youtube/YoutubePlayListExtractor.java
Normal file
223
services/youtube/YoutubePlayListExtractor.java
Normal file
|
@ -0,0 +1,223 @@
|
|||
package org.schabi.newpipe.extractor.services.youtube;
|
||||
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.schabi.newpipe.extractor.AbstractStreamInfo;
|
||||
import org.schabi.newpipe.extractor.Downloader;
|
||||
import org.schabi.newpipe.extractor.NewPipe;
|
||||
import org.schabi.newpipe.extractor.Parser;
|
||||
import org.schabi.newpipe.extractor.UrlIdHandler;
|
||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.playlist.PlayListExtractor;
|
||||
import org.schabi.newpipe.extractor.stream_info.StreamInfoItemCollector;
|
||||
import org.schabi.newpipe.extractor.stream_info.StreamInfoItemExtractor;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class YoutubePlayListExtractor extends PlayListExtractor {
|
||||
|
||||
private String TAG = YoutubePlayListExtractor.class.toString();
|
||||
|
||||
private Document doc = null;
|
||||
|
||||
private boolean isAjaxPage = false;
|
||||
private static String name = "";
|
||||
private static String feedUrl = "";
|
||||
private static String avatarUrl = "";
|
||||
private static String bannerUrl = "";
|
||||
private static String nextPageUrl = "";
|
||||
|
||||
public YoutubePlayListExtractor(UrlIdHandler urlIdHandler,
|
||||
String url, int page, int serviceId) throws IOException, ExtractionException {
|
||||
super(urlIdHandler, url, page, serviceId);
|
||||
Downloader downloader = NewPipe.getDownloader();
|
||||
url = urlIdHandler.cleanUrl(url);
|
||||
if(page == 0) {
|
||||
String channelPageContent = downloader.download(url);
|
||||
doc = Jsoup.parse(channelPageContent, url);
|
||||
nextPageUrl = getNextPageUrl(doc);
|
||||
isAjaxPage = false;
|
||||
} else {
|
||||
String ajaxDataRaw = downloader.download(nextPageUrl);
|
||||
JSONObject ajaxData;
|
||||
try {
|
||||
ajaxData = new JSONObject(ajaxDataRaw);
|
||||
final String htmlDataRaw = "<table><tbody id=\"pl-load-more-destination\">" + ajaxData.getString("content_html") + "</tbody></table>";
|
||||
doc = Jsoup.parse(htmlDataRaw, nextPageUrl);
|
||||
final String nextPageHtmlDataRaw = ajaxData.getString("load_more_widget_html");
|
||||
if(!nextPageHtmlDataRaw.isEmpty()) {
|
||||
final Document nextPageData = Jsoup.parse(nextPageHtmlDataRaw, nextPageUrl);
|
||||
nextPageUrl = getNextPageUrl(nextPageData);
|
||||
} else {
|
||||
nextPageUrl = "";
|
||||
}
|
||||
} catch (JSONException e) {
|
||||
throw new ParsingException("Could not parse json data for next page", e);
|
||||
}
|
||||
isAjaxPage = true;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
try {
|
||||
if (!isAjaxPage) {
|
||||
name = doc.select("span[class=\"qualified-channel-title-text\"]").first()
|
||||
.select("a").first().text() + " - " +
|
||||
doc.select("meta[name=title]").first().attr("content");
|
||||
}
|
||||
return name;
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get playlist name");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAvatarUrl() throws ParsingException {
|
||||
try {
|
||||
if(!isAjaxPage) {
|
||||
avatarUrl = doc.select("div[id=gh-banner] img[class=channel-header-profile-image]").first().attr("src");
|
||||
if(avatarUrl.startsWith("//")) {
|
||||
avatarUrl = "https:" + avatarUrl;
|
||||
}
|
||||
}
|
||||
return avatarUrl;
|
||||
} catch(Exception e) {
|
||||
throw new ParsingException("Could not get playlist Avatar");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getBannerUrl() throws ParsingException {
|
||||
try {
|
||||
if(!isAjaxPage) {
|
||||
Element el = doc.select("div[id=\"gh-banner\"] style").first();
|
||||
String cssContent = el.html();
|
||||
String url = "https:" + Parser.matchGroup1("url\\((.*)\\)", cssContent);
|
||||
if (url.contains("s.ytimg.com")) {
|
||||
bannerUrl = null;
|
||||
} else {
|
||||
bannerUrl = url.substring(0, url.indexOf(");"));
|
||||
}
|
||||
}
|
||||
return bannerUrl;
|
||||
} catch(Exception e) {
|
||||
throw new ParsingException("Could not get playlist Banner");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public StreamInfoItemCollector getStreams() throws ParsingException {
|
||||
StreamInfoItemCollector collector = getStreamPreviewInfoCollector();
|
||||
Element tbody = doc.select("tbody[id=\"pl-load-more-destination\"]").first();
|
||||
final YoutubeStreamUrlIdHandler youtubeStreamUrlIdHandler = YoutubeStreamUrlIdHandler.getInstance();
|
||||
for(final Element li : tbody.children()) {
|
||||
collector.commit(new StreamInfoItemExtractor() {
|
||||
@Override
|
||||
public AbstractStreamInfo.StreamType getStreamType() throws ParsingException {
|
||||
return AbstractStreamInfo.StreamType.VIDEO_STREAM;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getWebPageUrl() throws ParsingException {
|
||||
try {
|
||||
return youtubeStreamUrlIdHandler.getUrl(li.attr("data-video-id"));
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get web page url for the video", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getTitle() throws ParsingException {
|
||||
try {
|
||||
return li.attr("data-title");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get title", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getDuration() throws ParsingException {
|
||||
try {
|
||||
return YoutubeParsingHelper.parseDurationString(
|
||||
li.select("div[class=\"timestamp\"] span").first().text().trim());
|
||||
} catch(Exception e) {
|
||||
if(isLiveStream(li)) {
|
||||
// -1 for no duration
|
||||
return -1;
|
||||
} else {
|
||||
throw new ParsingException("Could not get Duration: " + getTitle(), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploader() throws ParsingException {
|
||||
return li.select("div[class=pl-video-owner] a").text();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploadDate() throws ParsingException {
|
||||
return "";
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getViewCount() throws ParsingException {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getThumbnailUrl() throws ParsingException {
|
||||
try {
|
||||
return "https://i.ytimg.com/vi/" + youtubeStreamUrlIdHandler.getId(getWebPageUrl()) + "/hqdefault.jpg";
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isAd() throws ParsingException {
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean isLiveStream(Element item) {
|
||||
Element bla = item.select("span[class*=\"yt-badge-live\"]").first();
|
||||
|
||||
if(bla == null) {
|
||||
// sometimes livestreams dont have badges but sill are live streams
|
||||
// if video time is not available we most likly have an offline livestream
|
||||
if(item.select("span[class*=\"video-time\"]").first() == null) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return bla != null;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
return collector;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNextPage() throws ParsingException {
|
||||
return nextPageUrl != null && !nextPageUrl.isEmpty();
|
||||
}
|
||||
|
||||
private String getNextPageUrl(Document d) throws ParsingException {
|
||||
try {
|
||||
Element button = d.select("button[class*=\"yt-uix-load-more\"]").first();
|
||||
if(button != null) {
|
||||
return "https://www.youtube.com" + button.attr("data-uix-load-more-href");
|
||||
} else {
|
||||
// sometimes channels are simply so small, they don't have a second/next4q page
|
||||
return "";
|
||||
}
|
||||
} catch(Exception e) {
|
||||
throw new ParsingException("could not load next page url", e);
|
||||
}
|
||||
}
|
||||
}
|
38
services/youtube/YoutubePlayListUrlIdHandler.java
Normal file
38
services/youtube/YoutubePlayListUrlIdHandler.java
Normal file
|
@ -0,0 +1,38 @@
|
|||
package org.schabi.newpipe.extractor.services.youtube;
|
||||
|
||||
import android.net.UrlQuerySanitizer;
|
||||
|
||||
import org.schabi.newpipe.extractor.Parser;
|
||||
import org.schabi.newpipe.extractor.UrlIdHandler;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
|
||||
public class YoutubePlayListUrlIdHandler implements UrlIdHandler {
|
||||
|
||||
private static final String ID_PATTERN = "([\\-a-zA-Z0-9_]{34})";
|
||||
|
||||
@Override
|
||||
public String getUrl(String listId) {
|
||||
return "https://www.youtube.com/playlist?list=" + listId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getId(String url) throws ParsingException {
|
||||
try {
|
||||
return Parser.matchGroup1("list=" + ID_PATTERN, url);
|
||||
} catch (final Exception exception) {
|
||||
throw new ParsingException("Error could not parse url :" + exception.getMessage(), exception);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String cleanUrl(String complexUrl) throws ParsingException {
|
||||
return getUrl(getId(complexUrl));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean acceptUrl(String videoUrl) {
|
||||
final boolean hasNotEmptyUrl = videoUrl != null && !videoUrl.isEmpty();
|
||||
final boolean isYoutubeDomain = hasNotEmptyUrl && (videoUrl.contains("youtube") || videoUrl.contains("youtu.be"));
|
||||
return isYoutubeDomain && videoUrl.contains("list=");
|
||||
}
|
||||
}
|
|
@ -4,6 +4,7 @@ import org.schabi.newpipe.extractor.StreamingService;
|
|||
import org.schabi.newpipe.extractor.UrlIdHandler;
|
||||
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
|
||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||
import org.schabi.newpipe.extractor.playlist.PlayListExtractor;
|
||||
import org.schabi.newpipe.extractor.search.SearchEngine;
|
||||
import org.schabi.newpipe.extractor.SuggestionExtractor;
|
||||
import org.schabi.newpipe.extractor.stream_info.StreamExtractor;
|
||||
|
@ -69,12 +70,23 @@ public class YoutubeService extends StreamingService {
|
|||
return new YoutubeChannelUrlIdHandler();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public UrlIdHandler getPlayListUrlIdHandlerInstance() {
|
||||
return new YoutubePlayListUrlIdHandler();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ChannelExtractor getChannelExtractorInstance(String url, int page)
|
||||
throws ExtractionException, IOException {
|
||||
return new YoutubeChannelExtractor(getChannelUrlIdHandlerInstance(), url, page, getServiceId());
|
||||
}
|
||||
|
||||
public PlayListExtractor getPlayListExtractorInstance(String url, int page)
|
||||
throws ExtractionException, IOException {
|
||||
return new YoutubePlayListExtractor(getPlayListUrlIdHandlerInstance(), url, page, getServiceId());
|
||||
}
|
||||
|
||||
@Override
|
||||
public SuggestionExtractor getSuggestionExtractorInstance() {
|
||||
return new YoutubeSuggestionExtractor(getServiceId());
|
||||
|
|
Loading…
Reference in a new issue