This commit is contained in:
Christian Schabesberger 2017-03-21 22:03:41 +01:00
commit 6ab3dc876e
9 changed files with 435 additions and 0 deletions

View file

@ -2,6 +2,7 @@ package org.schabi.newpipe.extractor;
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.playlist.PlayListExtractor;
import org.schabi.newpipe.extractor.search.SearchEngine;
import org.schabi.newpipe.extractor.stream_info.StreamExtractor;
@ -52,8 +53,11 @@ public abstract class StreamingService {
/** Returns the {@link SearchEngine} implementation of this service. */
public abstract SearchEngine getSearchEngineInstance();
/**
 * Returns the handler for stream URLs. {@code getLinkTypeByUrl} asks this
 * handler first and reports {@code LinkType.STREAM} when it accepts a URL.
 */
public abstract UrlIdHandler getStreamUrlIdHandlerInstance();
/**
 * Returns the handler for channel URLs. {@code getLinkTypeByUrl} reports
 * {@code LinkType.CHANNEL} when the stream handler rejected the URL and this
 * handler accepts it.
 */
public abstract UrlIdHandler getChannelUrlIdHandlerInstance();
/**
 * Returns the handler for playlist URLs. {@code getLinkTypeByUrl} consults it
 * last and reports {@code LinkType.PLAYLIST} when it accepts the URL.
 */
public abstract UrlIdHandler getPlayListUrlIdHandlerInstance();
/**
 * Creates the {@link ChannelExtractor} for the given channel URL and page.
 *
 * @param url  the channel URL
 * @param page the page to extract (NOTE(review): presumably zero-based - confirm)
 * @throws ExtractionException declared for implementations that fail to extract
 * @throws IOException         declared for implementations that fail on I/O
 */
public abstract ChannelExtractor getChannelExtractorInstance(String url, int page)
throws ExtractionException, IOException;
/**
 * Creates the {@link PlayListExtractor} for the given playlist URL and page.
 *
 * @param url  the playlist URL
 * @param page the page to extract (NOTE(review): presumably zero-based - confirm)
 * @throws ExtractionException declared for implementations that fail to extract
 * @throws IOException         declared for implementations that fail on I/O
 */
public abstract PlayListExtractor getPlayListExtractorInstance(String url, int page)
throws ExtractionException, IOException;
/** Returns the {@link SuggestionExtractor} implementation of this service. */
public abstract SuggestionExtractor getSuggestionExtractorInstance();
public final int getServiceId() {
@ -66,11 +70,14 @@ public abstract class StreamingService {
public final LinkType getLinkTypeByUrl(String url) {
UrlIdHandler sH = getStreamUrlIdHandlerInstance();
UrlIdHandler cH = getChannelUrlIdHandlerInstance();
UrlIdHandler pH = getPlayListUrlIdHandlerInstance();
if(sH.acceptUrl(url)) {
return LinkType.STREAM;
} else if(cH.acceptUrl(url)) {
return LinkType.CHANNEL;
} else if (pH.acceptUrl(url)) {
return LinkType.PLAYLIST;
} else {
return LinkType.NONE;
}

View file

@ -0,0 +1,41 @@
package org.schabi.newpipe.extractor.playlist;
import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream_info.StreamInfoItemCollector;
import java.io.IOException;
/**
 * Base class for service specific playlist extractors.
 *
 * <p>It stores the request parameters (url, page, service id and the
 * {@link UrlIdHandler} used for the playlist) and owns the
 * {@link StreamInfoItemCollector} that subclasses fill with the streams of the
 * playlist. All state is assigned once in the constructor and never changes.
 */
public abstract class PlayListExtractor {
    private final int serviceId;
    private final String url;
    private final UrlIdHandler urlIdHandler;
    private final StreamInfoItemCollector previewInfoCollector;
    private final int page;

    /**
     * @param urlIdHandler handler for playlist urls; also handed to the stream collector
     * @param url          the playlist url as given by the caller
     * @param page         the page of the playlist this extractor represents
     * @param serviceId    id of the service this extractor belongs to
     * @throws ExtractionException declared so subclasses may fail during construction
     * @throws IOException         declared so subclasses may download during construction
     */
    public PlayListExtractor(UrlIdHandler urlIdHandler, String url, int page, int serviceId)
            throws ExtractionException, IOException {
        this.url = url;
        this.page = page;
        this.serviceId = serviceId;
        this.urlIdHandler = urlIdHandler;
        this.previewInfoCollector = new StreamInfoItemCollector(urlIdHandler, serviceId);
    }

    /** @return the url this extractor was created with */
    public String getUrl() {
        return url;
    }

    /** @return the url handler this extractor was created with */
    public UrlIdHandler getUrlIdHandler() {
        return urlIdHandler;
    }

    /** @return the page number this extractor was created with */
    public int getPage() {
        return page;
    }

    /** @return the collector created for this extractor's streams */
    public StreamInfoItemCollector getStreamPreviewInfoCollector() {
        return previewInfoCollector;
    }

    /** @return the id of the service this extractor was created for */
    public int getServiceId() {
        return serviceId;
    }

    public abstract String getName() throws ParsingException;

    public abstract String getAvatarUrl() throws ParsingException;

    public abstract String getBannerUrl() throws ParsingException;

    public abstract StreamInfoItemCollector getStreams() throws ParsingException;

    public abstract boolean hasNextPage() throws ParsingException;
}

View file

@ -0,0 +1,51 @@
package org.schabi.newpipe.extractor.playlist;
import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream_info.StreamInfoItemCollector;
import java.util.List;
import java.util.Vector;
/**
 * Plain data holder describing one page of a playlist.
 *
 * <p>Instances are normally created through {@link #getInfo(PlayListExtractor)}.
 * Failures of optional parts are not thrown but collected in {@link #errors}.
 */
public class PlayListInfo {

    public int service_id = -1;
    public String playList_name = "";
    public String avatar_url = "";
    public String banner_url = "";
    // Starts out empty instead of null so consumers can iterate it even when
    // the stream extraction failed (the failure is then found in errors).
    public List<InfoItem> related_streams = new Vector<>();
    public boolean hasNextPage = false;

    public List<Throwable> errors = new Vector<>();

    /**
     * Records a non-fatal problem that occurred while building this info.
     *
     * @param e the exception to remember
     */
    public void addException(Exception e) {
        errors.add(e);
    }

    /**
     * Builds a {@link PlayListInfo} from the given extractor.
     *
     * <p>The playlist name and the next-page flag are mandatory: if either
     * cannot be extracted the exception is propagated. Avatar, banner and the
     * stream list are optional: their failures are stored in {@link #errors}.
     *
     * @param extractor the extractor to read from
     * @return the populated info object
     * @throws ParsingException if the name or the next-page flag cannot be extracted
     */
    public static PlayListInfo getInfo(PlayListExtractor extractor) throws ParsingException {
        PlayListInfo info = new PlayListInfo();

        // Previously never copied, which left service_id at -1 for every info.
        info.service_id = extractor.getServiceId();
        info.playList_name = extractor.getName();
        info.hasNextPage = extractor.hasNextPage();

        try {
            info.avatar_url = extractor.getAvatarUrl();
        } catch (Exception e) {
            info.errors.add(e);
        }

        try {
            info.banner_url = extractor.getBannerUrl();
        } catch (Exception e) {
            info.errors.add(e);
        }

        try {
            StreamInfoItemCollector c = extractor.getStreams();
            info.related_streams = c.getItemList();
            // Per-item problems gathered by the collector are reported as well.
            info.errors.addAll(c.getErrors());
        } catch (Exception e) {
            info.errors.add(e);
        }

        return info;
    }
}

View file

@ -0,0 +1,21 @@
package org.schabi.newpipe.extractor.playlist;
import org.schabi.newpipe.extractor.InfoItem;
/**
 * Info item describing a single playlist: a name, a thumbnail and the url of
 * the playlist page, plus the id of the service it belongs to.
 */
public class PlayListInfoItem implements InfoItem {

    /** Id of the owning service; -1 until it has been set. */
    public int serviceId = -1;
    /** Name of the playlist; reported by {@link #getTitle()}. */
    public String name = "";
    /** Url of the thumbnail image. */
    public String thumbnailUrl = "";
    /** Url of the playlist page; reported by {@link #getLink()}. */
    public String webPageUrl = "";

    /** @return the playlist name stored in {@link #name} */
    public String getTitle() { return name; }

    /** @return the page url stored in {@link #webPageUrl} */
    public String getLink() { return webPageUrl; }

    /** @return always {@code InfoType.PLAYLIST} */
    public InfoType infoType() { return InfoType.PLAYLIST; }
}

View file

@ -0,0 +1,33 @@
package org.schabi.newpipe.extractor.playlist;
import org.schabi.newpipe.extractor.InfoItemCollector;
import org.schabi.newpipe.extractor.channel.ChannelInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
/**
 * Collector that turns {@link PlayListInfoItemExtractor}s into
 * {@link PlayListInfoItem}s and gathers them together with any errors.
 */
public class PlayListInfoItemCollector extends InfoItemCollector {

    public PlayListInfoItemCollector(int serviceId) {
        super(serviceId);
    }

    /**
     * Reads one playlist item from the given extractor.
     *
     * <p>Name and web page url are required, so their exceptions propagate.
     * A failing thumbnail is tolerated: the exception is recorded via
     * {@code addError} and the item keeps its default thumbnail url.
     *
     * @param extractor source of the item data
     * @return the populated item
     * @throws ParsingException if the name or the web page url cannot be read
     */
    public PlayListInfoItem extract(PlayListInfoItemExtractor extractor) throws ParsingException {
        final PlayListInfoItem item = new PlayListInfoItem();

        // required values
        item.name = extractor.getPlayListName();
        item.serviceId = getServiceId();
        item.webPageUrl = extractor.getWebPageUrl();

        // optional value
        try {
            final String thumbnail = extractor.getThumbnailUrl();
            item.thumbnailUrl = thumbnail;
        } catch (Exception thumbnailFailure) {
            addError(thumbnailFailure);
        }

        return item;
    }

    /**
     * Extracts an item and adds it to the collection; any exception raised on
     * the way is recorded via {@code addError} instead of being thrown.
     *
     * @param extractor source of the item data
     */
    public void commit(PlayListInfoItemExtractor extractor) throws ParsingException {
        try {
            final PlayListInfoItem item = extract(extractor);
            addItem(item);
        } catch (Exception failure) {
            addError(failure);
        }
    }
}

View file

@ -0,0 +1,9 @@
package org.schabi.newpipe.extractor.playlist;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
/**
 * Source of the raw values for one playlist item, as consumed by
 * {@code PlayListInfoItemCollector}.
 */
public interface PlayListInfoItemExtractor {

    /**
     * @return the name of the playlist
     * @throws ParsingException if the name cannot be extracted
     */
    String getPlayListName() throws ParsingException;

    /**
     * @return the url of the playlist page
     * @throws ParsingException if the url cannot be extracted
     */
    String getWebPageUrl() throws ParsingException;

    /**
     * @return the url of the playlist thumbnail
     * @throws ParsingException if the thumbnail url cannot be extracted
     */
    String getThumbnailUrl() throws ParsingException;
}

View file

@ -0,0 +1,223 @@
package org.schabi.newpipe.extractor.services.youtube;
import org.json.JSONException;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.schabi.newpipe.extractor.AbstractStreamInfo;
import org.schabi.newpipe.extractor.Downloader;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.Parser;
import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.playlist.PlayListExtractor;
import org.schabi.newpipe.extractor.stream_info.StreamInfoItemCollector;
import org.schabi.newpipe.extractor.stream_info.StreamInfoItemExtractor;
import java.io.IOException;
/**
 * {@link PlayListExtractor} for YouTube playlists.
 *
 * <p>Page 0 is the regular playlist web page. Every other page is fetched from
 * the "load more" ajax url found on the previously loaded page; that response
 * is JSON whose {@code content_html} holds the table rows of the next streams.
 *
 * <p>NOTE(review): name, avatar, banner and next-page url are kept in static
 * fields so that an extractor created for a follow-up page can reuse what the
 * extractor of the previous page found. As a consequence, pages have to be
 * requested in order and extractors for different playlists must not be
 * interleaved.
 */
public class YoutubePlayListExtractor extends PlayListExtractor {

    private static final String TAG = YoutubePlayListExtractor.class.toString();

    private Document doc = null;
    private boolean isAjaxPage = false;

    // Shared between instances on purpose, see the class comment.
    private static String name = "";
    private static String avatarUrl = "";
    private static String bannerUrl = "";
    private static String nextPageUrl = "";

    /**
     * Downloads and parses the requested playlist page.
     *
     * @param urlIdHandler handler used to normalise the playlist url
     * @param url          the playlist url
     * @param page         0 for the playlist web page, anything else for the
     *                     follow-up page announced by the previously loaded page
     * @param serviceId    id of the owning service
     * @throws IOException         if the download fails
     * @throws ExtractionException if the url or the downloaded data cannot be
     *                             parsed, or if a follow-up page is requested
     *                             although no next page url is known
     */
    public YoutubePlayListExtractor(UrlIdHandler urlIdHandler,
                                    String url, int page, int serviceId)
            throws IOException, ExtractionException {
        super(urlIdHandler, url, page, serviceId);

        Downloader downloader = NewPipe.getDownloader();

        url = urlIdHandler.cleanUrl(url);
        if (page == 0) {
            String playListPageContent = downloader.download(url);
            doc = Jsoup.parse(playListPageContent, url);

            nextPageUrl = getNextPageUrl(doc);
            isAjaxPage = false;
        } else {
            // Without a recorded url there is nothing to download; fail with a
            // clear message instead of requesting an empty url.
            if (nextPageUrl == null || nextPageUrl.isEmpty()) {
                throw new ParsingException(
                        "Could not load page " + page + ": no next page url available");
            }

            String ajaxDataRaw = downloader.download(nextPageUrl);
            JSONObject ajaxData;
            try {
                ajaxData = new JSONObject(ajaxDataRaw);

                // Wrap the rows in the same table structure the web page uses,
                // so getStreams() can treat both kinds of page alike.
                final String htmlDataRaw = "<table><tbody id=\"pl-load-more-destination\">" + ajaxData.getString("content_html") + "</tbody></table>";
                doc = Jsoup.parse(htmlDataRaw, nextPageUrl);

                final String nextPageHtmlDataRaw = ajaxData.getString("load_more_widget_html");
                if (!nextPageHtmlDataRaw.isEmpty()) {
                    final Document nextPageData = Jsoup.parse(nextPageHtmlDataRaw, nextPageUrl);
                    nextPageUrl = getNextPageUrl(nextPageData);
                } else {
                    // no widget means this was the last page
                    nextPageUrl = "";
                }
            } catch (JSONException e) {
                throw new ParsingException("Could not parse json data for next page", e);
            }
            isAjaxPage = true;
        }
    }

    /**
     * @return "uploader - title" as read from the playlist web page; for ajax
     *         pages the value remembered from the last web page
     * @throws ParsingException if the name cannot be read from the page
     */
    @Override
    public String getName() throws ParsingException {
        try {
            if (!isAjaxPage) {
                name = doc.select("span[class=\"qualified-channel-title-text\"]").first()
                        .select("a").first().text() + " - " +
                        doc.select("meta[name=title]").first().attr("content");
            }
            return name;
        } catch (Exception e) {
            throw new ParsingException("Could not get playlist name", e);
        }
    }

    /**
     * @return the avatar url from the playlist web page (protocol relative urls
     *         are completed with https); for ajax pages the remembered value
     * @throws ParsingException if the avatar cannot be read from the page
     */
    @Override
    public String getAvatarUrl() throws ParsingException {
        try {
            if (!isAjaxPage) {
                avatarUrl = doc.select("div[id=gh-banner] img[class=channel-header-profile-image]").first().attr("src");
                if (avatarUrl.startsWith("//")) {
                    avatarUrl = "https:" + avatarUrl;
                }
            }
            return avatarUrl;
        } catch (Exception e) {
            throw new ParsingException("Could not get playlist Avatar", e);
        }
    }

    /**
     * @return the banner url taken from the inline style sheet of the banner,
     *         or {@code null} when that url points to s.ytimg.com
     *         (NOTE(review): presumably the default placeholder - confirm);
     *         for ajax pages the remembered value
     * @throws ParsingException if the banner cannot be read from the page
     */
    @Override
    public String getBannerUrl() throws ParsingException {
        try {
            if (!isAjaxPage) {
                Element el = doc.select("div[id=\"gh-banner\"] style").first();
                String cssContent = el.html();
                String url = "https:" + Parser.matchGroup1("url\\((.*)\\)", cssContent);
                if (url.contains("s.ytimg.com")) {
                    bannerUrl = null;
                } else {
                    // The greedy match may run past the first url(...) entry;
                    // cut it at the first ");". If there is none, the match
                    // already is the plain url.
                    int end = url.indexOf(");");
                    bannerUrl = end >= 0 ? url.substring(0, end) : url;
                }
            }
            return bannerUrl;
        } catch (Exception e) {
            throw new ParsingException("Could not get playlist Banner", e);
        }
    }

    /**
     * Commits one stream item per row of the playlist table to the collector.
     *
     * @return the collector of this extractor, filled with the page's streams
     * @throws ParsingException if the playlist table is missing from the page
     */
    @Override
    public StreamInfoItemCollector getStreams() throws ParsingException {
        StreamInfoItemCollector collector = getStreamPreviewInfoCollector();
        Element tbody = doc.select("tbody[id=\"pl-load-more-destination\"]").first();
        if (tbody == null) {
            throw new ParsingException("Could not find the stream list of the playlist");
        }
        final YoutubeStreamUrlIdHandler youtubeStreamUrlIdHandler = YoutubeStreamUrlIdHandler.getInstance();
        for (final Element li : tbody.children()) {
            collector.commit(new StreamInfoItemExtractor() {
                @Override
                public AbstractStreamInfo.StreamType getStreamType() throws ParsingException {
                    return AbstractStreamInfo.StreamType.VIDEO_STREAM;
                }

                @Override
                public String getWebPageUrl() throws ParsingException {
                    try {
                        return youtubeStreamUrlIdHandler.getUrl(li.attr("data-video-id"));
                    } catch (Exception e) {
                        throw new ParsingException("Could not get web page url for the video", e);
                    }
                }

                @Override
                public String getTitle() throws ParsingException {
                    try {
                        return li.attr("data-title");
                    } catch (Exception e) {
                        throw new ParsingException("Could not get title", e);
                    }
                }

                @Override
                public int getDuration() throws ParsingException {
                    try {
                        return YoutubeParsingHelper.parseDurationString(
                                li.select("div[class=\"timestamp\"] span").first().text().trim());
                    } catch (Exception e) {
                        if (isLiveStream(li)) {
                            // -1 for no duration
                            return -1;
                        } else {
                            throw new ParsingException("Could not get Duration: " + getTitle(), e);
                        }
                    }
                }

                @Override
                public String getUploader() throws ParsingException {
                    return li.select("div[class=pl-video-owner] a").text();
                }

                @Override
                public String getUploadDate() throws ParsingException {
                    // not read from the playlist rows
                    return "";
                }

                @Override
                public long getViewCount() throws ParsingException {
                    // not read from the playlist rows
                    return -1;
                }

                @Override
                public String getThumbnailUrl() throws ParsingException {
                    try {
                        return "https://i.ytimg.com/vi/" + youtubeStreamUrlIdHandler.getId(getWebPageUrl()) + "/hqdefault.jpg";
                    } catch (Exception e) {
                        throw new ParsingException("Could not get thumbnail url", e);
                    }
                }

                @Override
                public boolean isAd() throws ParsingException {
                    return false;
                }

                private boolean isLiveStream(Element item) {
                    Element liveBadge = item.select("span[class*=\"yt-badge-live\"]").first();
                    if (liveBadge != null) {
                        return true;
                    }
                    // sometimes live streams don't have badges but still are live streams;
                    // if the video time is not available we most likely have an offline live stream
                    return item.select("span[class*=\"video-time\"]").first() == null;
                }
            });
        }

        return collector;
    }

    /** @return {@code true} if the last loaded page announced a follow-up page */
    @Override
    public boolean hasNextPage() throws ParsingException {
        return nextPageUrl != null && !nextPageUrl.isEmpty();
    }

    /**
     * Reads the url of the follow-up page from the "load more" button.
     *
     * @param d the document to search
     * @return the absolute url, or an empty string if there is no such button
     * @throws ParsingException if the document cannot be searched
     */
    private String getNextPageUrl(Document d) throws ParsingException {
        try {
            Element button = d.select("button[class*=\"yt-uix-load-more\"]").first();
            if (button != null) {
                return "https://www.youtube.com" + button.attr("data-uix-load-more-href");
            } else {
                // sometimes playlists are simply so small, they don't have a second/next page
                return "";
            }
        } catch (Exception e) {
            throw new ParsingException("could not load next page url", e);
        }
    }
}

View file

@ -0,0 +1,38 @@
package org.schabi.newpipe.extractor.services.youtube;
import android.net.UrlQuerySanitizer;
import org.schabi.newpipe.extractor.Parser;
import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
/**
 * {@link UrlIdHandler} for YouTube playlist urls, i.e. urls carrying a
 * {@code list=} parameter.
 */
public class YoutubePlayListUrlIdHandler implements UrlIdHandler {

    // Ids are matched greedily with a minimum length instead of exactly 34
    // characters: not every playlist id has that length, and the fixed length
    // made getId() reject urls that acceptUrl() had accepted and truncated
    // longer ids. Ids of exactly 34 characters are matched as before.
    private static final String ID_PATTERN = "([\\-a-zA-Z0-9_]{10,})";

    /**
     * @param listId the playlist id
     * @return the canonical playlist url for the id
     */
    @Override
    public String getUrl(String listId) {
        return "https://www.youtube.com/playlist?list=" + listId;
    }

    /**
     * @param url a url containing a {@code list=} parameter
     * @return the playlist id found in the url
     * @throws ParsingException if the url contains no playlist id
     */
    @Override
    public String getId(String url) throws ParsingException {
        try {
            return Parser.matchGroup1("list=" + ID_PATTERN, url);
        } catch (final Exception exception) {
            throw new ParsingException("Error could not parse url :" + exception.getMessage(), exception);
        }
    }

    /**
     * @param complexUrl any url containing a playlist id
     * @return the canonical playlist url for the id in {@code complexUrl}
     * @throws ParsingException if the url contains no playlist id
     */
    @Override
    public String cleanUrl(String complexUrl) throws ParsingException {
        return getUrl(getId(complexUrl));
    }

    /**
     * @param videoUrl the url to test, may be {@code null}
     * @return {@code true} if the url is non-empty, mentions a YouTube domain
     *         and carries a {@code list=} parameter
     */
    @Override
    public boolean acceptUrl(String videoUrl) {
        final boolean hasNotEmptyUrl = videoUrl != null && !videoUrl.isEmpty();
        final boolean isYoutubeDomain = hasNotEmptyUrl && (videoUrl.contains("youtube") || videoUrl.contains("youtu.be"));
        return isYoutubeDomain && videoUrl.contains("list=");
    }
}

View file

@ -4,6 +4,7 @@ import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.playlist.PlayListExtractor;
import org.schabi.newpipe.extractor.search.SearchEngine;
import org.schabi.newpipe.extractor.SuggestionExtractor;
import org.schabi.newpipe.extractor.stream_info.StreamExtractor;
@ -69,12 +70,23 @@ public class YoutubeService extends StreamingService {
return new YoutubeChannelUrlIdHandler();
}
/** Creates a fresh url handler for YouTube playlist urls. */
@Override
public UrlIdHandler getPlayListUrlIdHandlerInstance() {
    final UrlIdHandler playListHandler = new YoutubePlayListUrlIdHandler();
    return playListHandler;
}
/**
 * Creates the YouTube channel extractor for the given url and page, wired
 * with this service's channel url handler and service id.
 */
@Override
public ChannelExtractor getChannelExtractorInstance(String url, int page)
        throws ExtractionException, IOException {
    final UrlIdHandler channelHandler = getChannelUrlIdHandlerInstance();
    return new YoutubeChannelExtractor(channelHandler, url, page, getServiceId());
}
/**
 * Creates the YouTube playlist extractor for the given url and page, wired
 * with this service's playlist url handler and service id.
 */
@Override
public PlayListExtractor getPlayListExtractorInstance(String url, int page)
        throws ExtractionException, IOException {
    return new YoutubePlayListExtractor(getPlayListUrlIdHandlerInstance(), url, page, getServiceId());
}
@Override
public SuggestionExtractor getSuggestionExtractorInstance() {
return new YoutubeSuggestionExtractor(getServiceId());