added http post method in downloader, formatting
This commit is contained in:
parent
8b8779b176
commit
95575756ee
8 changed files with 442 additions and 464 deletions
|
@ -4,23 +4,21 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
public class DownloadResponse {
|
||||
private final String responseBody;
|
||||
private final Map<String, List<String>> responseHeaders;
|
||||
|
||||
|
||||
private final String responseBody;
|
||||
private final Map<String, List<String>> responseHeaders;
|
||||
|
||||
public DownloadResponse(String responseBody, Map<String, List<String>> headers) {
|
||||
super();
|
||||
this.responseBody = responseBody;
|
||||
this.responseHeaders = headers;
|
||||
}
|
||||
public DownloadResponse(String responseBody, Map<String, List<String>> headers) {
|
||||
super();
|
||||
this.responseBody = responseBody;
|
||||
this.responseHeaders = headers;
|
||||
}
|
||||
|
||||
public String getResponseBody() {
|
||||
return responseBody;
|
||||
}
|
||||
public String getResponseBody() {
|
||||
return responseBody;
|
||||
}
|
||||
|
||||
public Map<String, List<String>> getResponseHeaders() {
|
||||
return responseHeaders;
|
||||
}
|
||||
|
||||
public Map<String, List<String>> getResponseHeaders() {
|
||||
return responseHeaders;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -28,41 +28,44 @@ import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
|||
|
||||
public interface Downloader {
|
||||
|
||||
/**
|
||||
* Download the text file at the supplied URL as in download(String), but set
|
||||
* the HTTP header field "Accept-Language" to the supplied string.
|
||||
*
|
||||
* @param siteUrl the URL of the text file to return the contents of
|
||||
* @param language the language (usually a 2-character code) to set as the
|
||||
* preferred language
|
||||
* @return the contents of the specified text file
|
||||
* @throws IOException
|
||||
*/
|
||||
String download(String siteUrl, String language) throws IOException, ReCaptchaException;
|
||||
/**
|
||||
* Download the text file at the supplied URL as in download(String), but set
|
||||
* the HTTP header field "Accept-Language" to the supplied string.
|
||||
*
|
||||
* @param siteUrl the URL of the text file to return the contents of
|
||||
* @param language the language (usually a 2-character code) to set as the
|
||||
* preferred language
|
||||
* @return the contents of the specified text file
|
||||
* @throws IOException
|
||||
*/
|
||||
String download(String siteUrl, String language) throws IOException, ReCaptchaException;
|
||||
|
||||
/**
|
||||
* Download the text file at the supplied URL as in download(String), but set
|
||||
* the HTTP request header fields from the supplied customProperties map.
|
||||
*
|
||||
* @param siteUrl the URL of the text file to return the contents of
|
||||
* @param customProperties set request header properties
|
||||
* @return the contents of the specified text file
|
||||
* @throws IOException
|
||||
*/
|
||||
String download(String siteUrl, Map<String, String> customProperties) throws IOException, ReCaptchaException;
|
||||
/**
|
||||
* Download the text file at the supplied URL as in download(String), but set
|
||||
* the HTTP request header fields from the supplied customProperties map.
|
||||
*
|
||||
* @param siteUrl the URL of the text file to return the contents of
|
||||
* @param customProperties set request header properties
|
||||
* @return the contents of the specified text file
|
||||
* @throws IOException
|
||||
*/
|
||||
String download(String siteUrl, Map<String, String> customProperties) throws IOException, ReCaptchaException;
|
||||
|
||||
/**
|
||||
* Download (via HTTP) the text file located at the supplied URL, and return its
|
||||
* contents. Primarily intended for downloading web pages.
|
||||
*
|
||||
* @param siteUrl the URL of the text file to download
|
||||
* @return the contents of the specified text file
|
||||
* @throws IOException
|
||||
*/
|
||||
String download(String siteUrl) throws IOException, ReCaptchaException;
|
||||
/**
|
||||
* Download (via HTTP) the text file located at the supplied URL, and return its
|
||||
* contents. Primarily intended for downloading web pages.
|
||||
*
|
||||
* @param siteUrl the URL of the text file to download
|
||||
* @return the contents of the specified text file
|
||||
* @throws IOException
|
||||
*/
|
||||
String download(String siteUrl) throws IOException, ReCaptchaException;
|
||||
|
||||
DownloadResponse downloadWithHeaders(String siteUrl, Map<String, List<String>> requestHeaders)
|
||||
throws IOException, ReCaptchaException;
|
||||
DownloadResponse get(String siteUrl, Map<String, List<String>> requestHeaders)
|
||||
throws IOException, ReCaptchaException;
|
||||
|
||||
DownloadResponse downloadWithHeaders(String siteUrl) throws IOException, ReCaptchaException;
|
||||
DownloadResponse get(String siteUrl) throws IOException, ReCaptchaException;
|
||||
|
||||
DownloadResponse post(String siteUrl, String requestBody, Map<String, List<String>> requestHeaders)
|
||||
throws IOException, ReCaptchaException;
|
||||
}
|
||||
|
|
|
@ -4,19 +4,11 @@ import org.schabi.newpipe.extractor.InfoItemExtractor;
|
|||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
|
||||
public interface CommentsInfoItemExtractor extends InfoItemExtractor {
|
||||
|
||||
String getCommentId() throws ParsingException;
|
||||
|
||||
String getCommentText() throws ParsingException;
|
||||
|
||||
String getAuthorName() throws ParsingException;
|
||||
|
||||
String getAuthorThumbnail() throws ParsingException;
|
||||
|
||||
String getAuthorEndpoint() throws ParsingException;
|
||||
|
||||
String getPublishedTime() throws ParsingException;
|
||||
|
||||
Integer getLikeCount() throws ParsingException;
|
||||
|
||||
String getCommentId() throws ParsingException;
|
||||
String getCommentText() throws ParsingException;
|
||||
String getAuthorName() throws ParsingException;
|
||||
String getAuthorThumbnail() throws ParsingException;
|
||||
String getAuthorEndpoint() throws ParsingException;
|
||||
String getPublishedTime() throws ParsingException;
|
||||
Integer getLikeCount() throws ParsingException;
|
||||
}
|
||||
|
|
|
@ -7,26 +7,6 @@ import org.schabi.newpipe.extractor.InfoItem;
|
|||
import org.schabi.newpipe.extractor.InfoItemsCollector;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
|
||||
/*
|
||||
* Created by Christian Schabesberger on 28.02.16.
|
||||
*
|
||||
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
|
||||
* CommentsInfoItemsCollector.java is part of NewPipe.
|
||||
*
|
||||
* NewPipe is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* NewPipe is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
public class CommentsInfoItemsCollector extends InfoItemsCollector<CommentsInfoItem, CommentsInfoItemExtractor> {
|
||||
|
||||
public CommentsInfoItemsCollector(int serviceId) {
|
||||
|
|
|
@ -1,20 +1,17 @@
|
|||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URL;
|
||||
import java.net.URLEncoder;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
|
||||
import javax.net.ssl.HttpsURLConnection;
|
||||
|
||||
import org.schabi.newpipe.extractor.DownloadResponse;
|
||||
import org.schabi.newpipe.extractor.Downloader;
|
||||
import org.schabi.newpipe.extractor.NewPipe;
|
||||
import org.schabi.newpipe.extractor.StreamingService;
|
||||
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
|
||||
import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
|
||||
|
@ -22,6 +19,7 @@ import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
|
|||
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
|
||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
||||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
|
@ -29,235 +27,222 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
|||
|
||||
public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||
|
||||
private static final String USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0";
|
||||
private static final String USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0";
|
||||
|
||||
private List<String> cookies;
|
||||
private String sessionToken;
|
||||
private String commentsToken;
|
||||
private List<String> cookies;
|
||||
private String sessionToken;
|
||||
private String commentsToken;
|
||||
|
||||
private ObjectMapper mapper = new ObjectMapper();
|
||||
private ObjectMapper mapper = new ObjectMapper();
|
||||
|
||||
public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler) {
|
||||
super(service, uiHandler);
|
||||
// TODO Auto-generated constructor stub
|
||||
}
|
||||
public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler) {
|
||||
super(service, uiHandler);
|
||||
// TODO Auto-generated constructor stub
|
||||
}
|
||||
|
||||
@Override
|
||||
public InfoItemsPage<CommentsInfoItem> getInitialPage() throws IOException, ExtractionException {
|
||||
// initial page does not load any comments but is required to get session token
|
||||
// and cookies
|
||||
return getPage(getNextPageUrl());
|
||||
}
|
||||
@Override
|
||||
public InfoItemsPage<CommentsInfoItem> getInitialPage() throws IOException, ExtractionException {
|
||||
// initial page does not load any comments but is required to get session token
|
||||
// and cookies
|
||||
return getPage(getNextPageUrl());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getNextPageUrl() throws IOException, ExtractionException {
|
||||
return getNextPageUrl(commentsToken);
|
||||
}
|
||||
@Override
|
||||
public String getNextPageUrl() throws IOException, ExtractionException {
|
||||
return getNextPageUrl(commentsToken);
|
||||
}
|
||||
|
||||
private String getNextPageUrl(JsonNode ajaxJson) throws IOException, ExtractionException {
|
||||
Optional<JsonNode> element = Optional.ofNullable(ajaxJson.findValue("itemSectionContinuation"))
|
||||
.map(e -> e.get("continuations")).map(e -> e.findValue("continuation"));
|
||||
private String getNextPageUrl(JsonNode ajaxJson) throws IOException, ExtractionException {
|
||||
Optional<JsonNode> element = Optional.ofNullable(ajaxJson.findValue("itemSectionContinuation"))
|
||||
.map(e -> e.get("continuations")).map(e -> e.findValue("continuation"));
|
||||
|
||||
if (element.isPresent()) {
|
||||
return getNextPageUrl(element.get().asText());
|
||||
} else {
|
||||
// no more comments
|
||||
return "";
|
||||
}
|
||||
}
|
||||
if (element.isPresent()) {
|
||||
return getNextPageUrl(element.get().asText());
|
||||
} else {
|
||||
// no more comments
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
private String getNextPageUrl(String continuation) throws ParsingException {
|
||||
Map<String, String> params = new HashMap<>();
|
||||
params.put("action_get_comments", "1");
|
||||
params.put("pbj", "1");
|
||||
params.put("ctoken", continuation);
|
||||
params.put("continuation", continuation);
|
||||
try {
|
||||
return "https://www.youtube.com/comment_service_ajax?" + getDataString(params);
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
throw new ParsingException("Could not get next page url", e);
|
||||
}
|
||||
}
|
||||
private String getNextPageUrl(String continuation) throws ParsingException {
|
||||
Map<String, String> params = new HashMap<>();
|
||||
params.put("action_get_comments", "1");
|
||||
params.put("pbj", "1");
|
||||
params.put("ctoken", continuation);
|
||||
params.put("continuation", continuation);
|
||||
try {
|
||||
return "https://www.youtube.com/comment_service_ajax?" + getDataString(params);
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
throw new ParsingException("Could not get next page url", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public InfoItemsPage<CommentsInfoItem> getPage(String pageUrl) throws IOException, ExtractionException {
|
||||
if (pageUrl == null || pageUrl.isEmpty()) {
|
||||
throw new ExtractionException(new IllegalArgumentException("Page url is empty or null"));
|
||||
}
|
||||
String ajaxResponse = makeAjaxRequest(pageUrl);
|
||||
JsonNode ajaxJson = mapper.readTree(ajaxResponse);
|
||||
CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
|
||||
collectCommentsFrom(collector, ajaxJson, pageUrl);
|
||||
return new InfoItemsPage<>(collector, getNextPageUrl(ajaxJson));
|
||||
}
|
||||
@Override
|
||||
public InfoItemsPage<CommentsInfoItem> getPage(String pageUrl) throws IOException, ExtractionException {
|
||||
if (pageUrl == null || pageUrl.isEmpty()) {
|
||||
throw new ExtractionException(new IllegalArgumentException("Page url is empty or null"));
|
||||
}
|
||||
String ajaxResponse = makeAjaxRequest(pageUrl);
|
||||
JsonNode ajaxJson = mapper.readTree(ajaxResponse);
|
||||
CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
|
||||
collectCommentsFrom(collector, ajaxJson, pageUrl);
|
||||
return new InfoItemsPage<>(collector, getNextPageUrl(ajaxJson));
|
||||
}
|
||||
|
||||
private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonNode ajaxJson, String pageUrl) {
|
||||
List<JsonNode> comments = ajaxJson.findValues("commentRenderer");
|
||||
comments.stream().forEach(c -> {
|
||||
CommentsInfoItemExtractor extractor = new CommentsInfoItemExtractor() {
|
||||
private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonNode ajaxJson, String pageUrl) {
|
||||
List<JsonNode> comments = ajaxJson.findValues("commentRenderer");
|
||||
comments.stream().forEach(c -> {
|
||||
CommentsInfoItemExtractor extractor = new CommentsInfoItemExtractor() {
|
||||
|
||||
@Override
|
||||
public String getUrl() throws ParsingException {
|
||||
return pageUrl;
|
||||
}
|
||||
@Override
|
||||
public String getUrl() throws ParsingException {
|
||||
return pageUrl;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getThumbnailUrl() throws ParsingException {
|
||||
try {
|
||||
return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
@Override
|
||||
public String getThumbnailUrl() throws ParsingException {
|
||||
try {
|
||||
return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
try {
|
||||
return c.get("authorText").get("simpleText").asText();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
try {
|
||||
return c.get("authorText").get("simpleText").asText();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPublishedTime() throws ParsingException {
|
||||
try {
|
||||
return c.get("publishedTimeText").get("runs").get(0).get("text").asText();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
@Override
|
||||
public String getPublishedTime() throws ParsingException {
|
||||
try {
|
||||
return c.get("publishedTimeText").get("runs").get(0).get("text").asText();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer getLikeCount() throws ParsingException {
|
||||
try {
|
||||
return c.get("likeCount").intValue();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
@Override
|
||||
public Integer getLikeCount() throws ParsingException {
|
||||
try {
|
||||
return c.get("likeCount").intValue();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getCommentText() throws ParsingException {
|
||||
try {
|
||||
if (null != c.get("contentText").get("simpleText")) {
|
||||
return c.get("contentText").get("simpleText").asText();
|
||||
} else {
|
||||
return c.get("contentText").get("runs").get(0).get("text").asText();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
@Override
|
||||
public String getCommentText() throws ParsingException {
|
||||
try {
|
||||
if (null != c.get("contentText").get("simpleText")) {
|
||||
return c.get("contentText").get("simpleText").asText();
|
||||
} else {
|
||||
return c.get("contentText").get("runs").get(0).get("text").asText();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getCommentId() throws ParsingException {
|
||||
try {
|
||||
return c.get("commentId").asText();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
@Override
|
||||
public String getCommentId() throws ParsingException {
|
||||
try {
|
||||
return c.get("commentId").asText();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAuthorThumbnail() throws ParsingException {
|
||||
try {
|
||||
return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
@Override
|
||||
public String getAuthorThumbnail() throws ParsingException {
|
||||
try {
|
||||
return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAuthorName() throws ParsingException {
|
||||
try {
|
||||
return c.get("authorText").get("simpleText").asText();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
@Override
|
||||
public String getAuthorName() throws ParsingException {
|
||||
try {
|
||||
return c.get("authorText").get("simpleText").asText();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAuthorEndpoint() throws ParsingException {
|
||||
try {
|
||||
return "https://youtube.com"
|
||||
+ c.get("authorEndpoint").get("browseEndpoint").get("canonicalBaseUrl").asText();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
};
|
||||
@Override
|
||||
public String getAuthorEndpoint() throws ParsingException {
|
||||
try {
|
||||
return "https://youtube.com"
|
||||
+ c.get("authorEndpoint").get("browseEndpoint").get("canonicalBaseUrl").asText();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
collector.commit(extractor);
|
||||
});
|
||||
collector.commit(extractor);
|
||||
});
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onFetchPage(Downloader downloader) throws IOException, ExtractionException {
|
||||
DownloadResponse response = downloader.downloadWithHeaders(getUrl());
|
||||
String responseBody = response.getResponseBody();
|
||||
cookies = response.getResponseHeaders().get("Set-Cookie");
|
||||
sessionToken = findValue(responseBody, "XSRF_TOKEN");
|
||||
commentsToken = findValue(responseBody, "COMMENTS_TOKEN");
|
||||
}
|
||||
@Override
|
||||
public void onFetchPage(Downloader downloader) throws IOException, ExtractionException {
|
||||
DownloadResponse response = downloader.get(getUrl());
|
||||
String responseBody = response.getResponseBody();
|
||||
cookies = response.getResponseHeaders().get("Set-Cookie");
|
||||
sessionToken = findValue(responseBody, "XSRF_TOKEN");
|
||||
commentsToken = findValue(responseBody, "COMMENTS_TOKEN");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
private String makeAjaxRequest(String siteUrl) throws IOException {
|
||||
private String makeAjaxRequest(String siteUrl) throws IOException, ReCaptchaException {
|
||||
|
||||
StringBuilder postData = new StringBuilder();
|
||||
postData.append(URLEncoder.encode("session_token", "UTF-8"));
|
||||
postData.append('=');
|
||||
postData.append(URLEncoder.encode(sessionToken, "UTF-8"));
|
||||
byte[] postDataBytes = postData.toString().getBytes("UTF-8");
|
||||
StringBuilder postData = new StringBuilder();
|
||||
postData.append(URLEncoder.encode("session_token", "UTF-8"));
|
||||
postData.append('=');
|
||||
postData.append(URLEncoder.encode(sessionToken, "UTF-8"));
|
||||
|
||||
URL url = new URL(siteUrl);
|
||||
HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
|
||||
con.setRequestMethod("POST");
|
||||
con.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
|
||||
con.setRequestProperty("Content-Length", String.valueOf(postDataBytes.length));
|
||||
con.setRequestProperty("Accept", "*/*");
|
||||
con.setRequestProperty("User-Agent", USER_AGENT);
|
||||
con.setRequestProperty("X-YouTube-Client-Version", "2.20180815");
|
||||
con.setRequestProperty("X-YouTube-Client-Name", "1");
|
||||
// set cookies
|
||||
cookies.stream().forEach(c -> con.addRequestProperty("Cookie", c));
|
||||
con.setDoOutput(true);
|
||||
con.getOutputStream().write(postDataBytes);
|
||||
Map<String, List<String>> requestHeaders = new HashMap<>();
|
||||
requestHeaders.put("Content-Type", Arrays.asList("application/x-www-form-urlencoded"));
|
||||
requestHeaders.put("Accept", Arrays.asList("*/*"));
|
||||
requestHeaders.put("User-Agent", Arrays.asList(USER_AGENT));
|
||||
requestHeaders.put("X-YouTube-Client-Version", Arrays.asList("2.20180815"));
|
||||
requestHeaders.put("X-YouTube-Client-Name", Arrays.asList("1"));
|
||||
requestHeaders.put("Cookie", cookies);
|
||||
|
||||
BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream(), "UTF-8"));
|
||||
StringBuilder sb = new StringBuilder();
|
||||
String inputLine;
|
||||
while ((inputLine = in.readLine()) != null) {
|
||||
sb.append(inputLine);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
return NewPipe.getDownloader().post(siteUrl, postData.toString(), requestHeaders).getResponseBody();
|
||||
}
|
||||
|
||||
private String getDataString(Map<String, String> params) throws UnsupportedEncodingException {
|
||||
StringBuilder result = new StringBuilder();
|
||||
boolean first = true;
|
||||
for (Map.Entry<String, String> entry : params.entrySet()) {
|
||||
if (first)
|
||||
first = false;
|
||||
else
|
||||
result.append("&");
|
||||
result.append(URLEncoder.encode(entry.getKey(), "UTF-8"));
|
||||
result.append("=");
|
||||
result.append(URLEncoder.encode(entry.getValue(), "UTF-8"));
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
private String getDataString(Map<String, String> params) throws UnsupportedEncodingException {
|
||||
StringBuilder result = new StringBuilder();
|
||||
boolean first = true;
|
||||
for (Map.Entry<String, String> entry : params.entrySet()) {
|
||||
if (first)
|
||||
first = false;
|
||||
else
|
||||
result.append("&");
|
||||
result.append(URLEncoder.encode(entry.getKey(), "UTF-8"));
|
||||
result.append("=");
|
||||
result.append(URLEncoder.encode(entry.getValue(), "UTF-8"));
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
private String findValue(String doc, String key) {
|
||||
int beginIndex = doc.indexOf(key) + key.length() + 4;
|
||||
int endIndex = doc.indexOf("\"", beginIndex);
|
||||
return doc.substring(beginIndex, endIndex);
|
||||
}
|
||||
private String findValue(String doc, String key) {
|
||||
int beginIndex = doc.indexOf(key) + key.length() + 4;
|
||||
int endIndex = doc.indexOf("\"", beginIndex);
|
||||
return doc.substring(beginIndex, endIndex);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,16 +1,5 @@
|
|||
package org.schabi.newpipe.extractor.services.youtube.linkHandler;
|
||||
|
||||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.schabi.newpipe.extractor.Downloader;
|
||||
import org.schabi.newpipe.extractor.NewPipe;
|
||||
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
||||
import org.schabi.newpipe.extractor.utils.Parser;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URI;
|
||||
|
@ -18,25 +7,16 @@ import java.net.URISyntaxException;
|
|||
import java.net.URLDecoder;
|
||||
import java.util.List;
|
||||
|
||||
/*
|
||||
* Created by Christian Schabesberger on 25.07.16.
|
||||
*
|
||||
* Copyright (C) Christian Schabesberger 2018 <chris.schabesberger@mailbox.org>
|
||||
* YoutubeChannelLinkHandlerFactory.java is part of NewPipe.
|
||||
*
|
||||
* NewPipe is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* NewPipe is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.schabi.newpipe.extractor.Downloader;
|
||||
import org.schabi.newpipe.extractor.NewPipe;
|
||||
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
||||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
|
||||
import org.schabi.newpipe.extractor.utils.Parser;
|
||||
|
||||
public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory {
|
||||
|
||||
|
|
|
@ -36,144 +36,184 @@ import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
|||
|
||||
public class Downloader implements org.schabi.newpipe.extractor.Downloader {
|
||||
|
||||
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0";
|
||||
private static String mCookies = "";
|
||||
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0";
|
||||
private static String mCookies = "";
|
||||
|
||||
private static Downloader instance = null;
|
||||
private static Downloader instance = null;
|
||||
|
||||
private Downloader() {
|
||||
}
|
||||
private Downloader() {
|
||||
}
|
||||
|
||||
public static Downloader getInstance() {
|
||||
if (instance == null) {
|
||||
synchronized (Downloader.class) {
|
||||
if (instance == null) {
|
||||
instance = new Downloader();
|
||||
}
|
||||
}
|
||||
}
|
||||
return instance;
|
||||
}
|
||||
public static Downloader getInstance() {
|
||||
if (instance == null) {
|
||||
synchronized (Downloader.class) {
|
||||
if (instance == null) {
|
||||
instance = new Downloader();
|
||||
}
|
||||
}
|
||||
}
|
||||
return instance;
|
||||
}
|
||||
|
||||
public static synchronized void setCookies(String cookies) {
|
||||
Downloader.mCookies = cookies;
|
||||
}
|
||||
public static synchronized void setCookies(String cookies) {
|
||||
Downloader.mCookies = cookies;
|
||||
}
|
||||
|
||||
public static synchronized String getCookies() {
|
||||
return Downloader.mCookies;
|
||||
}
|
||||
public static synchronized String getCookies() {
|
||||
return Downloader.mCookies;
|
||||
}
|
||||
|
||||
/**
|
||||
* Download the text file at the supplied URL as in download(String), but set
|
||||
* the HTTP header field "Accept-Language" to the supplied string.
|
||||
*
|
||||
* @param siteUrl the URL of the text file to return the contents of
|
||||
* @param language the language (usually a 2-character code) to set as the
|
||||
* preferred language
|
||||
* @return the contents of the specified text file
|
||||
*/
|
||||
public String download(String siteUrl, String language) throws IOException, ReCaptchaException {
|
||||
Map<String, String> requestProperties = new HashMap<>();
|
||||
requestProperties.put("Accept-Language", language);
|
||||
return download(siteUrl, requestProperties);
|
||||
}
|
||||
/**
|
||||
* Download the text file at the supplied URL as in download(String), but set
|
||||
* the HTTP header field "Accept-Language" to the supplied string.
|
||||
*
|
||||
* @param siteUrl the URL of the text file to return the contents of
|
||||
* @param language the language (usually a 2-character code) to set as the
|
||||
* preferred language
|
||||
* @return the contents of the specified text file
|
||||
*/
|
||||
public String download(String siteUrl, String language) throws IOException, ReCaptchaException {
|
||||
Map<String, String> requestProperties = new HashMap<>();
|
||||
requestProperties.put("Accept-Language", language);
|
||||
return download(siteUrl, requestProperties);
|
||||
}
|
||||
|
||||
/**
|
||||
* Download the text file at the supplied URL as in download(String), but set
|
||||
* the HTTP request header fields from the supplied customProperties map.
|
||||
*
|
||||
* @param siteUrl the URL of the text file to return the contents of
|
||||
* @param customProperties set request header properties
|
||||
* @return the contents of the specified text file
|
||||
* @throws IOException
|
||||
*/
|
||||
public String download(String siteUrl, Map<String, String> customProperties)
|
||||
throws IOException, ReCaptchaException {
|
||||
URL url = new URL(siteUrl);
|
||||
HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
|
||||
for (Map.Entry<String, String> pair : customProperties.entrySet()) {
|
||||
con.setRequestProperty(pair.getKey(), pair.getValue());
|
||||
}
|
||||
return dl(con);
|
||||
}
|
||||
/**
|
||||
* Download the text file at the supplied URL as in download(String), but set
|
||||
* the HTTP header field "Accept-Language" to the supplied string.
|
||||
*
|
||||
* @param siteUrl the URL of the text file to return the contents of
|
||||
* @param customProperties set request header properties
|
||||
* @return the contents of the specified text file
|
||||
* @throws IOException
|
||||
*/
|
||||
public String download(String siteUrl, Map<String, String> customProperties)
|
||||
throws IOException, ReCaptchaException {
|
||||
URL url = new URL(siteUrl);
|
||||
HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
|
||||
for (Map.Entry<String, String> pair : customProperties.entrySet()) {
|
||||
con.setRequestProperty(pair.getKey(), pair.getValue());
|
||||
}
|
||||
return dl(con);
|
||||
}
|
||||
|
||||
/**
|
||||
* Common functionality between download(String url) and download(String url,
|
||||
* String language)
|
||||
*/
|
||||
private static String dl(HttpsURLConnection con) throws IOException, ReCaptchaException {
|
||||
StringBuilder response = new StringBuilder();
|
||||
BufferedReader in = null;
|
||||
/**
|
||||
* Common functionality between download(String url) and download(String url,
|
||||
* String language)
|
||||
*/
|
||||
private static String dl(HttpsURLConnection con) throws IOException, ReCaptchaException {
|
||||
StringBuilder response = new StringBuilder();
|
||||
BufferedReader in = null;
|
||||
|
||||
try {
|
||||
con.setConnectTimeout(30 * 1000);// 30s
|
||||
con.setReadTimeout(30 * 1000);// 30s
|
||||
con.setRequestMethod("GET");
|
||||
con.setRequestProperty("User-Agent", USER_AGENT);
|
||||
try {
|
||||
|
||||
if (getCookies().length() > 0) {
|
||||
con.addRequestProperty("Cookie", getCookies());
|
||||
}
|
||||
con.setRequestMethod("GET");
|
||||
setDefaults(con);
|
||||
|
||||
in = new BufferedReader(new InputStreamReader(con.getInputStream()));
|
||||
String inputLine;
|
||||
in = new BufferedReader(new InputStreamReader(con.getInputStream()));
|
||||
String inputLine;
|
||||
|
||||
while ((inputLine = in.readLine()) != null) {
|
||||
response.append(inputLine);
|
||||
}
|
||||
} catch (UnknownHostException uhe) {// thrown when there's no internet connection
|
||||
throw new IOException("unknown host or no network", uhe);
|
||||
// Toast.makeText(getActivity(), uhe.getMessage(), Toast.LENGTH_LONG).show();
|
||||
} catch (Exception e) {
|
||||
/*
|
||||
* HTTP 429 == Too Many Request Receive from Youtube.com = ReCaptcha challenge
|
||||
* request See : https://github.com/rg3/youtube-dl/issues/5138
|
||||
*/
|
||||
if (con.getResponseCode() == 429) {
|
||||
throw new ReCaptchaException("reCaptcha Challenge requested");
|
||||
}
|
||||
while ((inputLine = in.readLine()) != null) {
|
||||
response.append(inputLine);
|
||||
}
|
||||
} catch (UnknownHostException uhe) {// thrown when there's no internet
|
||||
// connection
|
||||
throw new IOException("unknown host or no network", uhe);
|
||||
// Toast.makeText(getActivity(), uhe.getMessage(),
|
||||
// Toast.LENGTH_LONG).show();
|
||||
} catch (Exception e) {
|
||||
/*
|
||||
* HTTP 429 == Too Many Request Receive from Youtube.com = ReCaptcha challenge
|
||||
* request See : https://github.com/rg3/youtube-dl/issues/5138
|
||||
*/
|
||||
if (con.getResponseCode() == 429) {
|
||||
throw new ReCaptchaException("reCaptcha Challenge requested");
|
||||
}
|
||||
|
||||
throw new IOException(con.getResponseCode() + " " + con.getResponseMessage(), e);
|
||||
} finally {
|
||||
if (in != null) {
|
||||
in.close();
|
||||
}
|
||||
}
|
||||
throw new IOException(con.getResponseCode() + " " + con.getResponseMessage(), e);
|
||||
} finally {
|
||||
if (in != null) {
|
||||
in.close();
|
||||
}
|
||||
}
|
||||
|
||||
return response.toString();
|
||||
}
|
||||
return response.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Download (via HTTP) the text file located at the supplied URL, and return its
|
||||
* contents. Primarily intended for downloading web pages.
|
||||
*
|
||||
* @param siteUrl the URL of the text file to download
|
||||
* @return the contents of the specified text file
|
||||
*/
|
||||
public String download(String siteUrl) throws IOException, ReCaptchaException {
|
||||
URL url = new URL(siteUrl);
|
||||
HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
|
||||
// HttpsURLConnection con = NetCipher.getHttpsURLConnection(url);
|
||||
return dl(con);
|
||||
}
|
||||
private static void setDefaults(HttpsURLConnection con) {
|
||||
|
||||
@Override
|
||||
public DownloadResponse downloadWithHeaders(String siteUrl, Map<String, List<String>> requestHeaders)
|
||||
throws IOException, ReCaptchaException {
|
||||
URL url = new URL(siteUrl);
|
||||
HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
|
||||
for (Map.Entry<String, List<String>> pair : requestHeaders.entrySet()) {
|
||||
pair.getValue().stream().forEach(value -> con.addRequestProperty(pair.getKey(), value));
|
||||
}
|
||||
String responseBody = dl(con);
|
||||
return new DownloadResponse(responseBody, con.getHeaderFields());
|
||||
}
|
||||
con.setConnectTimeout(30 * 1000);// 30s
|
||||
con.setReadTimeout(30 * 1000);// 30s
|
||||
|
||||
@Override
|
||||
public DownloadResponse downloadWithHeaders(String siteUrl) throws IOException, ReCaptchaException {
|
||||
URL url = new URL(siteUrl);
|
||||
HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
|
||||
String responseBody = dl(con);
|
||||
return new DownloadResponse(responseBody, con.getHeaderFields());
|
||||
}
|
||||
// set default user agent
|
||||
if (null == con.getRequestProperty("User-Agent")) {
|
||||
con.setRequestProperty("User-Agent", USER_AGENT);
|
||||
}
|
||||
|
||||
// add default cookies
|
||||
if (getCookies().length() > 0) {
|
||||
con.addRequestProperty("Cookie", getCookies());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Download (via HTTP) the text file located at the supplied URL, and return its
|
||||
* contents. Primarily intended for downloading web pages.
|
||||
*
|
||||
* @param siteUrl the URL of the text file to download
|
||||
* @return the contents of the specified text file
|
||||
*/
|
||||
public String download(String siteUrl) throws IOException, ReCaptchaException {
|
||||
URL url = new URL(siteUrl);
|
||||
HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
|
||||
// HttpsURLConnection con = NetCipher.getHttpsURLConnection(url);
|
||||
return dl(con);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DownloadResponse get(String siteUrl, Map<String, List<String>> requestHeaders)
|
||||
throws IOException, ReCaptchaException {
|
||||
URL url = new URL(siteUrl);
|
||||
HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
|
||||
for (Map.Entry<String, List<String>> pair : requestHeaders.entrySet()) {
|
||||
pair.getValue().stream().forEach(value -> con.addRequestProperty(pair.getKey(), value));
|
||||
}
|
||||
String responseBody = dl(con);
|
||||
return new DownloadResponse(responseBody, con.getHeaderFields());
|
||||
}
|
||||
|
||||
@Override
|
||||
public DownloadResponse get(String siteUrl) throws IOException, ReCaptchaException {
|
||||
URL url = new URL(siteUrl);
|
||||
HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
|
||||
String responseBody = dl(con);
|
||||
return new DownloadResponse(responseBody, con.getHeaderFields());
|
||||
}
|
||||
|
||||
@Override
|
||||
public DownloadResponse post(String siteUrl, String requestBody, Map<String, List<String>> requestHeaders)
|
||||
throws IOException, ReCaptchaException {
|
||||
URL url = new URL(siteUrl);
|
||||
HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
|
||||
con.setRequestMethod("POST");
|
||||
for (Map.Entry<String, List<String>> pair : requestHeaders.entrySet()) {
|
||||
pair.getValue().stream().forEach(value -> con.addRequestProperty(pair.getKey(), value));
|
||||
}
|
||||
// set fields to default if not set already
|
||||
setDefaults(con);
|
||||
|
||||
byte[] postDataBytes = requestBody.toString().getBytes("UTF-8");
|
||||
con.setRequestProperty("Content-Length", String.valueOf(postDataBytes.length));
|
||||
|
||||
con.setDoOutput(true);
|
||||
con.getOutputStream().write(postDataBytes);
|
||||
|
||||
BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
|
||||
StringBuilder sb = new StringBuilder();
|
||||
String inputLine;
|
||||
while ((inputLine = in.readLine()) != null) {
|
||||
sb.append(inputLine);
|
||||
}
|
||||
return new DownloadResponse(sb.toString(), con.getHeaderFields());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,31 +16,31 @@ import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsE
|
|||
|
||||
public class YoutubeCommentsExtractorTest {
|
||||
|
||||
private static YoutubeCommentsExtractor extractor;
|
||||
private static YoutubeCommentsExtractor extractor;
|
||||
|
||||
@BeforeClass
|
||||
public static void setUp() throws Exception {
|
||||
NewPipe.init(Downloader.getInstance());
|
||||
extractor = (YoutubeCommentsExtractor) YouTube
|
||||
.getCommentsExtractor("https://www.youtube.com/watch?v=rrgFN3AxGfs");
|
||||
extractor.fetchPage();
|
||||
}
|
||||
@BeforeClass
|
||||
public static void setUp() throws Exception {
|
||||
NewPipe.init(Downloader.getInstance());
|
||||
extractor = (YoutubeCommentsExtractor) YouTube
|
||||
.getCommentsExtractor("https://www.youtube.com/watch?v=rrgFN3AxGfs");
|
||||
extractor.fetchPage();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetComments() throws IOException, ExtractionException {
|
||||
boolean result = false;
|
||||
InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
|
||||
result = findInComments(comments, "i should really be in the top comment.lol");
|
||||
@Test
|
||||
public void testGetComments() throws IOException, ExtractionException {
|
||||
boolean result = false;
|
||||
InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
|
||||
result = findInComments(comments, "i should really be in the top comment.lol");
|
||||
|
||||
while (comments.hasNextPage()) {
|
||||
comments = extractor.getPage(comments.getNextPageUrl());
|
||||
result = findInComments(comments, "i should really be in the top comment.lol");
|
||||
}
|
||||
while (comments.hasNextPage()) {
|
||||
comments = extractor.getPage(comments.getNextPageUrl());
|
||||
result = findInComments(comments, "i should really be in the top comment.lol");
|
||||
}
|
||||
|
||||
assertTrue(result);
|
||||
}
|
||||
assertTrue(result);
|
||||
}
|
||||
|
||||
private boolean findInComments(InfoItemsPage<CommentsInfoItem> comments, String comment) {
|
||||
return comments.getItems().stream().filter(c -> c.getCommentText().contains(comment)).findAny().isPresent();
|
||||
}
|
||||
private boolean findInComments(InfoItemsPage<CommentsInfoItem> comments, String comment) {
|
||||
return comments.getItems().stream().filter(c -> c.getCommentText().contains(comment)).findAny().isPresent();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue