2020-11-12 21:19:45 +00:00
|
|
|
package me.kavin.piped.utils;
|
|
|
|
|
|
|
|
import java.io.IOException;
|
2021-03-04 14:14:52 +00:00
|
|
|
import java.net.HttpCookie;
|
2020-11-12 21:19:45 +00:00
|
|
|
import java.net.URI;
|
|
|
|
import java.net.http.HttpRequest;
|
|
|
|
import java.net.http.HttpRequest.BodyPublisher;
|
2021-03-04 14:14:52 +00:00
|
|
|
import java.net.http.HttpRequest.BodyPublishers;
|
2020-11-12 21:19:45 +00:00
|
|
|
import java.net.http.HttpRequest.Builder;
|
|
|
|
import java.net.http.HttpResponse;
|
|
|
|
import java.net.http.HttpResponse.BodyHandlers;
|
2021-03-04 14:14:52 +00:00
|
|
|
import java.util.Map;
|
2020-11-12 21:19:45 +00:00
|
|
|
|
2021-03-04 14:14:52 +00:00
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
|
import org.jsoup.Jsoup;
|
|
|
|
import org.jsoup.nodes.Element;
|
2020-11-12 21:19:45 +00:00
|
|
|
import org.schabi.newpipe.extractor.downloader.Downloader;
|
|
|
|
import org.schabi.newpipe.extractor.downloader.Request;
|
|
|
|
import org.schabi.newpipe.extractor.downloader.Response;
|
|
|
|
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
|
|
|
|
2021-03-04 14:14:52 +00:00
|
|
|
import com.grack.nanojson.JsonParserException;
|
|
|
|
|
|
|
|
import it.unimi.dsi.fastutil.objects.Object2ObjectOpenHashMap;
|
2020-11-12 21:19:45 +00:00
|
|
|
import me.kavin.piped.consts.Constants;
|
2021-03-04 14:14:52 +00:00
|
|
|
import me.kavin.piped.utils.obj.SolvedCaptcha;
|
2020-11-12 21:19:45 +00:00
|
|
|
|
|
|
|
public class DownloaderImpl extends Downloader {
|
|
|
|
|
2021-03-04 14:14:52 +00:00
|
|
|
private static HttpCookie saved_cookie;
|
|
|
|
private static final Object cookie_lock = new Object();
|
|
|
|
|
2020-11-12 21:19:45 +00:00
|
|
|
/**
|
|
|
|
* Executes a request with HTTP/2.
|
|
|
|
*/
|
|
|
|
@Override
|
|
|
|
public Response execute(Request request) throws IOException, ReCaptchaException {
|
|
|
|
|
2021-02-24 09:52:29 +00:00
|
|
|
// TODO: HTTP/3 aka QUIC
|
|
|
|
Builder builder = HttpRequest.newBuilder(URI.create(request.url()));
|
2020-11-12 21:19:45 +00:00
|
|
|
|
2021-02-24 09:52:29 +00:00
|
|
|
byte[] data = request.dataToSend();
|
|
|
|
BodyPublisher publisher = data == null ? HttpRequest.BodyPublishers.noBody()
|
|
|
|
: HttpRequest.BodyPublishers.ofByteArray(data);
|
2020-11-12 21:19:45 +00:00
|
|
|
|
2021-02-24 09:52:29 +00:00
|
|
|
builder.method(request.httpMethod(), publisher);
|
2020-11-12 21:19:45 +00:00
|
|
|
|
2021-02-24 09:52:29 +00:00
|
|
|
builder.setHeader("User-Agent", Constants.USER_AGENT);
|
2020-11-12 21:19:45 +00:00
|
|
|
|
2021-03-04 14:14:52 +00:00
|
|
|
if (saved_cookie != null && !saved_cookie.hasExpired())
|
|
|
|
builder.setHeader("Cookie", saved_cookie.getName() + "=" + saved_cookie.getValue());
|
|
|
|
|
|
|
|
request.headers().forEach((name, values) -> values.forEach(value -> builder.header(name, value)));
|
|
|
|
|
2021-02-24 09:52:29 +00:00
|
|
|
HttpResponse<String> response = null;
|
2020-11-12 21:19:45 +00:00
|
|
|
|
2021-02-24 09:52:29 +00:00
|
|
|
try {
|
|
|
|
response = Constants.h2client.send(builder.build(), BodyHandlers.ofString());
|
|
|
|
} catch (InterruptedException e) {
|
|
|
|
// ignored
|
|
|
|
}
|
2020-11-12 21:19:45 +00:00
|
|
|
|
2021-02-24 09:52:29 +00:00
|
|
|
if (response.statusCode() == 429) {
|
2021-03-04 14:14:52 +00:00
|
|
|
|
|
|
|
synchronized (cookie_lock) {
|
|
|
|
|
|
|
|
if (saved_cookie != null && saved_cookie.hasExpired())
|
|
|
|
saved_cookie = null;
|
|
|
|
|
|
|
|
String redir_url = String.valueOf(response.request().uri());
|
|
|
|
|
|
|
|
if (saved_cookie == null && redir_url.startsWith("https://www.google.com/sorry")) {
|
|
|
|
|
|
|
|
Map<String, String> formParams = new Object2ObjectOpenHashMap<>();
|
|
|
|
String sitekey = null, data_s = null;
|
|
|
|
|
|
|
|
for (Element el : Jsoup.parse(response.body()).selectFirst("form").children()) {
|
|
|
|
String name;
|
|
|
|
if (!(name = el.tagName()).equals("script")) {
|
|
|
|
if (name.equals("input"))
|
|
|
|
formParams.put(el.attr("name"), el.attr("value"));
|
|
|
|
else if (name.equals("div") && el.attr("id").equals("recaptcha")) {
|
|
|
|
sitekey = el.attr("data-sitekey");
|
|
|
|
data_s = el.attr("data-s");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (sitekey == null || data_s == null)
|
|
|
|
throw new ReCaptchaException("Could not get recaptcha", redir_url);
|
|
|
|
|
|
|
|
SolvedCaptcha solved = null;
|
|
|
|
|
|
|
|
try {
|
|
|
|
solved = CaptchaSolver.solve(redir_url, sitekey, data_s);
|
|
|
|
} catch (JsonParserException | InterruptedException e) {
|
|
|
|
e.printStackTrace();
|
|
|
|
}
|
|
|
|
|
|
|
|
formParams.put("g-recaptcha-response", solved.getRecaptchaResponse());
|
|
|
|
|
|
|
|
Builder formBuilder = HttpRequest.newBuilder(URI.create("https://www.google.com/sorry/index"));
|
|
|
|
|
|
|
|
formBuilder.setHeader("User-Agent", Constants.USER_AGENT);
|
|
|
|
|
|
|
|
StringBuilder formBody = new StringBuilder();
|
|
|
|
|
|
|
|
formParams.forEach((name, value) -> {
|
|
|
|
formBody.append(name + "=" + URLUtils.silentEncode(value) + "&");
|
|
|
|
});
|
|
|
|
|
|
|
|
formBuilder.header("content-type", "application/x-www-form-urlencoded");
|
|
|
|
|
|
|
|
formBuilder.method("POST",
|
|
|
|
BodyPublishers.ofString(String.valueOf(formBody.substring(0, formBody.length() - 1))));
|
|
|
|
|
|
|
|
try {
|
|
|
|
HttpResponse<String> formResponse = Constants.h2_no_redir_client.send(formBuilder.build(),
|
|
|
|
BodyHandlers.ofString());
|
|
|
|
|
|
|
|
saved_cookie = HttpCookie.parse(URLUtils.silentDecode(StringUtils
|
|
|
|
.substringAfter(formResponse.headers().firstValue("Location").get(), "google_abuse=")))
|
|
|
|
.get(0);
|
|
|
|
|
|
|
|
} catch (InterruptedException e) {
|
|
|
|
e.printStackTrace();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (saved_cookie != null) // call again as captcha has been solved or cookie has not expired.
|
|
|
|
execute(request);
|
|
|
|
}
|
|
|
|
|
2021-02-24 09:52:29 +00:00
|
|
|
}
|
2020-11-12 21:19:45 +00:00
|
|
|
|
2021-02-24 09:52:29 +00:00
|
|
|
return new Response(response.statusCode(), "UNDEFINED", response.headers().map(), response.body(),
|
|
|
|
String.valueOf(response.uri()));
|
2020-11-12 21:19:45 +00:00
|
|
|
}
|
|
|
|
}
|