[SoundCloud] Fix extraction of client id

- Hardcoded id and check at the first usage.
- As a fallback, and considering that the scripts containing the client
id were all split up, try searching it in each of them.
This commit is contained in:
Mauricio Colli 2019-10-22 13:21:24 -03:00 committed by Tobias Groza
parent 250c0bb1e8
commit 4fc18a6994
2 changed files with 40 additions and 21 deletions

View file

@ -7,9 +7,12 @@ import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.schabi.newpipe.extractor.DownloadResponse;
import org.schabi.newpipe.extractor.Downloader; import org.schabi.newpipe.extractor.Downloader;
import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.channel.ChannelInfoItemsCollector; import org.schabi.newpipe.extractor.channel.ChannelInfoItemsCollector;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
@ -21,6 +24,7 @@ import java.io.IOException;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.text.ParseException; import java.text.ParseException;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.Collections;
import java.util.Date; import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
@ -32,29 +36,43 @@ public class SoundcloudParsingHelper {
private SoundcloudParsingHelper() { private SoundcloudParsingHelper() {
} }
public static String clientId() throws ReCaptchaException, IOException, RegexException { public static String clientId() throws ExtractionException, IOException {
if (clientId != null && !clientId.isEmpty()) return clientId; if (clientId != null && !clientId.isEmpty()) return clientId;
Downloader dl = NewPipe.getDownloader(); Downloader dl = NewPipe.getDownloader();
String response = dl.download("https://soundcloud.com"); clientId = "LHzSAKe8eP9Yy3FgBugfBapRPLncO6Ng"; // Updated on 22/10/19
final String apiUrl = "https://api.soundcloud.com/connect?client_id=" + clientId;
Document doc = Jsoup.parse(response); // Should return 200 to indicate that the client id is valid, a 401 is returned otherwise.
Element jsElement = doc.select("script[src^=https://a-v2.sndcdn.com/assets/app]").first(); // In that case, the fallback method is used.
if (dl.head(apiUrl).getResponseCode() == 200) {
final String clientIdPattern = ",client_id:\"(.*?)\""; return clientId;
try {
final HashMap<String, String> headers = new HashMap<>();
headers.put("Range", "bytes=0-16384");
String js = dl.download(jsElement.attr("src"), headers);
return clientId = Parser.matchGroup1(clientIdPattern, js);
} catch (IOException | RegexException ignored) {
// Ignore it and proceed to download the whole js file
} }
String js = dl.download(jsElement.attr("src")); final DownloadResponse download = dl.get("https://soundcloud.com");
return clientId = Parser.matchGroup1(clientIdPattern, js); String response = download.getResponseBody();
final String clientIdPattern = ",client_id:\"(.*?)\"";
Document doc = Jsoup.parse(response);
final Elements possibleScripts = doc.select("script[src*=\"sndcdn.com/assets/\"][src$=\".js\"]");
// The one containing the client id will likely be the last one
Collections.reverse(possibleScripts);
final HashMap<String, String> headers = new HashMap<>();
headers.put("Range", "bytes=0-16384");
for (Element element : possibleScripts) {
final String srcUrl = element.attr("src");
if (srcUrl != null && !srcUrl.isEmpty()) {
try {
return clientId = Parser.matchGroup1(clientIdPattern, dl.download(srcUrl, headers));
} catch (RegexException ignored) {
// Ignore it and proceed to try searching other script
}
}
}
// Officially give up
throw new ExtractionException("Couldn't extract client id");
} }
public static String toDateString(String time) throws ParsingException { public static String toDateString(String time) throws ParsingException {
@ -79,7 +97,7 @@ public class SoundcloudParsingHelper {
* *
* See https://developers.soundcloud.com/docs/api/reference#resolve * See https://developers.soundcloud.com/docs/api/reference#resolve
*/ */
public static JsonObject resolveFor(Downloader downloader, String url) throws IOException, ReCaptchaException, ParsingException { public static JsonObject resolveFor(Downloader downloader, String url) throws IOException, ExtractionException {
String apiUrl = "https://api.soundcloud.com/resolve" String apiUrl = "https://api.soundcloud.com/resolve"
+ "?url=" + URLEncoder.encode(url, "UTF-8") + "?url=" + URLEncoder.encode(url, "UTF-8")
+ "&client_id=" + clientId(); + "&client_id=" + clientId();

View file

@ -1,5 +1,6 @@
package org.schabi.newpipe.extractor.services.soundcloud; package org.schabi.newpipe.extractor.services.soundcloud;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory; import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory;
@ -48,10 +49,10 @@ public class SoundcloudSearchQueryHandlerFactory extends SearchQueryHandlerFacto
} catch (UnsupportedEncodingException e) { } catch (UnsupportedEncodingException e) {
throw new ParsingException("Could not encode query", e); throw new ParsingException("Could not encode query", e);
} catch (IOException e) {
throw new ParsingException("Could not get client id", e);
} catch (ReCaptchaException e) { } catch (ReCaptchaException e) {
throw new ParsingException("ReCaptcha required", e); throw new ParsingException("ReCaptcha required", e);
} catch (IOException | ExtractionException e) {
throw new ParsingException("Could not get client id", e);
} }
} }