Bandcamp channel extractor (ignoring everything but tracks)
This commit is contained in:
parent
5281456899
commit
a42c77425d
11 changed files with 252 additions and 30 deletions
|
@ -10,6 +10,7 @@ import org.schabi.newpipe.extractor.kiosk.KioskList;
|
|||
import org.schabi.newpipe.extractor.linkhandler.*;
|
||||
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
|
||||
import org.schabi.newpipe.extractor.search.SearchExtractor;
|
||||
import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampChannelExtractor;
|
||||
import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampSearchExtractor;
|
||||
import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampStreamExtractor;
|
||||
import org.schabi.newpipe.extractor.services.bandcamp.linkHandler.BandcampChannelLinkHandlerFactory;
|
||||
|
@ -81,7 +82,7 @@ public class BandcampService extends StreamingService {
|
|||
|
||||
@Override
|
||||
public ChannelExtractor getChannelExtractor(ListLinkHandler linkHandler) throws ExtractionException {
|
||||
return null;
|
||||
return new BandcampChannelExtractor(this, linkHandler);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,155 @@
|
|||
// Created by Fynn Godau 2019, licensed GNU GPL version 3 or later
|
||||
|
||||
package org.schabi.newpipe.extractor.services.bandcamp.extractors;
|
||||
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.schabi.newpipe.extractor.NewPipe;
|
||||
import org.schabi.newpipe.extractor.StreamingService;
|
||||
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
|
||||
import org.schabi.newpipe.extractor.downloader.Downloader;
|
||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
||||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import java.io.IOException;
|
||||
|
||||
public class BandcampChannelExtractor extends ChannelExtractor {
|
||||
|
||||
private JSONObject channelInfo;
|
||||
|
||||
public BandcampChannelExtractor(StreamingService service, ListLinkHandler linkHandler) throws ParsingException {
|
||||
super(service, linkHandler);
|
||||
|
||||
channelInfo = getArtistDetails(getId());
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch artist details from mobile endpoint.
|
||||
* <a href=https://notabug.org/fynngodau/bandcampDirect/wiki/rewindBandcamp+%E2%80%93+Fetching+artist+details>
|
||||
* I once took a moment to note down how it works.</a>
|
||||
*/
|
||||
public static JSONObject getArtistDetails(String id) throws ParsingException {
|
||||
try {
|
||||
return
|
||||
new JSONObject(
|
||||
NewPipe.getDownloader().post(
|
||||
"https://bandcamp.com/api/mobile/22/band_details",
|
||||
null,
|
||||
("{\"band_id\":\"" + id + "\"}").getBytes()
|
||||
).responseBody()
|
||||
);
|
||||
} catch (IOException | ReCaptchaException e) {
|
||||
throw new ParsingException("Could not download band details", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param id The image ID
|
||||
* @param album Whether this is the cover of an album
|
||||
* @return Url of image with this ID in size 10 which is 1200x1200 (we could also choose size 0
|
||||
* but we don't want something as large as 3460x3460 here, do we?)
|
||||
*/
|
||||
public static String getImageUrl(long id, boolean album) {
|
||||
return "https://f4.bcbits.com/img/" + (album ? 'a' : "") + id + "_10.jpg";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAvatarUrl() {
|
||||
return getImageUrl(channelInfo.getLong("bio_image_id"), false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Why does the mobile endpoint not contain the header?? Or at least not the same one?
|
||||
* Anyway we're back to querying websites
|
||||
*/
|
||||
@Override
|
||||
public String getBannerUrl() throws ParsingException {
|
||||
try {
|
||||
String html = getDownloader().get(channelInfo.getString("bandcamp_url"))
|
||||
.responseBody();
|
||||
|
||||
return new Document(html).getElementById("customHeader")
|
||||
.getElementsByTag("img")
|
||||
.attr("src");
|
||||
|
||||
} catch (IOException | ReCaptchaException e) {
|
||||
throw new ParsingException("Could not download artist web site", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* I had to learn bandcamp stopped providing RSS feeds when appending /feed to any URL
|
||||
* because too few people used it. Bummer!
|
||||
*/
|
||||
@Override
|
||||
public String getFeedUrl() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getSubscriberCount() {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return channelInfo.getString("bio");
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public InfoItemsPage<StreamInfoItem> getInitialPage() throws ParsingException {
|
||||
|
||||
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
||||
|
||||
JSONArray discography = channelInfo.getJSONArray("discography");
|
||||
|
||||
for (int i = 0; i < discography.length(); i++) {
|
||||
// I define discograph as an item that can appear in a discography
|
||||
JSONObject discograph = discography.getJSONObject(i);
|
||||
|
||||
if (!discograph.getString("item_type").equals("track")) continue;
|
||||
|
||||
collector.commit(new BandcampStreamInfoItemExtractor(
|
||||
discograph.getString("title"),
|
||||
BandcampExtractorHelper.getStreamUrlFromIds(
|
||||
discograph.getLong("band_id"),
|
||||
discograph.getLong("item_id"),
|
||||
discograph.getString("item_type")
|
||||
),
|
||||
getImageUrl(
|
||||
discograph.getLong("art_id"), true
|
||||
),
|
||||
discograph.getString("band_name")
|
||||
));
|
||||
}
|
||||
|
||||
return new InfoItemsPage<>(collector, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getNextPageUrl() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public InfoItemsPage<StreamInfoItem> getPage(String pageUrl) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public String getName() {
|
||||
return channelInfo.getString("name");
|
||||
}
|
||||
}
|
|
@ -4,8 +4,11 @@ package org.schabi.newpipe.extractor.services.bandcamp.extractors;
|
|||
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.schabi.newpipe.extractor.NewPipe;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
||||
|
@ -21,7 +24,7 @@ public class BandcampExtractorHelper {
|
|||
* @param variable Name of the variable
|
||||
* @return The JsonObject stored in the variable with this name
|
||||
*/
|
||||
public static JSONObject getJSONFromJavaScriptVariables(String html, String variable) throws JSONException, ParsingException {
|
||||
public static JSONObject getJSONFromJavaScriptVariables(String html, String variable) throws JSONException, ArrayIndexOutOfBoundsException, ParsingException {
|
||||
|
||||
String[] part = html.split("var " + variable + " = ");
|
||||
|
||||
|
@ -52,6 +55,26 @@ public class BandcampExtractorHelper {
|
|||
throw new ParsingException("Unexpected HTML: JSON never ends");
|
||||
}
|
||||
|
||||
/**
|
||||
* Translate all these parameters together to the URL of the corresponding album or track
|
||||
* using the mobile api
|
||||
*/
|
||||
public static String getStreamUrlFromIds(long bandId, long itemId, String itemType) throws ParsingException {
|
||||
|
||||
try {
|
||||
String html = NewPipe.getDownloader().get(
|
||||
"https://bandcamp.com/api/mobile/22/tralbum_details?band_id=" + bandId
|
||||
+ "&tralbum_id=" + itemId + "&tralbum_type=" + itemType.substring(0, 1))
|
||||
.responseBody();
|
||||
|
||||
return new JSONObject(html).getString("bandcamp_url").replace("http://", "https://");
|
||||
|
||||
} catch (JSONException | ReCaptchaException | IOException e) {
|
||||
throw new ParsingException("Ids could not be translated to URL", e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Concatenate all non-null and non-empty strings together while separating them using
|
||||
* the comma parameter
|
||||
|
|
|
@ -88,14 +88,12 @@ public class BandcampSearchExtractor extends SearchExtractor {
|
|||
break;
|
||||
|
||||
case "TRACK":
|
||||
String album = subhead.split("from ")[0].split(" by")[0];
|
||||
|
||||
String[] splitBy = subhead.split(" by");
|
||||
String artist1 = null;
|
||||
if (splitBy.length > 1) {
|
||||
artist1 = subhead.split(" by")[1];
|
||||
}
|
||||
collector.commit(new BandcampStreamInfoItemExtractor(heading, url, image, artist1, album));
|
||||
collector.commit(new BandcampStreamInfoItemExtractor(heading, url, image, artist1));
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -57,6 +57,8 @@ public class BandcampStreamExtractor extends StreamExtractor {
|
|||
return BandcampExtractorHelper.getJSONFromJavaScriptVariables(html, "TralbumData");
|
||||
} catch (JSONException e) {
|
||||
throw new ParsingException("Faulty JSON; page likely does not contain album data", e);
|
||||
} catch (ArrayIndexOutOfBoundsException e) {
|
||||
throw new ParsingException("JSON does not exist", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -15,14 +15,12 @@ public class BandcampStreamInfoItemExtractor implements StreamInfoItemExtractor
|
|||
private String url;
|
||||
private String cover;
|
||||
private String artist;
|
||||
private String albumName;
|
||||
|
||||
public BandcampStreamInfoItemExtractor(String title, String url, String cover, String artist, String albumName) {
|
||||
public BandcampStreamInfoItemExtractor(String title, String url, String cover, String artist) {
|
||||
this.title = title;
|
||||
this.url = url;
|
||||
this.cover = cover;
|
||||
this.artist = artist;
|
||||
this.albumName = albumName;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -2,15 +2,14 @@
|
|||
|
||||
package org.schabi.newpipe.extractor.services.bandcamp.linkHandler;
|
||||
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.schabi.newpipe.extractor.NewPipe;
|
||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
||||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
|
||||
import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampChannelExtractor;
|
||||
import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper;
|
||||
import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampStreamExtractor;
|
||||
import org.schabi.newpipe.extractor.utils.ExtractorHelper;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
@ -31,35 +30,24 @@ public class BandcampChannelLinkHandlerFactory extends ListLinkHandlerFactory {
|
|||
|
||||
return String.valueOf(bandData.getLong("id"));
|
||||
|
||||
} catch (IOException | ReCaptchaException e) {
|
||||
} catch (IOException | ReCaptchaException | ArrayIndexOutOfBoundsException e) {
|
||||
throw new ParsingException("Download failed", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch artist details from mobile endpoint, thereby receiving their URL.
|
||||
* <a href=https://notabug.org/fynngodau/bandcampDirect/wiki/rewindBandcamp+%E2%80%93+Fetching+artist+details>
|
||||
* I once took a moment to note down how it works.</a>
|
||||
*
|
||||
* @throws ParsingException
|
||||
* Uses the mobile endpoint as a "translator" from id to url
|
||||
*/
|
||||
@Override
|
||||
public String getUrl(String id, List<String> contentFilter, String sortFilter) throws ParsingException {
|
||||
try {
|
||||
String data = NewPipe.getDownloader().post(
|
||||
"https://bandcamp.com/api/mobile/22/band_details",
|
||||
null,
|
||||
("{\"band_id\":\"" + id + "\"}").getBytes()
|
||||
).responseBody();
|
||||
|
||||
return new JSONObject(data)
|
||||
return BandcampChannelExtractor.getArtistDetails(id)
|
||||
.getString("bandcamp_url")
|
||||
.replace("http://", "https://");
|
||||
|
||||
|
||||
} catch (IOException | ReCaptchaException e) {
|
||||
throw new ParsingException("Download failed", e);
|
||||
} catch (JSONException e) {
|
||||
throw new ParsingException("JSON does not contain URL (invalid id?) or is otherwise invalid", e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -6,8 +6,8 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
|||
import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;
|
||||
|
||||
/**
|
||||
* Tracks do have IDs, but they are not really useful. That's why id = url.
|
||||
* Instead, URLs are cleaned up so that they always look the same.
|
||||
* Tracks don't have standalone ids, they are always in combination with the band id.
|
||||
* That's why id = url. Instead, URLs are cleaned up so that they always look the same.
|
||||
*/
|
||||
public class BandcampStreamLinkHandlerFactory extends LinkHandlerFactory {
|
||||
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
package org.schabi.newpipe.extractor.services.bandcamp;
|
||||
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
import org.schabi.newpipe.DownloaderTestImpl;
|
||||
import org.schabi.newpipe.extractor.NewPipe;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampChannelExtractor;
|
||||
import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.schabi.newpipe.extractor.ServiceList.bandcamp;
|
||||
|
||||
public class BandcampChannelExtractorTest {
|
||||
|
||||
private static BandcampChannelExtractor extractor;
|
||||
|
||||
@BeforeClass
|
||||
public static void setUp() throws Exception {
|
||||
NewPipe.init(DownloaderTestImpl.getInstance());
|
||||
extractor = (BandcampChannelExtractor) bandcamp
|
||||
.getChannelExtractor("https://zachbenson.bandcamp.com/");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testImageUrl() {
|
||||
assertEquals("https://f4.bcbits.com/img/a2405652335_10.jpg", BandcampChannelExtractor.getImageUrl(2405652335L, true));
|
||||
assertEquals("https://f4.bcbits.com/img/17433693_10.jpg", BandcampChannelExtractor.getImageUrl(17433693L, false));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTranslateIdsToUrl() throws ParsingException {
|
||||
assertEquals("https://zachbenson.bandcamp.com/album/covers", BandcampExtractorHelper.getStreamUrlFromIds(2862267535L, 2063639444L, "album"));
|
||||
// TODO write more test cases
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLength() throws ParsingException {
|
||||
assertTrue(extractor.getInitialPage().getItems().size() > 2);
|
||||
}
|
||||
}
|
|
@ -51,4 +51,14 @@ public class BandcampChannelLinkHandlerFactoryTest {
|
|||
assertEquals("https://infiniteammo.bandcamp.com", linkHandler.getUrl("3321800855"));
|
||||
}
|
||||
|
||||
@Test(expected = ParsingException.class)
|
||||
public void testGetUrlWithInvalidId() throws ParsingException {
|
||||
linkHandler.getUrl("0");
|
||||
}
|
||||
|
||||
@Test(expected = ParsingException.class)
|
||||
public void testGetIdWithInvalidUrl() throws ParsingException {
|
||||
linkHandler.getId("https://bandcamp.com");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -67,4 +67,9 @@ public class BandcampStreamExtractorTest {
|
|||
assertEquals(1, extractor.getAudioStreams().size());
|
||||
}
|
||||
|
||||
@Test(expected = ParsingException.class)
|
||||
public void testInvalidUrl() throws ExtractionException {
|
||||
bandcamp.getStreamExtractor("https://bandcamp.com");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue