Bandcamp channel extractor (ignoring everything but tracks)

This commit is contained in:
Fynn Godau 2019-12-22 00:42:26 +01:00
parent 5281456899
commit a42c77425d
11 changed files with 252 additions and 30 deletions

View file

@ -10,6 +10,7 @@ import org.schabi.newpipe.extractor.kiosk.KioskList;
import org.schabi.newpipe.extractor.linkhandler.*;
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
import org.schabi.newpipe.extractor.search.SearchExtractor;
import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampChannelExtractor;
import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampSearchExtractor;
import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampStreamExtractor;
import org.schabi.newpipe.extractor.services.bandcamp.linkHandler.BandcampChannelLinkHandlerFactory;
@ -81,7 +82,7 @@ public class BandcampService extends StreamingService {
@Override
public ChannelExtractor getChannelExtractor(ListLinkHandler linkHandler) throws ExtractionException {
return null;
return new BandcampChannelExtractor(this, linkHandler);
}
@Override

View file

@ -0,0 +1,155 @@
// Created by Fynn Godau 2019, licensed GNU GPL version 3 or later
package org.schabi.newpipe.extractor.services.bandcamp.extractors;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import javax.annotation.Nonnull;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
public class BandcampChannelExtractor extends ChannelExtractor {
private JSONObject channelInfo;
public BandcampChannelExtractor(StreamingService service, ListLinkHandler linkHandler) throws ParsingException {
super(service, linkHandler);
channelInfo = getArtistDetails(getId());
}
/**
* Fetch artist details from mobile endpoint.
* <a href=https://notabug.org/fynngodau/bandcampDirect/wiki/rewindBandcamp+%E2%80%93+Fetching+artist+details>
* I once took a moment to note down how it works.</a>
*/
public static JSONObject getArtistDetails(String id) throws ParsingException {
try {
return
new JSONObject(
NewPipe.getDownloader().post(
"https://bandcamp.com/api/mobile/22/band_details",
null,
("{\"band_id\":\"" + id + "\"}").getBytes()
).responseBody()
);
} catch (IOException | ReCaptchaException e) {
throw new ParsingException("Could not download band details", e);
}
}
/**
* @param id The image ID
* @param album Whether this is the cover of an album
* @return Url of image with this ID in size 10 which is 1200x1200 (we could also choose size 0
* but we don't want something as large as 3460x3460 here, do we?)
*/
public static String getImageUrl(long id, boolean album) {
return "https://f4.bcbits.com/img/" + (album ? 'a' : "") + id + "_10.jpg";
}
@Override
public String getAvatarUrl() {
return getImageUrl(channelInfo.getLong("bio_image_id"), false);
}
/**
* Why does the mobile endpoint not contain the header?? Or at least not the same one?
* Anyway we're back to querying websites
*/
@Override
public String getBannerUrl() throws ParsingException {
try {
String html = getDownloader().get(channelInfo.getString("bandcamp_url"))
.responseBody();
return new Document(html).getElementById("customHeader")
.getElementsByTag("img")
.attr("src");
} catch (IOException | ReCaptchaException e) {
throw new ParsingException("Could not download artist web site", e);
}
}
/**
* I had to learn bandcamp stopped providing RSS feeds when appending /feed to any URL
* because too few people used it. Bummer!
*/
@Override
public String getFeedUrl() {
return null;
}
@Override
public long getSubscriberCount() {
return -1;
}
@Override
public String getDescription() {
return channelInfo.getString("bio");
}
@Nonnull
@Override
public InfoItemsPage<StreamInfoItem> getInitialPage() throws ParsingException {
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
JSONArray discography = channelInfo.getJSONArray("discography");
for (int i = 0; i < discography.length(); i++) {
// I define discograph as an item that can appear in a discography
JSONObject discograph = discography.getJSONObject(i);
if (!discograph.getString("item_type").equals("track")) continue;
collector.commit(new BandcampStreamInfoItemExtractor(
discograph.getString("title"),
BandcampExtractorHelper.getStreamUrlFromIds(
discograph.getLong("band_id"),
discograph.getLong("item_id"),
discograph.getString("item_type")
),
getImageUrl(
discograph.getLong("art_id"), true
),
discograph.getString("band_name")
));
}
return new InfoItemsPage<>(collector, null);
}
@Override
public String getNextPageUrl() {
return null;
}
@Override
public InfoItemsPage<StreamInfoItem> getPage(String pageUrl) {
return null;
}
@Override
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
}
@Nonnull
@Override
public String getName() {
return channelInfo.getString("name");
}
}

View file

@ -4,8 +4,11 @@ package org.schabi.newpipe.extractor.services.bandcamp.extractors;
import org.json.JSONException;
import org.json.JSONObject;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
@ -21,7 +24,7 @@ public class BandcampExtractorHelper {
* @param variable Name of the variable
* @return The JsonObject stored in the variable with this name
*/
public static JSONObject getJSONFromJavaScriptVariables(String html, String variable) throws JSONException, ParsingException {
public static JSONObject getJSONFromJavaScriptVariables(String html, String variable) throws JSONException, ArrayIndexOutOfBoundsException, ParsingException {
String[] part = html.split("var " + variable + " = ");
@ -52,6 +55,26 @@ public class BandcampExtractorHelper {
throw new ParsingException("Unexpected HTML: JSON never ends");
}
/**
 * Translate a band id, an item id and an item type to the URL of the corresponding
 * album or track by asking the mobile api.
 *
 * @param bandId   Sent as {@code band_id}
 * @param itemId   Sent as {@code tralbum_id}
 * @param itemType Only its first character is sent, as {@code tralbum_type}
 * @return The {@code bandcamp_url} of the response, with {@code http://} replaced by {@code https://}
 * @throws ParsingException if the download fails or the response does not contain the url
 */
public static String getStreamUrlFromIds(long bandId, long itemId, String itemType) throws ParsingException {
    final String endpoint = "https://bandcamp.com/api/mobile/22/tralbum_details?band_id=" + bandId
            + "&tralbum_id=" + itemId
            + "&tralbum_type=" + itemType.substring(0, 1);

    try {
        final String responseBody = NewPipe.getDownloader().get(endpoint).responseBody();
        final JSONObject details = new JSONObject(responseBody);
        final String url = details.getString("bandcamp_url");

        return url.replace("http://", "https://");
    } catch (JSONException | ReCaptchaException | IOException e) {
        throw new ParsingException("Ids could not be translated to URL", e);
    }
}
/**
* Concatenate all non-null and non-empty strings together while separating them using
* the comma parameter

View file

@ -88,14 +88,12 @@ public class BandcampSearchExtractor extends SearchExtractor {
break;
case "TRACK":
String album = subhead.split("from ")[0].split(" by")[0];
String[] splitBy = subhead.split(" by");
String artist1 = null;
if (splitBy.length > 1) {
artist1 = subhead.split(" by")[1];
}
collector.commit(new BandcampStreamInfoItemExtractor(heading, url, image, artist1, album));
collector.commit(new BandcampStreamInfoItemExtractor(heading, url, image, artist1));
break;
}

View file

@ -57,6 +57,8 @@ public class BandcampStreamExtractor extends StreamExtractor {
return BandcampExtractorHelper.getJSONFromJavaScriptVariables(html, "TralbumData");
} catch (JSONException e) {
throw new ParsingException("Faulty JSON; page likely does not contain album data", e);
} catch (ArrayIndexOutOfBoundsException e) {
throw new ParsingException("JSON does not exist", e);
}
}

View file

@ -15,14 +15,12 @@ public class BandcampStreamInfoItemExtractor implements StreamInfoItemExtractor
private String url;
private String cover;
private String artist;
private String albumName;
public BandcampStreamInfoItemExtractor(String title, String url, String cover, String artist, String albumName) {
public BandcampStreamInfoItemExtractor(String title, String url, String cover, String artist) {
this.title = title;
this.url = url;
this.cover = cover;
this.artist = artist;
this.albumName = albumName;
}
@Override

View file

@ -2,15 +2,14 @@
package org.schabi.newpipe.extractor.services.bandcamp.linkHandler;
import org.json.JSONException;
import org.json.JSONObject;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampChannelExtractor;
import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper;
import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampStreamExtractor;
import org.schabi.newpipe.extractor.utils.ExtractorHelper;
import java.io.IOException;
import java.util.List;
@ -31,35 +30,24 @@ public class BandcampChannelLinkHandlerFactory extends ListLinkHandlerFactory {
return String.valueOf(bandData.getLong("id"));
} catch (IOException | ReCaptchaException e) {
} catch (IOException | ReCaptchaException | ArrayIndexOutOfBoundsException e) {
throw new ParsingException("Download failed", e);
}
}
/**
* Fetch artist details from mobile endpoint, thereby receiving their URL.
* <a href=https://notabug.org/fynngodau/bandcampDirect/wiki/rewindBandcamp+%E2%80%93+Fetching+artist+details>
* I once took a moment to note down how it works.</a>
*
* @throws ParsingException
* Uses the mobile endpoint as a "translator" from id to url
*/
@Override
public String getUrl(String id, List<String> contentFilter, String sortFilter) throws ParsingException {
try {
String data = NewPipe.getDownloader().post(
"https://bandcamp.com/api/mobile/22/band_details",
null,
("{\"band_id\":\"" + id + "\"}").getBytes()
).responseBody();
return new JSONObject(data)
return BandcampChannelExtractor.getArtistDetails(id)
.getString("bandcamp_url")
.replace("http://", "https://");
} catch (IOException | ReCaptchaException e) {
throw new ParsingException("Download failed", e);
} catch (JSONException e) {
throw new ParsingException("JSON does not contain URL (invalid id?) or is otherwise invalid", e);
}
}
/**

View file

@ -6,8 +6,8 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;
/**
* Tracks do have IDs, but they are not really useful. That's why id = url.
* Instead, URLs are cleaned up so that they always look the same.
* Tracks don't have standalone ids, they are always in combination with the band id.
* That's why id = url. Instead, URLs are cleaned up so that they always look the same.
*/
public class BandcampStreamLinkHandlerFactory extends LinkHandlerFactory {

View file

@ -0,0 +1,42 @@
package org.schabi.newpipe.extractor.services.bandcamp;
import org.junit.BeforeClass;
import org.junit.Test;
import org.schabi.newpipe.DownloaderTestImpl;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampChannelExtractor;
import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.schabi.newpipe.extractor.ServiceList.bandcamp;
/**
 * Tests for {@link BandcampChannelExtractor} and the id-to-url translation in
 * {@link BandcampExtractorHelper}, run against the channel
 * {@code https://zachbenson.bandcamp.com/}.
 */
public class BandcampChannelExtractorTest {

    private static BandcampChannelExtractor extractor;

    /** Initialises NewPipe with the test downloader and creates the extractor under test. */
    @BeforeClass
    public static void setUp() throws Exception {
        NewPipe.init(DownloaderTestImpl.getInstance());

        final String channelUrl = "https://zachbenson.bandcamp.com/";
        extractor = (BandcampChannelExtractor) bandcamp.getChannelExtractor(channelUrl);
    }

    /** Image urls carry an 'a' prefix for album covers and none otherwise. */
    @Test
    public void testImageUrl() {
        final String albumCoverUrl = BandcampChannelExtractor.getImageUrl(2405652335L, true);
        assertEquals("https://f4.bcbits.com/img/a2405652335_10.jpg", albumCoverUrl);

        final String plainImageUrl = BandcampChannelExtractor.getImageUrl(17433693L, false);
        assertEquals("https://f4.bcbits.com/img/17433693_10.jpg", plainImageUrl);
    }

    /** A band id, item id and item type are translated to the item's https url. */
    @Test
    public void testTranslateIdsToUrl() throws ParsingException {
        final String albumUrl = BandcampExtractorHelper.getStreamUrlFromIds(2862267535L, 2063639444L, "album");
        assertEquals("https://zachbenson.bandcamp.com/album/covers", albumUrl);
        // TODO write more test cases
    }

    /** The initial page of the channel must contain more than two items. */
    @Test
    public void testLength() throws ParsingException {
        final int itemCount = extractor.getInitialPage().getItems().size();
        assertTrue(itemCount > 2);
    }
}

View file

@ -51,4 +51,14 @@ public class BandcampChannelLinkHandlerFactoryTest {
assertEquals("https://infiniteammo.bandcamp.com", linkHandler.getUrl("3321800855"));
}
/**
 * Asking for the url of the id {@code "0"} is expected to fail with a
 * {@link ParsingException}.
 */
@Test(expected = ParsingException.class)
public void testGetUrlWithInvalidId() throws ParsingException {
    final String invalidId = "0";
    linkHandler.getUrl(invalidId);
}
/**
 * Asking for the id of {@code https://bandcamp.com}, which is not an artist page url,
 * is expected to fail with a {@link ParsingException}.
 */
@Test(expected = ParsingException.class)
public void testGetIdWithInvalidUrl() throws ParsingException {
    final String invalidUrl = "https://bandcamp.com";
    linkHandler.getId(invalidUrl);
}
}

View file

@ -67,4 +67,9 @@ public class BandcampStreamExtractorTest {
assertEquals(1, extractor.getAudioStreams().size());
}
/**
 * Requesting a stream extractor for {@code https://bandcamp.com} is expected to fail
 * with a {@link ParsingException}.
 */
@Test(expected = ParsingException.class)
public void testInvalidUrl() throws ExtractionException {
    final String invalidUrl = "https://bandcamp.com";
    bandcamp.getStreamExtractor(invalidUrl);
}
}