Bandcamp channel extractor (ignoring everything but tracks)

2019-12-22 00:42:26 +01:00 · 2019-12-22 00:42:26 +01:00 · a42c77425d
commit a42c77425d
parent 5281456899
11 changed files with 252 additions and 30 deletions
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampService.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampService.java
@ -10,6 +10,7 @@ import org.schabi.newpipe.extractor.kiosk.KioskList;
 import org.schabi.newpipe.extractor.linkhandler.*;
 import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
 import org.schabi.newpipe.extractor.search.SearchExtractor;
+import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampChannelExtractor;
 import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampSearchExtractor;
 import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampStreamExtractor;
 import org.schabi.newpipe.extractor.services.bandcamp.linkHandler.BandcampChannelLinkHandlerFactory;
@ -81,7 +82,7 @@ public class BandcampService extends StreamingService {

    @Override
    public ChannelExtractor getChannelExtractor(ListLinkHandler linkHandler) throws ExtractionException {
-        return null;
+        return new BandcampChannelExtractor(this, linkHandler);
    }

    @Override
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampChannelExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampChannelExtractor.java
@ -0,0 +1,155 @@
+// Created by Fynn Godau 2019, licensed GNU GPL version 3 or later
+
+package org.schabi.newpipe.extractor.services.bandcamp.extractors;
+
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.schabi.newpipe.extractor.NewPipe;
+import org.schabi.newpipe.extractor.StreamingService;
+import org.schabi.newpipe.extractor.channel.ChannelExtractor;
+import org.schabi.newpipe.extractor.downloader.Downloader;
+import org.schabi.newpipe.extractor.exceptions.ExtractionException;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
+import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
+import org.schabi.newpipe.extractor.stream.StreamInfoItem;
+import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
+
+import javax.annotation.Nonnull;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+
+public class BandcampChannelExtractor extends ChannelExtractor {
+
+    private JSONObject channelInfo;
+
+    public BandcampChannelExtractor(StreamingService service, ListLinkHandler linkHandler) throws ParsingException {
+        super(service, linkHandler);
+
+        channelInfo = getArtistDetails(getId());
+    }
+
+    /**
+     * Fetch artist details from mobile endpoint.
+     * <a href=https://notabug.org/fynngodau/bandcampDirect/wiki/rewindBandcamp+%E2%80%93+Fetching+artist+details>
+     * I once took a moment to note down how it works.</a>
+     */
+    public static JSONObject getArtistDetails(String id) throws ParsingException {
+        try {
+            return
+                    new JSONObject(
+                            NewPipe.getDownloader().post(
+                                    "https://bandcamp.com/api/mobile/22/band_details",
+                                    null,
+                                    ("{\"band_id\":\"" + id + "\"}").getBytes()
+                            ).responseBody()
+                    );
+        } catch (IOException | ReCaptchaException e) {
+            throw new ParsingException("Could not download band details", e);
+        }
+    }
+
+    /**
+     * @param id    The image ID
+     * @param album Whether this is the cover of an album
+     * @return Url of image with this ID in size 10 which is 1200x1200 (we could also choose size 0
+     * but we don't want something as large as 3460x3460 here, do we?)
+     */
+    public static String getImageUrl(long id, boolean album) {
+        return "https://f4.bcbits.com/img/" + (album ? 'a' : "") + id + "_10.jpg";
+    }
+
+    @Override
+    public String getAvatarUrl() {
+        return getImageUrl(channelInfo.getLong("bio_image_id"), false);
+    }
+
+    /**
+     * Why does the mobile endpoint not contain the header?? Or at least not the same one?
+     * Anyway we're back to querying websites
+     */
+    @Override
+    public String getBannerUrl() throws ParsingException {
+        try {
+            String html = getDownloader().get(channelInfo.getString("bandcamp_url"))
+                    .responseBody();
+
+            return new Document(html).getElementById("customHeader")
+                    .getElementsByTag("img")
+                    .attr("src");
+
+        } catch (IOException | ReCaptchaException e) {
+            throw new ParsingException("Could not download artist web site", e);
+        }
+    }
+
+    /**
+     * I had to learn bandcamp stopped providing RSS feeds when appending /feed to any URL
+     * because too few people used it. Bummer!
+     */
+    @Override
+    public String getFeedUrl() {
+        return null;
+    }
+
+    @Override
+    public long getSubscriberCount() {
+        return -1;
+    }
+
+    @Override
+    public String getDescription() {
+        return channelInfo.getString("bio");
+    }
+
+    @Nonnull
+    @Override
+    public InfoItemsPage<StreamInfoItem> getInitialPage() throws ParsingException {
+
+        StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
+
+        JSONArray discography = channelInfo.getJSONArray("discography");
+
+        for (int i = 0; i < discography.length(); i++) {
+            // I define discograph as an item that can appear in a discography
+            JSONObject discograph = discography.getJSONObject(i);
+
+            if (!discograph.getString("item_type").equals("track")) continue;
+
+            collector.commit(new BandcampStreamInfoItemExtractor(
+                    discograph.getString("title"),
+                    BandcampExtractorHelper.getStreamUrlFromIds(
+                            discograph.getLong("band_id"),
+                            discograph.getLong("item_id"),
+                            discograph.getString("item_type")
+                    ),
+                    getImageUrl(
+                            discograph.getLong("art_id"), true
+                    ),
+                    discograph.getString("band_name")
+            ));
+        }
+
+        return new InfoItemsPage<>(collector, null);
+    }
+
+    @Override
+    public String getNextPageUrl() {
+        return null;
+    }
+
+    @Override
+    public InfoItemsPage<StreamInfoItem> getPage(String pageUrl) {
+        return null;
+    }
+
+    @Override
+    public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
+    }
+
+    @Nonnull
+    @Override
+    public String getName() {
+        return channelInfo.getString("name");
+    }
+}
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java
@ -4,8 +4,11 @@ package org.schabi.newpipe.extractor.services.bandcamp.extractors;

 import org.json.JSONException;
 import org.json.JSONObject;
+import org.schabi.newpipe.extractor.NewPipe;
 import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;

+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;

@ -21,7 +24,7 @@ public class BandcampExtractorHelper {
     * @param variable Name of the variable
     * @return The JsonObject stored in the variable with this name
     */
-    public static JSONObject getJSONFromJavaScriptVariables(String html, String variable) throws JSONException, ParsingException {
+    public static JSONObject getJSONFromJavaScriptVariables(String html, String variable) throws JSONException, ArrayIndexOutOfBoundsException, ParsingException {

        String[] part = html.split("var " + variable + " = ");

@ -52,6 +55,26 @@ public class BandcampExtractorHelper {
        throw new ParsingException("Unexpected HTML: JSON never ends");
    }

+    /**
+     * Resolve the web URL of an album or track by asking the mobile api's
+     * <code>tralbum_details</code> endpoint.
+     *
+     * @param bandId   Id of the band the item belongs to
+     * @param itemId   Id of the album or track
+     * @param itemType Type of the item; only its first character is sent to the endpoint
+     *                 (e.g. "track" becomes "t")
+     * @return The <code>bandcamp_url</code> of the item, with http:// replaced by https://
+     * @throws ParsingException if the download fails or the response lacks a usable URL
+     */
+    public static String getStreamUrlFromIds(long bandId, long itemId, String itemType) throws ParsingException {
+
+        String requestUrl = "https://bandcamp.com/api/mobile/22/tralbum_details?band_id=" + bandId
+                + "&tralbum_id=" + itemId + "&tralbum_type=" + itemType.substring(0, 1);
+
+        try {
+            String response = NewPipe.getDownloader().get(requestUrl).responseBody();
+            JSONObject details = new JSONObject(response);
+
+            return details.getString("bandcamp_url").replace("http://", "https://");
+
+        } catch (JSONException | ReCaptchaException | IOException e) {
+            throw new ParsingException("Ids could not be translated to URL", e);
+        }
+
+    }
+
    /**
     * Concatenate all non-null and non-empty strings together while separating them using
     * the comma parameter
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampSearchExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampSearchExtractor.java
@ -88,14 +88,12 @@ public class BandcampSearchExtractor extends SearchExtractor {
                    break;

                case "TRACK":
-                    String album = subhead.split("from ")[0].split(" by")[0];
-
                    String[] splitBy = subhead.split(" by");
                    String artist1 = null;
                    if (splitBy.length > 1) {
                        artist1 = subhead.split(" by")[1];
                    }
-                    collector.commit(new BandcampStreamInfoItemExtractor(heading, url, image, artist1, album));
+                    collector.commit(new BandcampStreamInfoItemExtractor(heading, url, image, artist1));
                    break;
            }

--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamExtractor.java
@ -57,6 +57,8 @@ public class BandcampStreamExtractor extends StreamExtractor {
            return BandcampExtractorHelper.getJSONFromJavaScriptVariables(html, "TralbumData");
        } catch (JSONException e) {
            throw new ParsingException("Faulty JSON; page likely does not contain album data", e);
+        } catch (ArrayIndexOutOfBoundsException e) {
+            throw new ParsingException("JSON does not exist", e);
        }
    }

--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamInfoItemExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamInfoItemExtractor.java
@ -15,14 +15,12 @@ public class BandcampStreamInfoItemExtractor implements StreamInfoItemExtractor
    private String url;
    private String cover;
    private String artist;
-    private String albumName;

-    public BandcampStreamInfoItemExtractor(String title, String url, String cover, String artist, String albumName) {
+    public BandcampStreamInfoItemExtractor(String title, String url, String cover, String artist) {
        this.title = title;
        this.url = url;
        this.cover = cover;
        this.artist = artist;
-        this.albumName = albumName;
    }

    @Override
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampChannelLinkHandlerFactory.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampChannelLinkHandlerFactory.java
@ -2,15 +2,14 @@

 package org.schabi.newpipe.extractor.services.bandcamp.linkHandler;

+import org.json.JSONException;
 import org.json.JSONObject;
 import org.schabi.newpipe.extractor.NewPipe;
-import org.schabi.newpipe.extractor.exceptions.ExtractionException;
 import org.schabi.newpipe.extractor.exceptions.ParsingException;
 import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
 import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
+import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampChannelExtractor;
 import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper;
-import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampStreamExtractor;
-import org.schabi.newpipe.extractor.utils.ExtractorHelper;

 import java.io.IOException;
 import java.util.List;
@ -31,35 +30,24 @@ public class BandcampChannelLinkHandlerFactory extends ListLinkHandlerFactory {

            return String.valueOf(bandData.getLong("id"));

-        } catch (IOException | ReCaptchaException e) {
+        } catch (IOException | ReCaptchaException | ArrayIndexOutOfBoundsException e) {
            throw new ParsingException("Download failed", e);
        }
    }

    /**
-     * Fetch artist details from mobile endpoint, thereby receiving their URL.
-     * <a href=https://notabug.org/fynngodau/bandcampDirect/wiki/rewindBandcamp+%E2%80%93+Fetching+artist+details>
-     * I once took a moment to note down how it works.</a>
-     *
-     * @throws ParsingException
+     * Uses the mobile endpoint as a "translator" from id to url
     */
    @Override
    public String getUrl(String id, List<String> contentFilter, String sortFilter) throws ParsingException {
        try {
-            String data = NewPipe.getDownloader().post(
-                    "https://bandcamp.com/api/mobile/22/band_details",
-                    null,
-                    ("{\"band_id\":\"" + id + "\"}").getBytes()
-            ).responseBody();
-
-            return new JSONObject(data)
+            return BandcampChannelExtractor.getArtistDetails(id)
                    .getString("bandcamp_url")
                    .replace("http://", "https://");
-
-
-        } catch (IOException | ReCaptchaException e) {
-            throw new ParsingException("Download failed", e);
+        } catch (JSONException e) {
+            throw new ParsingException("JSON does not contain URL (invalid id?) or is otherwise invalid", e);
        }
+
    }

    /**
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampStreamLinkHandlerFactory.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampStreamLinkHandlerFactory.java
@ -6,8 +6,8 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException;
 import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;

 /**
- * Tracks do have IDs, but they are not really useful. That's why id = url.
- * Instead, URLs are cleaned up so that they always look the same.
+ * Tracks don't have standalone ids, they are always in combination with the band id.
+ * That's why id = url. Instead, URLs are cleaned up so that they always look the same.
 */
 public class BandcampStreamLinkHandlerFactory extends LinkHandlerFactory {

--- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelExtractorTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelExtractorTest.java
@ -0,0 +1,42 @@
+package org.schabi.newpipe.extractor.services.bandcamp;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.schabi.newpipe.DownloaderTestImpl;
+import org.schabi.newpipe.extractor.NewPipe;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampChannelExtractor;
+import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.schabi.newpipe.extractor.ServiceList.bandcamp;
+
+public class BandcampChannelExtractorTest {
+
+    private static BandcampChannelExtractor extractor;
+
+    @BeforeClass
+    public static void setUp() throws Exception {
+        NewPipe.init(DownloaderTestImpl.getInstance());
+        extractor = (BandcampChannelExtractor) bandcamp
+                .getChannelExtractor("https://zachbenson.bandcamp.com/");
+    }
+
+    @Test
+    public void testImageUrl() {
+        assertEquals("https://f4.bcbits.com/img/a2405652335_10.jpg", BandcampChannelExtractor.getImageUrl(2405652335L, true));
+        assertEquals("https://f4.bcbits.com/img/17433693_10.jpg", BandcampChannelExtractor.getImageUrl(17433693L, false));
+    }
+
+    @Test
+    public void testTranslateIdsToUrl() throws ParsingException {
+        assertEquals("https://zachbenson.bandcamp.com/album/covers", BandcampExtractorHelper.getStreamUrlFromIds(2862267535L, 2063639444L, "album"));
+        // TODO write more test cases
+    }
+
+    @Test
+    public void testLength() throws ParsingException {
+        assertTrue(extractor.getInitialPage().getItems().size() > 2);
+    }
+}
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelLinkHandlerFactoryTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelLinkHandlerFactoryTest.java
@ -51,4 +51,14 @@ public class BandcampChannelLinkHandlerFactoryTest {
        assertEquals("https://infiniteammo.bandcamp.com", linkHandler.getUrl("3321800855"));
    }

+    @Test(expected = ParsingException.class)
+    public void testGetUrlWithInvalidId() throws ParsingException {
+        linkHandler.getUrl("0");
+    }
+
+    @Test(expected = ParsingException.class)
+    public void testGetIdWithInvalidUrl() throws ParsingException {
+        linkHandler.getId("https://bandcamp.com");
+    }
+
 }
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampStreamExtractorTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampStreamExtractorTest.java
@ -67,4 +67,9 @@ public class BandcampStreamExtractorTest {
        assertEquals(1, extractor.getAudioStreams().size());
    }

+    @Test(expected = ParsingException.class)
+    public void testInvalidUrl() throws ExtractionException {
+        bandcamp.getStreamExtractor("https://bandcamp.com");
+    }
+
 }