From 04dd3d4d3266d3b2cf2d8afca8518209431c17aa Mon Sep 17 00:00:00 2001 From: Fynn Godau Date: Sat, 5 Dec 2020 15:08:26 +0100 Subject: [PATCH] Rework link handlers to correctly accept external websites --- .../extractors/BandcampExtractorHelper.java | 22 +++++++++++++++++++ .../BandcampChannelLinkHandlerFactory.java | 16 ++++++-------- .../BandcampPlaylistLinkHandlerFactory.java | 13 +++++++++-- .../BandcampStreamLinkHandlerFactory.java | 22 +++++++++++-------- ...BandcampChannelLinkHandlerFactoryTest.java | 10 +++++++-- ...andcampPlaylistLinkHandlerFactoryTest.java | 1 + .../BandcampStreamLinkHandlerFactoryTest.java | 1 + 7 files changed, 63 insertions(+), 22 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java index 5d0fb45f..6508de10 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java @@ -123,6 +123,28 @@ public class BandcampExtractorHelper { return "https://f4.bcbits.com/img/" + (album ? 
'a' : "") + id + "_10.jpg"; } + /** + * @return true if the given url is on a subdomain of bandcamp.com, or if the page downloaded from it + * names Bandcamp as its generator; a failed download is reported as a ParsingException + */ + public static boolean isSupportedDomain(final String url) throws ParsingException { + + // Accept all URLs whose host (not path, query or fragment) is a subdomain of bandcamp.com + if (url.toLowerCase().matches("https?://[^/?#]+\\.bandcamp\\.com(/.*)?")) return true; + + try { + // Accept all other URLs if they contain a tag that says they are generated by bandcamp + return Jsoup.parse( + NewPipe.getDownloader().get(url).responseBody() + ) + .getElementsByAttributeValue("name", "generator") + .attr("content").equals("Bandcamp"); + } catch (IOException | ReCaptchaException e) { + throw new ParsingException("Could not determine whether URL is custom domain " + + "(not available? network error?)", e); + } + } + static DateWrapper parseDate(final String textDate) throws ParsingException { try { final Date date = new SimpleDateFormat("dd MMM yyyy HH:mm:ss zzz", Locale.ENGLISH).parse(textDate); diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampChannelLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampChannelLinkHandlerFactory.java index 9fbc8ae8..762c5d9b 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampChannelLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampChannelLinkHandlerFactory.java @@ -24,7 +24,7 @@ public class BandcampChannelLinkHandlerFactory extends ListLinkHandlerFactory { try { final String response = NewPipe.getDownloader().get(url).responseBody(); - // This variable contains band data! 
+ // Use band data embedded in website to extract ID final JsonObject bandData = BandcampExtractorHelper.getJsonData(response, "data-band"); return String.valueOf(bandData.getLong("id")); @@ -51,17 +51,15 @@ public class BandcampChannelLinkHandlerFactory extends ListLinkHandlerFactory { } /** - * Matches * .bandcamp.com as well as custom domains - * where the profile is at * . * /releases + * Accepts only pages that do not lead to an album or track (checked case-insensitively). Supports external pages. */ @Override - public boolean onAcceptUrl(final String url) { + public boolean onAcceptUrl(final String url) throws ParsingException { - // Is a subdomain of bandcamp.com? - boolean isBandcampComArtistPage = url.matches("https?://.+\\.bandcamp\\.com/?"); + // Exclude URLs that lead to a track or album, ignoring case like the other link handlers + if (url.toLowerCase().matches(".*/(album|track)/.*")) return false; - boolean isCustomDomainReleases = url.matches("https?://.+\\..+/releases/?(?!.)"); - - return isBandcampComArtistPage || isCustomDomainReleases; + // Test whether domain is supported + return BandcampExtractorHelper.isSupportedDomain(url); } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampPlaylistLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampPlaylistLinkHandlerFactory.java index b030fcc5..b0fd2d50 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampPlaylistLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampPlaylistLinkHandlerFactory.java @@ -4,6 +4,7 @@ package org.schabi.newpipe.extractor.services.bandcamp.linkHandler; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; +import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper; import java.util.List; @@ -22,8 +23,16 @@ public 
class BandcampPlaylistLinkHandlerFactory extends ListLinkHandlerFactory { return url; } + /** + * Accepts URLs that contain /album/ behind their domain name, provided BandcampExtractorHelper.isSupportedDomain accepts them. + */ @Override - public boolean onAcceptUrl(final String url) { - return url.toLowerCase().matches("https?://.+\\..+/album/.+"); + public boolean onAcceptUrl(final String url) throws ParsingException { + + // Exclude URLs which do not lead to an album + if (!url.toLowerCase().matches("https?://.+\\..+/album/.+")) return false; + + // Test whether domain is supported + return BandcampExtractorHelper.isSupportedDomain(url); } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampStreamLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampStreamLinkHandlerFactory.java index 5de345e1..fa0435da 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampStreamLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampStreamLinkHandlerFactory.java @@ -4,6 +4,7 @@ package org.schabi.newpipe.extractor.services.bandcamp.linkHandler; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory; +import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper; /** *

Tracks don't have standalone ids, they are always in combination with the band id. @@ -40,16 +41,19 @@ public class BandcampStreamLinkHandlerFactory extends LinkHandlerFactory { } /** - * Sometimes, the root page of an artist is also an album or track - * page. In that case, it is assumed that one actually wants to open - * the profile and not the track it has set as the default one. - *

Urls are expected to be in this format to account for - * custom domains:

- * https:// * . * /track/ * + * Accepts URLs that point to a bandcamp radio show, or that point to a track + * on a domain accepted by BandcampExtractorHelper.isSupportedDomain. */ @Override - public boolean onAcceptUrl(final String url) { - return url.toLowerCase().matches("https?://.+\\..+/track/.+") - || url.toLowerCase().matches("https?://bandcamp\\.com/\\?show=\\d+"); + public boolean onAcceptUrl(final String url) throws ParsingException { + + // Accept Bandcamp radio + if (url.toLowerCase().matches("https?://bandcamp\\.com/\\?show=\\d+")) return true; + + // Don't accept URLs that don't point to a track + if (!url.toLowerCase().matches("https?://.+\\..+/track/.+")) return false; + + // Test whether domain is supported + return BandcampExtractorHelper.isSupportedDomain(url); } } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelLinkHandlerFactoryTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelLinkHandlerFactoryTest.java index e07daf7d..8886f649 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelLinkHandlerFactoryTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelLinkHandlerFactoryTest.java @@ -26,13 +26,19 @@ public class BandcampChannelLinkHandlerFactoryTest { @Test public void testAcceptUrl() throws ParsingException { - assertTrue(linkHandler.acceptUrl("http://interovgm.com/releases/")); - assertTrue(linkHandler.acceptUrl("https://interovgm.com/releases")); + // Bandcamp URLs assertTrue(linkHandler.acceptUrl("http://zachbenson.bandcamp.com")); assertTrue(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/")); + assertTrue(linkHandler.acceptUrl("https://billwurtz.bandcamp.com/releases")); assertFalse(linkHandler.acceptUrl("https://bandcamp.com")); assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/track/kitchen")); + + // External URLs + 
assertTrue(linkHandler.acceptUrl("http://interovgm.com/releases/")); + assertTrue(linkHandler.acceptUrl("https://interovgm.com/releases")); + + assertFalse(linkHandler.acceptUrl("https://example.com/releases")); } @Test diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampPlaylistLinkHandlerFactoryTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampPlaylistLinkHandlerFactoryTest.java index 11fe3acd..561d8f7b 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampPlaylistLinkHandlerFactoryTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampPlaylistLinkHandlerFactoryTest.java @@ -35,6 +35,7 @@ public class BandcampPlaylistLinkHandlerFactoryTest { assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/")); assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/track/kitchen")); assertFalse(linkHandler.acceptUrl("https://interovgm.com/track/title")); + assertFalse(linkHandler.acceptUrl("https://example.com/album/samplealbum")); assertTrue(linkHandler.acceptUrl("https://powertothequeerkids.bandcamp.com/album/power-to-the-queer-kids")); assertTrue(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/album/prom")); diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampStreamLinkHandlerFactoryTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampStreamLinkHandlerFactoryTest.java index 69a7c2bf..a65b0956 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampStreamLinkHandlerFactoryTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampStreamLinkHandlerFactoryTest.java @@ -43,6 +43,7 @@ public class BandcampStreamLinkHandlerFactoryTest { assertFalse(linkHandler.acceptUrl("https://bandcamp.com")); 
assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/")); assertFalse(linkHandler.acceptUrl("https://powertothequeerkids.bandcamp.com/album/power-to-the-queer-kids")); + assertFalse(linkHandler.acceptUrl("https://example.com/track/sampletrack")); assertTrue(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/track/kitchen")); assertTrue(linkHandler.acceptUrl("http://ZachBenson.Bandcamp.COM/Track/U-I-Tonite/"));