From 70d9e389b938dcba0246a0f10e6ff811ecf0a275 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Sat, 13 Mar 2021 20:51:54 +0100 Subject: [PATCH 1/2] [Bandcamp] Add tests for external bandcamp artists Necessary, because the external pages tested before were converted to bandcamp.com pages. See f9d06252f2978a3e96b2da1591258aed298f90e3 --- .../BandcampChannelLinkHandlerFactoryTest.java | 12 ++++++++++-- .../BandcampStreamLinkHandlerFactoryTest.java | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelLinkHandlerFactoryTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelLinkHandlerFactoryTest.java index fcadb25f..14c54c5a 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelLinkHandlerFactoryTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelLinkHandlerFactoryTest.java @@ -29,6 +29,8 @@ public class BandcampChannelLinkHandlerFactoryTest { assertTrue(linkHandler.acceptUrl("http://zachbenson.bandcamp.com")); assertTrue(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/")); assertTrue(linkHandler.acceptUrl("https://billwurtz.bandcamp.com/releases")); + assertTrue(linkHandler.acceptUrl("https://interovgm.bandcamp.com/releases")); + assertTrue(linkHandler.acceptUrl("https://interovgm.bandcamp.com/releases/")); assertTrue(linkHandler.acceptUrl("http://zachbenson.bandcamp.com/")); @@ -38,8 +40,9 @@ public class BandcampChannelLinkHandlerFactoryTest { assertFalse(linkHandler.acceptUrl("https://daily.bandcamp.com/best-of-2020/bandcamp-daily-staffers-on-their-favorite-albums-of-2020")); // External URLs - assertTrue(linkHandler.acceptUrl("https://interovgm.bandcamp.com/releases")); - assertTrue(linkHandler.acceptUrl("https://interovgm.bandcamp.com/releases/")); + assertTrue(linkHandler.acceptUrl("https://lobstertheremin.com")); + assertTrue(linkHandler.acceptUrl("https://lobstertheremin.com/music")); + assertTrue(linkHandler.acceptUrl("https://lobstertheremin.com/music/")); assertFalse(linkHandler.acceptUrl("https://example.com/releases")); } @@ -51,6 +54,9 @@ public class BandcampChannelLinkHandlerFactoryTest { assertEquals("1581461772", linkHandler.getId("https://interovgm.bandcamp.com/releases")); assertEquals("3321800855", linkHandler.getId("https://infiniteammo.bandcamp.com/")); assertEquals("3775652329", linkHandler.getId("https://npet.bandcamp.com/")); + + assertEquals("2735462545", linkHandler.getId("http://lobstertheremin.com/")); + assertEquals("2735462545", linkHandler.getId("https://lobstertheremin.com/music/")); } @Test @@ -58,6 +64,8 @@ public class BandcampChannelLinkHandlerFactoryTest { assertEquals("https://macbenson.bandcamp.com", linkHandler.getUrl("1196681540")); assertEquals("https://interovgm.bandcamp.com", linkHandler.getUrl("1581461772")); assertEquals("https://infiniteammo.bandcamp.com", linkHandler.getUrl("3321800855")); + + assertEquals("https://lobstertheremin.com", linkHandler.getUrl("2735462545")); } @Test(expected = ParsingException.class) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampStreamLinkHandlerFactoryTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampStreamLinkHandlerFactoryTest.java index 52512ac4..cb48ddbb 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampStreamLinkHandlerFactoryTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampStreamLinkHandlerFactoryTest.java @@ -49,5 +49,6 @@ public class BandcampStreamLinkHandlerFactoryTest { assertTrue(linkHandler.acceptUrl("https://interovgm.bandcamp.com/track/title")); assertTrue(linkHandler.acceptUrl("http://bandcamP.com/?show=38")); assertTrue(linkHandler.acceptUrl("https://goodgoodblood-tl.bandcamp.com/track/when-it-all-wakes-up")); + assertTrue(linkHandler.acceptUrl("https://lobstertheremin.com/track/unfinished")); } } From 2e57a8f24f268e091f7d1a4ff5335d2f417956a4 Mon Sep 17 00:00:00 2001 From: Fynn Godau Date: Sun, 14 Mar 2021 09:48:22 +0100 Subject: [PATCH 2/2] [Bandcamp] Fix link handler acceptance behaviour * Test for bandcamp footer instead of meta tag (which is not present on all pages) * Accept links to /music, not just /releases * Correctly handle uppercase URLs --- .../extractors/BandcampExtractorHelper.java | 17 +++++++++-------- .../BandcampChannelLinkHandlerFactory.java | 10 +++++++--- .../BandcampChannelLinkHandlerFactoryTest.java | 5 +++++ 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java index 547a0356..970cddc9 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java @@ -7,20 +7,15 @@ import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; import com.grack.nanojson.JsonWriter; import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.localization.DateWrapper; -import org.schabi.newpipe.extractor.utils.Utils; import java.io.IOException; import java.time.DateTimeException; import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; import java.util.Locale; public class BandcampExtractorHelper { @@ -95,12 +90,18 @@ public class BandcampExtractorHelper { if (url.toLowerCase().matches("https?://.+\\.bandcamp\\.com(/.*)?")) return true; try { - // Accept all other URLs if they contain a tag that says they are generated by bandcamp + // Test other URLs for whether they contain a footer that links to bandcamp return Jsoup.parse( NewPipe.getDownloader().get(url).responseBody() ) - .getElementsByAttributeValue("name", "generator") - .attr("content").equals("Bandcamp"); + .getElementById("pgFt") + .getElementById("pgFt-inner") + .getElementById("footer-logo-wrapper") + .getElementById("footer-logo") + .getElementsByClass("hiddenAccess") + .text().equals("Bandcamp"); + } catch (NullPointerException e) { + return false; } catch (IOException | ReCaptchaException e) { throw new ParsingException("Could not determine whether URL is custom domain " + "(not available? network error?)"); diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampChannelLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampChannelLinkHandlerFactory.java index 727aec40..05caf114 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampChannelLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampChannelLinkHandlerFactory.java @@ -55,7 +55,9 @@ public class BandcampChannelLinkHandlerFactory extends ListLinkHandlerFactory { * Accepts only pages that lead to the root of an artist profile. Supports external pages. */ @Override - public boolean onAcceptUrl(final String url) throws ParsingException { + public boolean onAcceptUrl(String url) throws ParsingException { + + url = url.toLowerCase(); // https: | | artist.bandcamp.com | releases // 0 1 2 3 @@ -64,8 +66,10 @@ public class BandcampChannelLinkHandlerFactory extends ListLinkHandlerFactory { // URL is too short if (splitUrl.length < 3) return false; - // Must have "releases" as segment after url or none at all - if (splitUrl.length > 3 && !splitUrl[3].equals("releases")) { + // Must have "releases" or "music" as segment after url or none at all + if (splitUrl.length > 3 && !( + splitUrl[3].equals("releases") || splitUrl[3].equals("music") + )) { return false; diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelLinkHandlerFactoryTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelLinkHandlerFactoryTest.java index 14c54c5a..85c18273 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelLinkHandlerFactoryTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampChannelLinkHandlerFactoryTest.java @@ -37,12 +37,16 @@ public class BandcampChannelLinkHandlerFactoryTest { assertFalse(linkHandler.acceptUrl("https://bandcamp.com")); assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/track/kitchen")); assertFalse(linkHandler.acceptUrl("https://daily.bandcamp.com/")); + assertFalse(linkHandler.acceptUrl("https://DAILY.BANDCAMP.COM")); assertFalse(linkHandler.acceptUrl("https://daily.bandcamp.com/best-of-2020/bandcamp-daily-staffers-on-their-favorite-albums-of-2020")); // External URLs assertTrue(linkHandler.acceptUrl("https://lobstertheremin.com")); assertTrue(linkHandler.acceptUrl("https://lobstertheremin.com/music")); assertTrue(linkHandler.acceptUrl("https://lobstertheremin.com/music/")); + assertTrue(linkHandler.acceptUrl("https://diskak.usopop.com/")); + assertTrue(linkHandler.acceptUrl("https://diskak.usopop.com/releases")); + assertTrue(linkHandler.acceptUrl("https://diskak.usopop.com/RELEASES")); assertFalse(linkHandler.acceptUrl("https://example.com/releases")); } @@ -57,6 +61,7 @@ public class BandcampChannelLinkHandlerFactoryTest { assertEquals("2735462545", linkHandler.getId("http://lobstertheremin.com/")); assertEquals("2735462545", linkHandler.getId("https://lobstertheremin.com/music/")); + assertEquals("3826445168", linkHandler.getId("https://diskak.usopop.com/releases")); } @Test