From 9b16baffb7438af1f3b79f365cdc01a2a32dadbc Mon Sep 17 00:00:00 2001 From: Fynn Godau Date: Sun, 22 Dec 2019 13:51:17 +0100 Subject: [PATCH] Bandcamp search: multiple pages --- .../extractors/BandcampSearchExtractor.java | 34 ++++++++++++++++--- .../BandcampSearchQueryHandlerFactory.java | 3 +- .../bandcamp/BandcampSearchExtractorTest.java | 13 +++++++ ...BandcampSearchQueryHandlerFactoryTest.java | 4 +-- 4 files changed, 47 insertions(+), 7 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampSearchExtractor.java index f31a5065..31b1b4b8 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampSearchExtractor.java @@ -89,19 +89,45 @@ public class BandcampSearchExtractor extends SearchExtractor { } + // Count pages: look up the pagelist element; when none exists, return the collected items without a next page URL + Elements pageLists = d.getElementsByClass("pagelist"); + if (pageLists.size() == 0) + return new InfoItemsPage<>(collector, null); + + Elements pages = pageLists.first().getElementsByTag("li"); + + // Find current page: the first li entry that contains a span is taken as the current page (1-based). NOTE(review): currentPage stays -1 when no entry matches, and the check further down would then build a URL ending in page 0; confirm whether that can happen + int currentPage = -1; + for (int i = 0; i < pages.size(); i++) { + Element page = pages.get(i); + if (page.getElementsByTag("span").size() > 0) { + currentPage = i + 1; + break; + } + } + + // Search results appear to be capped at six pages. NOTE(review): the next URL below replaces only the last character of pageUrl, so it presumably relies on single-digit page numbers; this assert is only evaluated when assertions are enabled + assert pages.size() < 10; + + String nextUrl = null; + if (currentPage < pages.size()) { + nextUrl = pageUrl.substring(0, pageUrl.length() - 1) + (currentPage + 1); + } + + return new InfoItemsPage<>(collector, nextUrl); - return new InfoItemsPage<>(getInfoItemSearchCollector(), null); } @Nonnull @Override public InfoItemsPage getInitialPage() throws IOException, ExtractionException { - return getPage(getUrl());//new InfoItemsPage<>(getInfoItemSearchCollector(), 
null); + return getPage(getUrl()); } @Override - public String getNextPageUrl() throws IOException, ExtractionException { - return null; + public String getNextPageUrl() throws ExtractionException { + String url = getUrl(); + return url.substring(0, url.length() - 1).concat("2"); } @Override diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampSearchQueryHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampSearchQueryHandlerFactory.java index 3f23956f..0fad75d5 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampSearchQueryHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampSearchQueryHandlerFactory.java @@ -21,7 +21,8 @@ public class BandcampSearchQueryHandlerFactory extends SearchQueryHandlerFactory try { return SEARCH_URL + - URLEncoder.encode(query, CHARSET_UTF_8); + URLEncoder.encode(query, CHARSET_UTF_8) + + "&page=1"; } catch (UnsupportedEncodingException e) { throw new ParsingException("query \"" + query + "\" could not be encoded", e); diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampSearchExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampSearchExtractorTest.java index 45624fe7..8cffcf73 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampSearchExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampSearchExtractorTest.java @@ -89,4 +89,17 @@ public class BandcampSearchExtractorTest { assertEquals(24, ((PlaylistInfoItem) minecraft).getStreamCount()); } + + /** + * Tests that a search spanning multiple pages reports the expected next page URL for the initial page and for the page after it + */ + @Test + public void testMultiplePages() throws ExtractionException, IOException { + // A query practically guaranteed to have the maximum number of 
pages + SearchExtractor extractor = bandcamp.getSearchExtractor("e"); + + assertEquals("https://bandcamp.com/search?q=e&page=2", extractor.getInitialPage().getNextPageUrl()); + + assertEquals("https://bandcamp.com/search?q=e&page=3", extractor.getPage(extractor.getNextPageUrl()).getNextPageUrl()); + } } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampSearchQueryHandlerFactoryTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampSearchQueryHandlerFactoryTest.java index 1a8fdac5..4158e655 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampSearchQueryHandlerFactoryTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampSearchQueryHandlerFactoryTest.java @@ -27,8 +27,8 @@ public class BandcampSearchQueryHandlerFactoryTest { @Test public void testEncoding() throws ParsingException { // Note: this isn't exactly as bandcamp does it (it wouldn't encode '!'), but both works - assertEquals("https://bandcamp.com/search?q=hello%21%22%C2%A7%24%25%26%2F%28%29%3D", searchQuery.getUrl("hello!\"§$%&/()=")); + assertEquals("https://bandcamp.com/search?q=hello%21%22%C2%A7%24%25%26%2F%28%29%3D&page=1", searchQuery.getUrl("hello!\"§$%&/()=")); // Note: bandcamp uses %20 instead of '+', but both works - assertEquals("https://bandcamp.com/search?q=search+query+with+spaces", searchQuery.getUrl("search query with spaces")); + assertEquals("https://bandcamp.com/search?q=search+query+with+spaces&page=1", searchQuery.getUrl("search query with spaces")); } }