Bandcamp search: multiple pages
This commit is contained in:
parent
ce2a88e56f
commit
9b16baffb7
4 changed files with 47 additions and 7 deletions
|
@ -89,19 +89,45 @@ public class BandcampSearchExtractor extends SearchExtractor {
|
|||
|
||||
}
|
||||
|
||||
// Count pages
|
||||
Elements pageLists = d.getElementsByClass("pagelist");
|
||||
if (pageLists.size() == 0)
|
||||
return new InfoItemsPage<>(collector, null);
|
||||
|
||||
Elements pages = pageLists.first().getElementsByTag("li");
|
||||
|
||||
// Find current page
|
||||
int currentPage = -1;
|
||||
for (int i = 0; i < pages.size(); i++) {
|
||||
Element page = pages.get(i);
|
||||
if (page.getElementsByTag("span").size() > 0) {
|
||||
currentPage = i + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Search results appear to be capped at six pages; the URL rewrite below replaces only the last character, so it relies on single-digit page numbers
|
||||
assert pages.size() < 10;
|
||||
|
||||
String nextUrl = null;
|
||||
if (currentPage < pages.size()) {
|
||||
nextUrl = pageUrl.substring(0, pageUrl.length() - 1) + (currentPage + 1);
|
||||
}
|
||||
|
||||
return new InfoItemsPage<>(collector, nextUrl);
|
||||
|
||||
return new InfoItemsPage<>(getInfoItemSearchCollector(), null);
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionException {
|
||||
return getPage(getUrl());//new InfoItemsPage<>(getInfoItemSearchCollector(), null);
|
||||
return getPage(getUrl());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getNextPageUrl() throws IOException, ExtractionException {
|
||||
return null;
|
||||
public String getNextPageUrl() throws ExtractionException {
|
||||
String url = getUrl();
|
||||
return url.substring(0, url.length() - 1).concat("2");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -21,7 +21,8 @@ public class BandcampSearchQueryHandlerFactory extends SearchQueryHandlerFactory
|
|||
try {
|
||||
|
||||
return SEARCH_URL +
|
||||
URLEncoder.encode(query, CHARSET_UTF_8);
|
||||
URLEncoder.encode(query, CHARSET_UTF_8)
|
||||
+ "&page=1";
|
||||
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
throw new ParsingException("query \"" + query + "\" could not be encoded", e);
|
||||
|
|
|
@ -89,4 +89,17 @@ public class BandcampSearchExtractorTest {
|
|||
assertEquals(24, ((PlaylistInfoItem) minecraft).getStreamCount());
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests searches with multiple pages
|
||||
*/
|
||||
@Test
|
||||
public void testMultiplePages() throws ExtractionException, IOException {
|
||||
// A query practically guaranteed to have the maximum number of pages
|
||||
SearchExtractor extractor = bandcamp.getSearchExtractor("e");
|
||||
|
||||
assertEquals("https://bandcamp.com/search?q=e&page=2", extractor.getInitialPage().getNextPageUrl());
|
||||
|
||||
assertEquals("https://bandcamp.com/search?q=e&page=3", extractor.getPage(extractor.getNextPageUrl()).getNextPageUrl());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,8 +27,8 @@ public class BandcampSearchQueryHandlerFactoryTest {
|
|||
@Test
|
||||
public void testEncoding() throws ParsingException {
|
||||
// Note: this isn't exactly how Bandcamp does it (it wouldn't encode '!'), but both work
|
||||
assertEquals("https://bandcamp.com/search?q=hello%21%22%C2%A7%24%25%26%2F%28%29%3D", searchQuery.getUrl("hello!\"§$%&/()="));
|
||||
assertEquals("https://bandcamp.com/search?q=hello%21%22%C2%A7%24%25%26%2F%28%29%3D&page=1", searchQuery.getUrl("hello!\"§$%&/()="));
|
||||
// Note: Bandcamp uses %20 instead of '+', but both work
|
||||
assertEquals("https://bandcamp.com/search?q=search+query+with+spaces", searchQuery.getUrl("search query with spaces"));
|
||||
assertEquals("https://bandcamp.com/search?q=search+query+with+spaces&page=1", searchQuery.getUrl("search query with spaces"));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue