Rework link handlers to correctly accept external websites
This commit is contained in:
parent
be562b8436
commit
04dd3d4d32
7 changed files with 63 additions and 22 deletions
|
@ -123,6 +123,28 @@ public class BandcampExtractorHelper {
|
|||
return "https://f4.bcbits.com/img/" + (album ? 'a' : "") + id + "_10.jpg";
|
||||
}
|
||||
|
||||
/**
|
||||
* @return <code>true</code> if the given url looks like it comes from a bandcamp custom domain
|
||||
* or if it comes from bandcamp.com itself
|
||||
*/
|
||||
public static boolean isSupportedDomain(final String url) throws ParsingException {
|
||||
|
||||
// Accept all bandcamp.com URLs
|
||||
if (url.toLowerCase().matches("https?://.+\\.bandcamp\\.com(/.*)?")) return true;
|
||||
|
||||
try {
|
||||
// Accept all other URLs if they contain a <meta> tag that says they are generated by bandcamp
|
||||
return Jsoup.parse(
|
||||
NewPipe.getDownloader().get(url).responseBody()
|
||||
)
|
||||
.getElementsByAttributeValue("name", "generator")
|
||||
.attr("content").equals("Bandcamp");
|
||||
} catch (IOException | ReCaptchaException e) {
|
||||
throw new ParsingException("Could not determine whether URL is custom domain " +
|
||||
"(not available? network error?)");
|
||||
}
|
||||
}
|
||||
|
||||
static DateWrapper parseDate(final String textDate) throws ParsingException {
|
||||
try {
|
||||
final Date date = new SimpleDateFormat("dd MMM yyyy HH:mm:ss zzz", Locale.ENGLISH).parse(textDate);
|
||||
|
|
|
@ -24,7 +24,7 @@ public class BandcampChannelLinkHandlerFactory extends ListLinkHandlerFactory {
|
|||
try {
|
||||
final String response = NewPipe.getDownloader().get(url).responseBody();
|
||||
|
||||
// This variable contains band data!
|
||||
// Use band data embedded in website to extract ID
|
||||
final JsonObject bandData = BandcampExtractorHelper.getJsonData(response, "data-band");
|
||||
|
||||
return String.valueOf(bandData.getLong("id"));
|
||||
|
@ -51,17 +51,15 @@ public class BandcampChannelLinkHandlerFactory extends ListLinkHandlerFactory {
|
|||
}
|
||||
|
||||
/**
|
||||
* Matches <code>* .bandcamp.com</code> as well as custom domains
|
||||
* where the profile is at <code>* . * /releases</code>
|
||||
* Accepts only pages that do not lead to an album or track. Supports external pages.
|
||||
*/
|
||||
@Override
|
||||
public boolean onAcceptUrl(final String url) {
|
||||
public boolean onAcceptUrl(final String url) throws ParsingException {
|
||||
|
||||
// Is a subdomain of bandcamp.com?
|
||||
boolean isBandcampComArtistPage = url.matches("https?://.+\\.bandcamp\\.com/?");
|
||||
// Exclude URLs that lead to a track or album
|
||||
if (url.matches(".*/(album|track)/.*")) return false;
|
||||
|
||||
boolean isCustomDomainReleases = url.matches("https?://.+\\..+/releases/?(?!.)");
|
||||
|
||||
return isBandcampComArtistPage || isCustomDomainReleases;
|
||||
// Test whether domain is supported
|
||||
return BandcampExtractorHelper.isSupportedDomain(url);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@ package org.schabi.newpipe.extractor.services.bandcamp.linkHandler;
|
|||
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
|
||||
import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
@ -22,8 +23,16 @@ public class BandcampPlaylistLinkHandlerFactory extends ListLinkHandlerFactory {
|
|||
return url;
|
||||
}
|
||||
|
||||
/**
|
||||
* Accepts all bandcamp URLs that contain /album/ behind their domain name.
|
||||
*/
|
||||
@Override
|
||||
public boolean onAcceptUrl(final String url) {
|
||||
return url.toLowerCase().matches("https?://.+\\..+/album/.+");
|
||||
public boolean onAcceptUrl(final String url) throws ParsingException {
|
||||
|
||||
// Exclude URLs which do not lead to an album
|
||||
if (!url.toLowerCase().matches("https?://.+\\..+/album/.+")) return false;
|
||||
|
||||
// Test whether domain is supported
|
||||
return BandcampExtractorHelper.isSupportedDomain(url);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@ package org.schabi.newpipe.extractor.services.bandcamp.linkHandler;
|
|||
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;
|
||||
import org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper;
|
||||
|
||||
/**
|
||||
* <p>Tracks don't have standalone ids, they are always in combination with the band id.
|
||||
|
@ -40,16 +41,19 @@ public class BandcampStreamLinkHandlerFactory extends LinkHandlerFactory {
|
|||
}
|
||||
|
||||
/**
|
||||
* Sometimes, the root page of an artist is also an album or track
|
||||
* page. In that case, it is assumed that one actually wants to open
|
||||
* the profile and not the track it has set as the default one.
|
||||
* <p>Urls are expected to be in this format to account for
|
||||
* custom domains:</p>
|
||||
* <code>https:// * . * /track/ *</code>
|
||||
* Accepts URLs that point to a bandcamp radio show or that are a bandcamp
|
||||
* domain and point to a track.
|
||||
*/
|
||||
@Override
|
||||
public boolean onAcceptUrl(final String url) {
|
||||
return url.toLowerCase().matches("https?://.+\\..+/track/.+")
|
||||
|| url.toLowerCase().matches("https?://bandcamp\\.com/\\?show=\\d+");
|
||||
public boolean onAcceptUrl(final String url) throws ParsingException {
|
||||
|
||||
// Accept Bandcamp radio
|
||||
if (url.toLowerCase().matches("https?://bandcamp\\.com/\\?show=\\d+")) return true;
|
||||
|
||||
// Don't accept URLs that don't point to a track
|
||||
if (!url.toLowerCase().matches("https?://.+\\..+/track/.+")) return false;
|
||||
|
||||
// Test whether domain is supported
|
||||
return BandcampExtractorHelper.isSupportedDomain(url);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -26,13 +26,19 @@ public class BandcampChannelLinkHandlerFactoryTest {
|
|||
|
||||
@Test
|
||||
public void testAcceptUrl() throws ParsingException {
|
||||
assertTrue(linkHandler.acceptUrl("http://interovgm.com/releases/"));
|
||||
assertTrue(linkHandler.acceptUrl("https://interovgm.com/releases"));
|
||||
// Bandcamp URLs
|
||||
assertTrue(linkHandler.acceptUrl("http://zachbenson.bandcamp.com"));
|
||||
assertTrue(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/"));
|
||||
assertTrue(linkHandler.acceptUrl("https://billwurtz.bandcamp.com/releases"));
|
||||
|
||||
assertFalse(linkHandler.acceptUrl("https://bandcamp.com"));
|
||||
assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/track/kitchen"));
|
||||
|
||||
// External URLs
|
||||
assertTrue(linkHandler.acceptUrl("http://interovgm.com/releases/"));
|
||||
assertTrue(linkHandler.acceptUrl("https://interovgm.com/releases"));
|
||||
|
||||
assertFalse(linkHandler.acceptUrl("https://example.com/releases"));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -35,6 +35,7 @@ public class BandcampPlaylistLinkHandlerFactoryTest {
|
|||
assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/"));
|
||||
assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/track/kitchen"));
|
||||
assertFalse(linkHandler.acceptUrl("https://interovgm.com/track/title"));
|
||||
assertFalse(linkHandler.acceptUrl("https://example.com/album/samplealbum"));
|
||||
|
||||
assertTrue(linkHandler.acceptUrl("https://powertothequeerkids.bandcamp.com/album/power-to-the-queer-kids"));
|
||||
assertTrue(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/album/prom"));
|
||||
|
|
|
@ -43,6 +43,7 @@ public class BandcampStreamLinkHandlerFactoryTest {
|
|||
assertFalse(linkHandler.acceptUrl("https://bandcamp.com"));
|
||||
assertFalse(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/"));
|
||||
assertFalse(linkHandler.acceptUrl("https://powertothequeerkids.bandcamp.com/album/power-to-the-queer-kids"));
|
||||
assertFalse(linkHandler.acceptUrl("https://example.com/track/sampletrack"));
|
||||
|
||||
assertTrue(linkHandler.acceptUrl("https://zachbenson.bandcamp.com/track/kitchen"));
|
||||
assertTrue(linkHandler.acceptUrl("http://ZachBenson.Bandcamp.COM/Track/U-I-Tonite/"));
|
||||
|
|
Loading…
Reference in a new issue