Add ability to identify short-form StreamInfoItems
This commit is contained in:
parent
3d314169b9
commit
daf5674951
8 changed files with 603 additions and 0 deletions
|
@ -324,4 +324,16 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isShortFormContent() throws ParsingException {
|
||||||
|
try {
|
||||||
|
final String webPageType = videoInfo.getObject("navigationEndpoint")
|
||||||
|
.getObject("commandMetadata").getObject("webCommandMetadata")
|
||||||
|
.getString("webPageType");
|
||||||
|
return !isNullOrEmpty(webPageType) && webPageType.equals("WEB_PAGE_TYPE_SHORTS");
|
||||||
|
} catch (final Exception e) {
|
||||||
|
throw new ParsingException("Could not determine if short-form content", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -42,6 +42,7 @@ public class StreamInfoItem extends InfoItem {
|
||||||
private String uploaderUrl = null;
|
private String uploaderUrl = null;
|
||||||
private String uploaderAvatarUrl = null;
|
private String uploaderAvatarUrl = null;
|
||||||
private boolean uploaderVerified = false;
|
private boolean uploaderVerified = false;
|
||||||
|
private boolean shortFormContent = false;
|
||||||
|
|
||||||
public StreamInfoItem(final int serviceId,
|
public StreamInfoItem(final int serviceId,
|
||||||
final String url,
|
final String url,
|
||||||
|
@ -130,6 +131,15 @@ public class StreamInfoItem extends InfoItem {
|
||||||
this.uploaderVerified = uploaderVerified;
|
this.uploaderVerified = uploaderVerified;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean isShortFormContent() {
|
||||||
|
return shortFormContent;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setShortFormContent(final boolean shortFormContent) {
|
||||||
|
this.shortFormContent = shortFormContent;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "StreamInfoItem{"
|
return "StreamInfoItem{"
|
||||||
|
|
|
@ -127,4 +127,14 @@ public interface StreamInfoItemExtractor extends InfoItemExtractor {
|
||||||
default String getShortDescription() throws ParsingException {
|
default String getShortDescription() throws ParsingException {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if the stream is a short-form content
|
||||||
|
*
|
||||||
|
* @return {@code true} if the stream is a short-form content
|
||||||
|
* @throws ParsingException thrown if there is an error in the extraction
|
||||||
|
*/
|
||||||
|
default boolean isShortFormContent() throws ParsingException {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -98,6 +98,11 @@ public class StreamInfoItemsCollector
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
addError(e);
|
addError(e);
|
||||||
}
|
}
|
||||||
|
try {
|
||||||
|
resultItem.setShortFormContent(extractor.isShortFormContent());
|
||||||
|
} catch (final Exception e) {
|
||||||
|
addError(e);
|
||||||
|
}
|
||||||
|
|
||||||
return resultItem;
|
return resultItem;
|
||||||
}
|
}
|
||||||
|
|
|
@ -379,4 +379,41 @@ public class YoutubeSearchExtractorTest {
|
||||||
assertNotNull(((StreamInfoItem) items.get(0)).getShortDescription());
|
assertNotNull(((StreamInfoItem) items.get(0)).getShortDescription());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static class ShortFormContent extends DefaultSearchExtractorTest {
|
||||||
|
private static SearchExtractor extractor;
|
||||||
|
private static final String QUERY = "#shorts";
|
||||||
|
|
||||||
|
@BeforeAll
|
||||||
|
public static void setUp() throws Exception {
|
||||||
|
YoutubeTestsUtils.ensureStateless();
|
||||||
|
NewPipe.init(DownloaderFactory.getDownloader(RESOURCE_PATH + "shorts"));
|
||||||
|
extractor = YouTube.getSearchExtractor(QUERY, singletonList(VIDEOS), "");
|
||||||
|
extractor.fetchPage();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override public SearchExtractor extractor() { return extractor; }
|
||||||
|
@Override public StreamingService expectedService() { return YouTube; }
|
||||||
|
@Override public String expectedName() { return QUERY; }
|
||||||
|
@Override public String expectedId() { return QUERY; }
|
||||||
|
@Override public String expectedUrlContains() { return "youtube.com/results?search_query=" + QUERY; }
|
||||||
|
@Override public String expectedOriginalUrlContains() { return "youtube.com/results?search_query=" + QUERY; }
|
||||||
|
@Override public String expectedSearchString() { return QUERY; }
|
||||||
|
@Nullable @Override public String expectedSearchSuggestion() { return null; }
|
||||||
|
@Override public InfoItem.InfoType expectedInfoItemType() { return InfoItem.InfoType.STREAM; }
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testVideoDescription() throws IOException, ExtractionException {
|
||||||
|
final List<InfoItem> items = extractor.getInitialPage().getItems();
|
||||||
|
boolean hasShortFormContent = false;
|
||||||
|
for (InfoItem item : items) {
|
||||||
|
if (((StreamInfoItem) item).isShortFormContent()) {
|
||||||
|
hasShortFormContent = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assertTrue(hasShortFormContent);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,82 @@
|
||||||
|
{
|
||||||
|
"request": {
|
||||||
|
"httpMethod": "GET",
|
||||||
|
"url": "https://www.youtube.com/sw.js",
|
||||||
|
"headers": {
|
||||||
|
"Origin": [
|
||||||
|
"https://www.youtube.com"
|
||||||
|
],
|
||||||
|
"Referer": [
|
||||||
|
"https://www.youtube.com"
|
||||||
|
],
|
||||||
|
"Accept-Language": [
|
||||||
|
"en-GB, en;q\u003d0.9"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"localization": {
|
||||||
|
"languageCode": "en",
|
||||||
|
"countryCode": "GB"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"response": {
|
||||||
|
"responseCode": 200,
|
||||||
|
"responseMessage": "",
|
||||||
|
"responseHeaders": {
|
||||||
|
"access-control-allow-credentials": [
|
||||||
|
"true"
|
||||||
|
],
|
||||||
|
"access-control-allow-origin": [
|
||||||
|
"https://www.youtube.com"
|
||||||
|
],
|
||||||
|
"alt-svc": [
|
||||||
|
"h3\u003d\":443\"; ma\u003d2592000,h3-29\u003d\":443\"; ma\u003d2592000,h3-Q050\u003d\":443\"; ma\u003d2592000,h3-Q046\u003d\":443\"; ma\u003d2592000,h3-Q043\u003d\":443\"; ma\u003d2592000,quic\u003d\":443\"; ma\u003d2592000; v\u003d\"46,43\""
|
||||||
|
],
|
||||||
|
"cache-control": [
|
||||||
|
"private, max-age\u003d0"
|
||||||
|
],
|
||||||
|
"content-type": [
|
||||||
|
"text/javascript; charset\u003dutf-8"
|
||||||
|
],
|
||||||
|
"cross-origin-opener-policy-report-only": [
|
||||||
|
"same-origin; report-to\u003d\"youtube_main\""
|
||||||
|
],
|
||||||
|
"date": [
|
||||||
|
"Mon, 17 Oct 2022 20:46:09 GMT"
|
||||||
|
],
|
||||||
|
"expires": [
|
||||||
|
"Mon, 17 Oct 2022 20:46:09 GMT"
|
||||||
|
],
|
||||||
|
"p3p": [
|
||||||
|
"CP\u003d\"This is not a P3P policy! See http://support.google.com/accounts/answer/151657?hl\u003den-GB for more info.\""
|
||||||
|
],
|
||||||
|
"permissions-policy": [
|
||||||
|
"ch-ua-arch\u003d*, ch-ua-bitness\u003d*, ch-ua-full-version\u003d*, ch-ua-full-version-list\u003d*, ch-ua-model\u003d*, ch-ua-wow64\u003d*, ch-ua-platform\u003d*, ch-ua-platform-version\u003d*"
|
||||||
|
],
|
||||||
|
"report-to": [
|
||||||
|
"{\"group\":\"youtube_main\",\"max_age\":2592000,\"endpoints\":[{\"url\":\"https://csp.withgoogle.com/csp/report-to/youtube_main\"}]}"
|
||||||
|
],
|
||||||
|
"server": [
|
||||||
|
"ESF"
|
||||||
|
],
|
||||||
|
"set-cookie": [
|
||||||
|
"YSC\u003d6BKzjAdnbq0; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
|
||||||
|
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dTue, 21-Jan-2020 20:46:09 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
|
||||||
|
"CONSENT\u003dPENDING+680; expires\u003dWed, 16-Oct-2024 20:46:09 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
|
||||||
|
],
|
||||||
|
"strict-transport-security": [
|
||||||
|
"max-age\u003d31536000"
|
||||||
|
],
|
||||||
|
"x-content-type-options": [
|
||||||
|
"nosniff"
|
||||||
|
],
|
||||||
|
"x-frame-options": [
|
||||||
|
"SAMEORIGIN"
|
||||||
|
],
|
||||||
|
"x-xss-protection": [
|
||||||
|
"0"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"responseBody": "\n self.addEventListener(\u0027install\u0027, event \u003d\u003e {\n event.waitUntil(self.skipWaiting());\n });\n self.addEventListener(\u0027activate\u0027, event \u003d\u003e {\n event.waitUntil(\n self.clients.claim().then(() \u003d\u003e self.registration.unregister()));\n });\n ",
|
||||||
|
"latestUrl": "https://www.youtube.com/sw.js"
|
||||||
|
}
|
||||||
|
}
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in a new issue