Add ability to identify short-form StreamInfoItems

This commit is contained in:
chowder 2022-10-17 21:51:16 +01:00
parent 3d314169b9
commit daf5674951
8 changed files with 603 additions and 0 deletions

View file

@ -324,4 +324,16 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
return null;
}
/**
 * Reports whether this item is short-form content, based on the
 * {@code webPageType} found under the item's {@code navigationEndpoint}.
 *
 * @return {@code true} only if that value equals {@code WEB_PAGE_TYPE_SHORTS}
 * @throws ParsingException if any exception is raised while reading the value
 */
@Override
public boolean isShortFormContent() throws ParsingException {
    final String pageType;
    try {
        pageType = videoInfo
                .getObject("navigationEndpoint")
                .getObject("commandMetadata")
                .getObject("webCommandMetadata")
                .getString("webPageType");
    } catch (final Exception e) {
        throw new ParsingException("Could not determine if short-form content", e);
    }
    // Constant-first equals is false for both a null and an empty value.
    return "WEB_PAGE_TYPE_SHORTS".equals(pageType);
}
}

View file

@ -42,6 +42,7 @@ public class StreamInfoItem extends InfoItem {
private String uploaderUrl = null;
private String uploaderAvatarUrl = null;
private boolean uploaderVerified = false;
// Whether this item is flagged as short-form content; stays false until changed via setShortFormContent.
private boolean shortFormContent = false;
public StreamInfoItem(final int serviceId,
final String url,
@ -130,6 +131,15 @@ public class StreamInfoItem extends InfoItem {
this.uploaderVerified = uploaderVerified;
}
/**
 * Whether this stream has been flagged as short-form content.
 *
 * @return the stored flag; {@code false} unless changed through
 *         {@link #setShortFormContent(boolean)}
 */
public boolean isShortFormContent() {
return shortFormContent;
}
/**
 * Sets the flag marking this stream as short-form content.
 *
 * @param shortFormContent {@code true} if the stream is short-form content
 */
public void setShortFormContent(final boolean shortFormContent) {
this.shortFormContent = shortFormContent;
}
@Override
public String toString() {
return "StreamInfoItem{"

View file

@ -127,4 +127,14 @@ public interface StreamInfoItemExtractor extends InfoItemExtractor {
default String getShortDescription() throws ParsingException {
// Default: no short description is available; implementations can override this.
return null;
}
/**
 * Whether the stream is short-form content.
 *
 * <p>
 * This default implementation always returns {@code false}; implementations
 * that are able to detect short-form content should override it.
 * </p>
 *
 * @return {@code true} if the stream is short-form content
 * @throws ParsingException if there is an error in the extraction
 */
default boolean isShortFormContent() throws ParsingException {
return false;
}
}

View file

@ -98,6 +98,11 @@ public class StreamInfoItemsCollector
} catch (final Exception e) {
addError(e);
}
// Short-form detection is best-effort: a failure is recorded through addError
// and the item is still returned.
try {
resultItem.setShortFormContent(extractor.isShortFormContent());
} catch (final Exception e) {
addError(e);
}
return resultItem;
}

View file

@ -379,4 +379,41 @@ public class YoutubeSearchExtractorTest {
assertNotNull(((StreamInfoItem) items.get(0)).getShortDescription());
}
}
/**
 * Searches for {@code #shorts} (videos only), using the downloader created
 * for the {@code shorts} resource path, and checks that at least one of the
 * returned stream items is reported as short-form content.
 */
public static class ShortFormContent extends DefaultSearchExtractorTest {
    private static SearchExtractor extractor;
    private static final String QUERY = "#shorts";

    @BeforeAll
    public static void setUp() throws Exception {
        YoutubeTestsUtils.ensureStateless();
        NewPipe.init(DownloaderFactory.getDownloader(RESOURCE_PATH + "shorts"));
        extractor = YouTube.getSearchExtractor(QUERY, singletonList(VIDEOS), "");
        extractor.fetchPage();
    }

    /**
     * Returns {@link #QUERY} percent-encoded for use inside a URL query string.
     *
     * <p>A raw {@code #} starts the fragment part of a URL, so it cannot appear
     * literally in {@code search_query=...}.
     * NOTE(review): this assumes the extractor URL-encodes the search string
     * with UTF-8 when building its URL; confirm against the search query
     * handler.</p>
     */
    private static String urlEncodedQuery() {
        try {
            return java.net.URLEncoder.encode(QUERY, "UTF-8");
        } catch (final java.io.UnsupportedEncodingException e) {
            // UTF-8 support is mandatory on every Java platform.
            throw new IllegalStateException("UTF-8 is not supported", e);
        }
    }

    @Override public SearchExtractor extractor() { return extractor; }
    @Override public StreamingService expectedService() { return YouTube; }
    @Override public String expectedName() { return QUERY; }
    @Override public String expectedId() { return QUERY; }
    @Override public String expectedUrlContains() { return "youtube.com/results?search_query=" + urlEncodedQuery(); }
    @Override public String expectedOriginalUrlContains() { return "youtube.com/results?search_query=" + urlEncodedQuery(); }
    @Override public String expectedSearchString() { return QUERY; }
    @Nullable @Override public String expectedSearchSuggestion() { return null; }
    @Override public InfoItem.InfoType expectedInfoItemType() { return InfoItem.InfoType.STREAM; }

    /**
     * At least one item on the initial page must be a stream flagged as
     * short-form content.
     */
    @Test
    public void testShortFormContent() throws IOException, ExtractionException {
        final List<InfoItem> items = extractor.getInitialPage().getItems();
        boolean hasShortFormContent = false;
        for (final InfoItem item : items) {
            // Guard the cast so an unexpected non-stream item cannot abort the
            // test with a ClassCastException.
            if (item instanceof StreamInfoItem
                    && ((StreamInfoItem) item).isShortFormContent()) {
                hasShortFormContent = true;
                break;
            }
        }
        assertTrue(hasShortFormContent);
    }
}
}

View file

@ -0,0 +1,82 @@
{
"request": {
"httpMethod": "GET",
"url": "https://www.youtube.com/sw.js",
"headers": {
"Origin": [
"https://www.youtube.com"
],
"Referer": [
"https://www.youtube.com"
],
"Accept-Language": [
"en-GB, en;q\u003d0.9"
]
},
"localization": {
"languageCode": "en",
"countryCode": "GB"
}
},
"response": {
"responseCode": 200,
"responseMessage": "",
"responseHeaders": {
"access-control-allow-credentials": [
"true"
],
"access-control-allow-origin": [
"https://www.youtube.com"
],
"alt-svc": [
"h3\u003d\":443\"; ma\u003d2592000,h3-29\u003d\":443\"; ma\u003d2592000,h3-Q050\u003d\":443\"; ma\u003d2592000,h3-Q046\u003d\":443\"; ma\u003d2592000,h3-Q043\u003d\":443\"; ma\u003d2592000,quic\u003d\":443\"; ma\u003d2592000; v\u003d\"46,43\""
],
"cache-control": [
"private, max-age\u003d0"
],
"content-type": [
"text/javascript; charset\u003dutf-8"
],
"cross-origin-opener-policy-report-only": [
"same-origin; report-to\u003d\"youtube_main\""
],
"date": [
"Mon, 17 Oct 2022 20:46:09 GMT"
],
"expires": [
"Mon, 17 Oct 2022 20:46:09 GMT"
],
"p3p": [
"CP\u003d\"This is not a P3P policy! See http://support.google.com/accounts/answer/151657?hl\u003den-GB for more info.\""
],
"permissions-policy": [
"ch-ua-arch\u003d*, ch-ua-bitness\u003d*, ch-ua-full-version\u003d*, ch-ua-full-version-list\u003d*, ch-ua-model\u003d*, ch-ua-wow64\u003d*, ch-ua-platform\u003d*, ch-ua-platform-version\u003d*"
],
"report-to": [
"{\"group\":\"youtube_main\",\"max_age\":2592000,\"endpoints\":[{\"url\":\"https://csp.withgoogle.com/csp/report-to/youtube_main\"}]}"
],
"server": [
"ESF"
],
"set-cookie": [
"YSC\u003d6BKzjAdnbq0; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dTue, 21-Jan-2020 20:46:09 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"CONSENT\u003dPENDING+680; expires\u003dWed, 16-Oct-2024 20:46:09 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
],
"strict-transport-security": [
"max-age\u003d31536000"
],
"x-content-type-options": [
"nosniff"
],
"x-frame-options": [
"SAMEORIGIN"
],
"x-xss-protection": [
"0"
]
},
"responseBody": "\n self.addEventListener(\u0027install\u0027, event \u003d\u003e {\n event.waitUntil(self.skipWaiting());\n });\n self.addEventListener(\u0027activate\u0027, event \u003d\u003e {\n event.waitUntil(\n self.clients.claim().then(() \u003d\u003e self.registration.unregister()));\n });\n ",
"latestUrl": "https://www.youtube.com/sw.js"
}
}