Add ability to identify short-form StreamInfoItems
This commit is contained in:
parent
3d314169b9
commit
daf5674951
8 changed files with 603 additions and 0 deletions
|
@ -324,4 +324,16 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
|||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isShortFormContent() throws ParsingException {
|
||||
try {
|
||||
final String webPageType = videoInfo.getObject("navigationEndpoint")
|
||||
.getObject("commandMetadata").getObject("webCommandMetadata")
|
||||
.getString("webPageType");
|
||||
return !isNullOrEmpty(webPageType) && webPageType.equals("WEB_PAGE_TYPE_SHORTS");
|
||||
} catch (final Exception e) {
|
||||
throw new ParsingException("Could not determine if short-form content", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -42,6 +42,7 @@ public class StreamInfoItem extends InfoItem {
|
|||
private String uploaderUrl = null;
|
||||
private String uploaderAvatarUrl = null;
|
||||
private boolean uploaderVerified = false;
|
||||
private boolean shortFormContent = false;
|
||||
|
||||
public StreamInfoItem(final int serviceId,
|
||||
final String url,
|
||||
|
@ -130,6 +131,15 @@ public class StreamInfoItem extends InfoItem {
|
|||
this.uploaderVerified = uploaderVerified;
|
||||
}
|
||||
|
||||
public boolean isShortFormContent() {
|
||||
return shortFormContent;
|
||||
}
|
||||
|
||||
public void setShortFormContent(final boolean shortFormContent) {
|
||||
this.shortFormContent = shortFormContent;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "StreamInfoItem{"
|
||||
|
|
|
@ -127,4 +127,14 @@ public interface StreamInfoItemExtractor extends InfoItemExtractor {
|
|||
default String getShortDescription() throws ParsingException {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the stream is a short-form content
|
||||
*
|
||||
* @return {@code true} if the stream is a short-form content
|
||||
* @throws ParsingException thrown if there is an error in the extraction
|
||||
*/
|
||||
default boolean isShortFormContent() throws ParsingException {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -98,6 +98,11 @@ public class StreamInfoItemsCollector
|
|||
} catch (final Exception e) {
|
||||
addError(e);
|
||||
}
|
||||
try {
|
||||
resultItem.setShortFormContent(extractor.isShortFormContent());
|
||||
} catch (final Exception e) {
|
||||
addError(e);
|
||||
}
|
||||
|
||||
return resultItem;
|
||||
}
|
||||
|
|
|
@ -379,4 +379,41 @@ public class YoutubeSearchExtractorTest {
|
|||
assertNotNull(((StreamInfoItem) items.get(0)).getShortDescription());
|
||||
}
|
||||
}
|
||||
|
||||
public static class ShortFormContent extends DefaultSearchExtractorTest {
|
||||
private static SearchExtractor extractor;
|
||||
private static final String QUERY = "#shorts";
|
||||
|
||||
@BeforeAll
|
||||
public static void setUp() throws Exception {
|
||||
YoutubeTestsUtils.ensureStateless();
|
||||
NewPipe.init(DownloaderFactory.getDownloader(RESOURCE_PATH + "shorts"));
|
||||
extractor = YouTube.getSearchExtractor(QUERY, singletonList(VIDEOS), "");
|
||||
extractor.fetchPage();
|
||||
}
|
||||
|
||||
@Override public SearchExtractor extractor() { return extractor; }
|
||||
@Override public StreamingService expectedService() { return YouTube; }
|
||||
@Override public String expectedName() { return QUERY; }
|
||||
@Override public String expectedId() { return QUERY; }
|
||||
@Override public String expectedUrlContains() { return "youtube.com/results?search_query=" + QUERY; }
|
||||
@Override public String expectedOriginalUrlContains() { return "youtube.com/results?search_query=" + QUERY; }
|
||||
@Override public String expectedSearchString() { return QUERY; }
|
||||
@Nullable @Override public String expectedSearchSuggestion() { return null; }
|
||||
@Override public InfoItem.InfoType expectedInfoItemType() { return InfoItem.InfoType.STREAM; }
|
||||
|
||||
@Test
|
||||
public void testVideoDescription() throws IOException, ExtractionException {
|
||||
final List<InfoItem> items = extractor.getInitialPage().getItems();
|
||||
boolean hasShortFormContent = false;
|
||||
for (InfoItem item : items) {
|
||||
if (((StreamInfoItem) item).isShortFormContent()) {
|
||||
hasShortFormContent = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assertTrue(hasShortFormContent);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
{
|
||||
"request": {
|
||||
"httpMethod": "GET",
|
||||
"url": "https://www.youtube.com/sw.js",
|
||||
"headers": {
|
||||
"Origin": [
|
||||
"https://www.youtube.com"
|
||||
],
|
||||
"Referer": [
|
||||
"https://www.youtube.com"
|
||||
],
|
||||
"Accept-Language": [
|
||||
"en-GB, en;q\u003d0.9"
|
||||
]
|
||||
},
|
||||
"localization": {
|
||||
"languageCode": "en",
|
||||
"countryCode": "GB"
|
||||
}
|
||||
},
|
||||
"response": {
|
||||
"responseCode": 200,
|
||||
"responseMessage": "",
|
||||
"responseHeaders": {
|
||||
"access-control-allow-credentials": [
|
||||
"true"
|
||||
],
|
||||
"access-control-allow-origin": [
|
||||
"https://www.youtube.com"
|
||||
],
|
||||
"alt-svc": [
|
||||
"h3\u003d\":443\"; ma\u003d2592000,h3-29\u003d\":443\"; ma\u003d2592000,h3-Q050\u003d\":443\"; ma\u003d2592000,h3-Q046\u003d\":443\"; ma\u003d2592000,h3-Q043\u003d\":443\"; ma\u003d2592000,quic\u003d\":443\"; ma\u003d2592000; v\u003d\"46,43\""
|
||||
],
|
||||
"cache-control": [
|
||||
"private, max-age\u003d0"
|
||||
],
|
||||
"content-type": [
|
||||
"text/javascript; charset\u003dutf-8"
|
||||
],
|
||||
"cross-origin-opener-policy-report-only": [
|
||||
"same-origin; report-to\u003d\"youtube_main\""
|
||||
],
|
||||
"date": [
|
||||
"Mon, 17 Oct 2022 20:46:09 GMT"
|
||||
],
|
||||
"expires": [
|
||||
"Mon, 17 Oct 2022 20:46:09 GMT"
|
||||
],
|
||||
"p3p": [
|
||||
"CP\u003d\"This is not a P3P policy! See http://support.google.com/accounts/answer/151657?hl\u003den-GB for more info.\""
|
||||
],
|
||||
"permissions-policy": [
|
||||
"ch-ua-arch\u003d*, ch-ua-bitness\u003d*, ch-ua-full-version\u003d*, ch-ua-full-version-list\u003d*, ch-ua-model\u003d*, ch-ua-wow64\u003d*, ch-ua-platform\u003d*, ch-ua-platform-version\u003d*"
|
||||
],
|
||||
"report-to": [
|
||||
"{\"group\":\"youtube_main\",\"max_age\":2592000,\"endpoints\":[{\"url\":\"https://csp.withgoogle.com/csp/report-to/youtube_main\"}]}"
|
||||
],
|
||||
"server": [
|
||||
"ESF"
|
||||
],
|
||||
"set-cookie": [
|
||||
"YSC\u003d6BKzjAdnbq0; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
|
||||
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dTue, 21-Jan-2020 20:46:09 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
|
||||
"CONSENT\u003dPENDING+680; expires\u003dWed, 16-Oct-2024 20:46:09 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
|
||||
],
|
||||
"strict-transport-security": [
|
||||
"max-age\u003d31536000"
|
||||
],
|
||||
"x-content-type-options": [
|
||||
"nosniff"
|
||||
],
|
||||
"x-frame-options": [
|
||||
"SAMEORIGIN"
|
||||
],
|
||||
"x-xss-protection": [
|
||||
"0"
|
||||
]
|
||||
},
|
||||
"responseBody": "\n self.addEventListener(\u0027install\u0027, event \u003d\u003e {\n event.waitUntil(self.skipWaiting());\n });\n self.addEventListener(\u0027activate\u0027, event \u003d\u003e {\n event.waitUntil(\n self.clients.claim().then(() \u003d\u003e self.registration.unregister()));\n });\n ",
|
||||
"latestUrl": "https://www.youtube.com/sw.js"
|
||||
}
|
||||
}
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in a new issue