Merge pull request #946 from chowder/dev
Add ability to identify short-form `StreamInfoItem`s
This commit is contained in:
commit
4cae66f1f9
11 changed files with 1591 additions and 8 deletions
|
@ -1,13 +1,7 @@
|
|||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||
|
||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
|
||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getThumbnailUrlFromInfoItem;
|
||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getUrlFromNavigationEndpoint;
|
||||
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
||||
|
||||
import com.grack.nanojson.JsonArray;
|
||||
import com.grack.nanojson.JsonObject;
|
||||
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
||||
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
||||
|
@ -18,12 +12,16 @@ import org.schabi.newpipe.extractor.stream.StreamType;
|
|||
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
||||
import org.schabi.newpipe.extractor.utils.Utils;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.time.Instant;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.time.ZoneOffset;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
|
||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getThumbnailUrlFromInfoItem;
|
||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getUrlFromNavigationEndpoint;
|
||||
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
||||
|
||||
/*
|
||||
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
|
||||
|
@ -324,4 +322,46 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
|||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isShortFormContent() throws ParsingException {
|
||||
try {
|
||||
final String webPageType = videoInfo.getObject("navigationEndpoint")
|
||||
.getObject("commandMetadata").getObject("webCommandMetadata")
|
||||
.getString("webPageType");
|
||||
|
||||
boolean isShort = !isNullOrEmpty(webPageType)
|
||||
&& webPageType.equals("WEB_PAGE_TYPE_SHORTS");
|
||||
|
||||
if (!isShort) {
|
||||
isShort = videoInfo.getObject("navigationEndpoint").has("reelWatchEndpoint");
|
||||
}
|
||||
|
||||
if (!isShort) {
|
||||
final JsonObject thumbnailTimeOverlay = videoInfo.getArray("thumbnailOverlays")
|
||||
.stream()
|
||||
.filter(JsonObject.class::isInstance)
|
||||
.map(JsonObject.class::cast)
|
||||
.filter(thumbnailOverlay -> thumbnailOverlay.has(
|
||||
"thumbnailOverlayTimeStatusRenderer"))
|
||||
.map(thumbnailOverlay -> thumbnailOverlay.getObject(
|
||||
"thumbnailOverlayTimeStatusRenderer"))
|
||||
.findFirst()
|
||||
.orElse(null);
|
||||
|
||||
if (!isNullOrEmpty(thumbnailTimeOverlay)) {
|
||||
isShort = thumbnailTimeOverlay.getString("style", "")
|
||||
.equalsIgnoreCase("SHORTS")
|
||||
|| thumbnailTimeOverlay.getObject("icon")
|
||||
.getString("iconType", "")
|
||||
.toLowerCase()
|
||||
.contains("shorts");
|
||||
}
|
||||
}
|
||||
|
||||
return isShort;
|
||||
} catch (final Exception e) {
|
||||
throw new ParsingException("Could not determine if this is short-form content", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -554,6 +554,20 @@ public abstract class StreamExtractor extends Extractor {
|
|||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether the stream is a short-form content.
|
||||
*
|
||||
* <p>
|
||||
* Short-form contents are contents in the style of TikTok, YouTube Shorts, or Instagram Reels
|
||||
* videos.
|
||||
* </p>
|
||||
*
|
||||
* @return whether the stream is a short-form content
|
||||
*/
|
||||
public boolean isShortFormContent() throws ParsingException {
|
||||
return false;
|
||||
}
|
||||
|
||||
public enum Privacy {
|
||||
PUBLIC,
|
||||
UNLISTED,
|
||||
|
|
|
@ -342,6 +342,11 @@ public class StreamInfo extends Info {
|
|||
} catch (final Exception e) {
|
||||
streamInfo.addError(e);
|
||||
}
|
||||
try {
|
||||
streamInfo.setShortFormContent(extractor.isShortFormContent());
|
||||
} catch (final Exception e) {
|
||||
streamInfo.addError(e);
|
||||
}
|
||||
|
||||
streamInfo.setRelatedItems(ExtractorHelper.getRelatedItemsOrLogError(streamInfo,
|
||||
extractor));
|
||||
|
@ -389,6 +394,7 @@ public class StreamInfo extends Info {
|
|||
private List<String> tags = new ArrayList<>();
|
||||
private List<StreamSegment> streamSegments = new ArrayList<>();
|
||||
private List<MetaInfo> metaInfo = new ArrayList<>();
|
||||
private boolean shortFormContent = false;
|
||||
|
||||
/**
|
||||
* Preview frames, e.g. for the storyboard / seekbar thumbnail preview
|
||||
|
@ -724,4 +730,12 @@ public class StreamInfo extends Info {
|
|||
public List<MetaInfo> getMetaInfo() {
|
||||
return this.metaInfo;
|
||||
}
|
||||
|
||||
public boolean isShortFormContent() {
|
||||
return shortFormContent;
|
||||
}
|
||||
|
||||
public void setShortFormContent(final boolean isShortFormContent) {
|
||||
this.shortFormContent = isShortFormContent;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -42,6 +42,7 @@ public class StreamInfoItem extends InfoItem {
|
|||
private String uploaderUrl = null;
|
||||
private String uploaderAvatarUrl = null;
|
||||
private boolean uploaderVerified = false;
|
||||
private boolean shortFormContent = false;
|
||||
|
||||
public StreamInfoItem(final int serviceId,
|
||||
final String url,
|
||||
|
@ -130,6 +131,14 @@ public class StreamInfoItem extends InfoItem {
|
|||
this.uploaderVerified = uploaderVerified;
|
||||
}
|
||||
|
||||
public boolean isShortFormContent() {
|
||||
return shortFormContent;
|
||||
}
|
||||
|
||||
public void setShortFormContent(final boolean shortFormContent) {
|
||||
this.shortFormContent = shortFormContent;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "StreamInfoItem{"
|
||||
|
|
|
@ -127,4 +127,18 @@ public interface StreamInfoItemExtractor extends InfoItemExtractor {
|
|||
default String getShortDescription() throws ParsingException {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether the stream is a short-form content.
|
||||
*
|
||||
* <p>
|
||||
* Short-form contents are contents in the style of TikTok, YouTube Shorts, or Instagram Reels
|
||||
* videos.
|
||||
* </p>
|
||||
*
|
||||
* @return whether the stream is a short-form content
|
||||
*/
|
||||
default boolean isShortFormContent() throws ParsingException {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -98,6 +98,11 @@ public class StreamInfoItemsCollector
|
|||
} catch (final Exception e) {
|
||||
addError(e);
|
||||
}
|
||||
try {
|
||||
resultItem.setShortFormContent(extractor.isShortFormContent());
|
||||
} catch (final Exception e) {
|
||||
addError(e);
|
||||
}
|
||||
|
||||
return resultItem;
|
||||
}
|
||||
|
|
|
@ -30,9 +30,10 @@ import org.schabi.newpipe.extractor.stream.Description;
|
|||
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.net.URLEncoder;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -379,4 +380,45 @@ public class YoutubeSearchExtractorTest {
|
|||
assertNotNull(((StreamInfoItem) items.get(0)).getShortDescription());
|
||||
}
|
||||
}
|
||||
|
||||
public static class ShortFormContent extends DefaultSearchExtractorTest {
|
||||
private static SearchExtractor extractor;
|
||||
private static final String QUERY = "#shorts";
|
||||
|
||||
@BeforeAll
|
||||
public static void setUp() throws Exception {
|
||||
YoutubeTestsUtils.ensureStateless();
|
||||
NewPipe.init(DownloaderFactory.getDownloader(RESOURCE_PATH + "shorts"));
|
||||
extractor = YouTube.getSearchExtractor(QUERY, singletonList(VIDEOS), "");
|
||||
extractor.fetchPage();
|
||||
}
|
||||
|
||||
private String getUrlEncodedQuery() {
|
||||
try {
|
||||
return URLEncoder.encode(QUERY, "UTF-8");
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override public SearchExtractor extractor() { return extractor; }
|
||||
@Override public StreamingService expectedService() { return YouTube; }
|
||||
@Override public String expectedName() { return QUERY; }
|
||||
@Override public String expectedId() { return QUERY; }
|
||||
@Override public String expectedUrlContains() { return "youtube.com/results?search_query=" + getUrlEncodedQuery(); }
|
||||
@Override public String expectedOriginalUrlContains() { return "youtube.com/results?search_query=" + getUrlEncodedQuery(); }
|
||||
@Override public String expectedSearchString() { return QUERY; }
|
||||
@Nullable @Override public String expectedSearchSuggestion() { return null; }
|
||||
@Override public InfoItem.InfoType expectedInfoItemType() { return InfoItem.InfoType.STREAM; }
|
||||
|
||||
@Test
|
||||
void testShortFormContent() throws IOException, ExtractionException {
|
||||
assertTrue(extractor.getInitialPage()
|
||||
.getItems()
|
||||
.stream()
|
||||
.filter(StreamInfoItem.class::isInstance)
|
||||
.map(StreamInfoItem.class::cast)
|
||||
.anyMatch(StreamInfoItem::isShortFormContent));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
{
|
||||
"request": {
|
||||
"httpMethod": "GET",
|
||||
"url": "https://www.youtube.com/sw.js",
|
||||
"headers": {
|
||||
"Origin": [
|
||||
"https://www.youtube.com"
|
||||
],
|
||||
"Referer": [
|
||||
"https://www.youtube.com"
|
||||
],
|
||||
"Accept-Language": [
|
||||
"en-GB, en;q\u003d0.9"
|
||||
]
|
||||
},
|
||||
"localization": {
|
||||
"languageCode": "en",
|
||||
"countryCode": "GB"
|
||||
}
|
||||
},
|
||||
"response": {
|
||||
"responseCode": 200,
|
||||
"responseMessage": "",
|
||||
"responseHeaders": {
|
||||
"access-control-allow-credentials": [
|
||||
"true"
|
||||
],
|
||||
"access-control-allow-origin": [
|
||||
"https://www.youtube.com"
|
||||
],
|
||||
"alt-svc": [
|
||||
"h3\u003d\":443\"; ma\u003d2592000,h3-29\u003d\":443\"; ma\u003d2592000,h3-Q050\u003d\":443\"; ma\u003d2592000,h3-Q046\u003d\":443\"; ma\u003d2592000,h3-Q043\u003d\":443\"; ma\u003d2592000,quic\u003d\":443\"; ma\u003d2592000; v\u003d\"46,43\""
|
||||
],
|
||||
"cache-control": [
|
||||
"private, max-age\u003d0"
|
||||
],
|
||||
"content-type": [
|
||||
"text/javascript; charset\u003dutf-8"
|
||||
],
|
||||
"cross-origin-opener-policy-report-only": [
|
||||
"same-origin; report-to\u003d\"youtube_main\""
|
||||
],
|
||||
"date": [
|
||||
"Tue, 18 Oct 2022 22:13:02 GMT"
|
||||
],
|
||||
"expires": [
|
||||
"Tue, 18 Oct 2022 22:13:02 GMT"
|
||||
],
|
||||
"p3p": [
|
||||
"CP\u003d\"This is not a P3P policy! See http://support.google.com/accounts/answer/151657?hl\u003den-GB for more info.\""
|
||||
],
|
||||
"permissions-policy": [
|
||||
"ch-ua-arch\u003d*, ch-ua-bitness\u003d*, ch-ua-full-version\u003d*, ch-ua-full-version-list\u003d*, ch-ua-model\u003d*, ch-ua-wow64\u003d*, ch-ua-platform\u003d*, ch-ua-platform-version\u003d*"
|
||||
],
|
||||
"report-to": [
|
||||
"{\"group\":\"youtube_main\",\"max_age\":2592000,\"endpoints\":[{\"url\":\"https://csp.withgoogle.com/csp/report-to/youtube_main\"}]}"
|
||||
],
|
||||
"server": [
|
||||
"ESF"
|
||||
],
|
||||
"set-cookie": [
|
||||
"YSC\u003dtvxMn34iTRM; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
|
||||
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dWed, 22-Jan-2020 22:13:02 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
|
||||
"CONSENT\u003dPENDING+265; expires\u003dThu, 17-Oct-2024 22:13:02 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
|
||||
],
|
||||
"strict-transport-security": [
|
||||
"max-age\u003d31536000"
|
||||
],
|
||||
"x-content-type-options": [
|
||||
"nosniff"
|
||||
],
|
||||
"x-frame-options": [
|
||||
"SAMEORIGIN"
|
||||
],
|
||||
"x-xss-protection": [
|
||||
"0"
|
||||
]
|
||||
},
|
||||
"responseBody": "\n self.addEventListener(\u0027install\u0027, event \u003d\u003e {\n event.waitUntil(self.skipWaiting());\n });\n self.addEventListener(\u0027activate\u0027, event \u003d\u003e {\n event.waitUntil(\n self.clients.claim().then(() \u003d\u003e self.registration.unregister()));\n });\n ",
|
||||
"latestUrl": "https://www.youtube.com/sw.js"
|
||||
}
|
||||
}
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in a new issue