Merge pull request #946 from chowder/dev

Add ability to identify short-form `StreamInfoItem`s
This commit is contained in:
AudricV 2022-11-01 12:19:58 +01:00 committed by GitHub
commit 4cae66f1f9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 1591 additions and 8 deletions

View file

@ -1,13 +1,7 @@
package org.schabi.newpipe.extractor.services.youtube.extractors;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getThumbnailUrlFromInfoItem;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getUrlFromNavigationEndpoint;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
@ -18,12 +12,16 @@ import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nullable;
import java.time.Instant;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import javax.annotation.Nullable;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getThumbnailUrlFromInfoItem;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getUrlFromNavigationEndpoint;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
/*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
@ -324,4 +322,46 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
return null;
}
/**
 * Determines whether this item is short-form content.
 *
 * <p>
 * Three signals are checked in order, stopping at the first one that matches: the
 * {@code webPageType} of the navigation endpoint's web command metadata, the presence of a
 * {@code reelWatchEndpoint} in the navigation endpoint, and finally the style or icon type of
 * the first {@code thumbnailOverlayTimeStatusRenderer} thumbnail overlay.
 * </p>
 *
 * @return {@code true} if one of the signals identifies the item as a short
 * @throws ParsingException if any exception is raised while reading the item's JSON data
 */
@Override
public boolean isShortFormContent() throws ParsingException {
    try {
        final JsonObject navigationEndpoint = videoInfo.getObject("navigationEndpoint");

        // Signal 1: the target web page is explicitly typed as a shorts page
        final String pageType = navigationEndpoint.getObject("commandMetadata")
                .getObject("webCommandMetadata")
                .getString("webPageType");
        if ("WEB_PAGE_TYPE_SHORTS".equals(pageType)) {
            return true;
        }

        // Signal 2: the item navigates to a reel watch endpoint
        if (navigationEndpoint.has("reelWatchEndpoint")) {
            return true;
        }

        // Signal 3: only the first time status overlay is considered
        for (final Object overlay : videoInfo.getArray("thumbnailOverlays")) {
            if (!(overlay instanceof JsonObject)
                    || !((JsonObject) overlay).has("thumbnailOverlayTimeStatusRenderer")) {
                continue;
            }

            final JsonObject timeStatus = ((JsonObject) overlay)
                    .getObject("thumbnailOverlayTimeStatusRenderer");
            if (isNullOrEmpty(timeStatus)) {
                return false;
            }

            return timeStatus.getString("style", "").equalsIgnoreCase("SHORTS")
                    || timeStatus.getObject("icon")
                            .getString("iconType", "")
                            .toLowerCase()
                            .contains("shorts");
        }

        return false;
    } catch (final Exception e) {
        throw new ParsingException("Could not determine if this is short-form content", e);
    }
}
}

View file

@ -554,6 +554,20 @@ public abstract class StreamExtractor extends Extractor {
return Collections.emptyList();
}
/**
* Tells whether the stream is short-form content.
*
* <p>
* Short-form content is content in the style of TikTok videos, YouTube Shorts or Instagram
* Reels.
* </p>
*
* <p>
* This base implementation always returns {@code false}.
* </p>
*
* @return whether the stream is short-form content
* @throws ParsingException never thrown by this base implementation; NOTE(review): presumably
*                          declared so that overriding extractors can report parsing failures
*/
public boolean isShortFormContent() throws ParsingException {
return false;
}
public enum Privacy {
PUBLIC,
UNLISTED,

View file

@ -342,6 +342,11 @@ public class StreamInfo extends Info {
} catch (final Exception e) {
streamInfo.addError(e);
}
// A failure to determine the short-form flag is recorded on the StreamInfo through addError
// instead of being propagated to the caller.
try {
streamInfo.setShortFormContent(extractor.isShortFormContent());
} catch (final Exception e) {
streamInfo.addError(e);
}
streamInfo.setRelatedItems(ExtractorHelper.getRelatedItemsOrLogError(streamInfo,
extractor));
@ -389,6 +394,7 @@ public class StreamInfo extends Info {
private List<String> tags = new ArrayList<>();
private List<StreamSegment> streamSegments = new ArrayList<>();
private List<MetaInfo> metaInfo = new ArrayList<>();
// Whether the stream is short-form content; stays false unless setShortFormContent is called.
private boolean shortFormContent = false;
/**
* Preview frames, e.g. for the storyboard / seekbar thumbnail preview
@ -724,4 +730,12 @@ public class StreamInfo extends Info {
public List<MetaInfo> getMetaInfo() {
return this.metaInfo;
}
/**
 * @return the short-form content flag currently stored in this {@code StreamInfo}
 */
public boolean isShortFormContent() {
    return this.shortFormContent;
}
/**
* Stores the short-form content flag of this {@code StreamInfo}.
*
* @param isShortFormContent the value later returned by {@code isShortFormContent()}
*/
public void setShortFormContent(final boolean isShortFormContent) {
this.shortFormContent = isShortFormContent;
}
}

View file

@ -42,6 +42,7 @@ public class StreamInfoItem extends InfoItem {
private String uploaderUrl = null;
private String uploaderAvatarUrl = null;
private boolean uploaderVerified = false;
// Whether the item is short-form content; stays false unless setShortFormContent is called.
private boolean shortFormContent = false;
public StreamInfoItem(final int serviceId,
final String url,
@ -130,6 +131,14 @@ public class StreamInfoItem extends InfoItem {
this.uploaderVerified = uploaderVerified;
}
/**
 * @return the short-form content flag currently stored in this {@code StreamInfoItem}
 */
public boolean isShortFormContent() {
    return this.shortFormContent;
}
/**
* Stores the short-form content flag of this {@code StreamInfoItem}.
*
* @param shortFormContent the value later returned by {@code isShortFormContent()}
*/
public void setShortFormContent(final boolean shortFormContent) {
this.shortFormContent = shortFormContent;
}
@Override
public String toString() {
return "StreamInfoItem{"

View file

@ -127,4 +127,18 @@ public interface StreamInfoItemExtractor extends InfoItemExtractor {
default String getShortDescription() throws ParsingException {
return null;
}
/**
* Tells whether the stream is short-form content.
*
* <p>
* Short-form content is content in the style of TikTok videos, YouTube Shorts or Instagram
* Reels.
* </p>
*
* <p>
* This default implementation always returns {@code false}.
* </p>
*
* @return whether the stream is short-form content
* @throws ParsingException never thrown by this default implementation; NOTE(review):
*                          presumably declared so that implementations can report parsing
*                          failures
*/
default boolean isShortFormContent() throws ParsingException {
return false;
}
}

View file

@ -98,6 +98,11 @@ public class StreamInfoItemsCollector
} catch (final Exception e) {
addError(e);
}
// A failure to determine the short-form flag is recorded through addError; the item is still
// returned below.
try {
resultItem.setShortFormContent(extractor.isShortFormContent());
} catch (final Exception e) {
addError(e);
}
return resultItem;
}

View file

@ -30,9 +30,10 @@ import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.net.URLEncoder;
import java.util.Collections;
import java.util.List;
@ -379,4 +380,45 @@ public class YoutubeSearchExtractorTest {
assertNotNull(((StreamInfoItem) items.get(0)).getShortDescription());
}
}
/**
 * Runs the default search extractor checks against a YouTube video search for
 * {@code #shorts}, and additionally verifies that at least one result of the initial page is
 * reported as short-form content.
 */
public static class ShortFormContent extends DefaultSearchExtractorTest {
    private static final String QUERY = "#shorts";
    private static SearchExtractor extractor;

    @BeforeAll
    public static void setUp() throws Exception {
        YoutubeTestsUtils.ensureStateless();
        NewPipe.init(DownloaderFactory.getDownloader(RESOURCE_PATH + "shorts"));
        extractor = YouTube.getSearchExtractor(QUERY, singletonList(VIDEOS), "");
        extractor.fetchPage();
    }

    /**
     * @return the search URL fragment expected for {@link #QUERY}, with the query URL-encoded
     *         as UTF-8 (the {@code #} must not appear raw in the URL)
     */
    private static String expectedSearchUrl() {
        final String encodedQuery;
        try {
            encodedQuery = URLEncoder.encode(QUERY, "UTF-8");
        } catch (final UnsupportedEncodingException cause) {
            throw new RuntimeException(cause);
        }
        return "youtube.com/results?search_query=" + encodedQuery;
    }

    @Override
    public SearchExtractor extractor() {
        return extractor;
    }

    @Override
    public StreamingService expectedService() {
        return YouTube;
    }

    @Override
    public String expectedName() {
        return QUERY;
    }

    @Override
    public String expectedId() {
        return QUERY;
    }

    @Override
    public String expectedUrlContains() {
        return expectedSearchUrl();
    }

    @Override
    public String expectedOriginalUrlContains() {
        return expectedSearchUrl();
    }

    @Override
    public String expectedSearchString() {
        return QUERY;
    }

    @Nullable
    @Override
    public String expectedSearchSuggestion() {
        return null;
    }

    @Override
    public InfoItem.InfoType expectedInfoItemType() {
        return InfoItem.InfoType.STREAM;
    }

    @Test
    void testShortFormContent() throws IOException, ExtractionException {
        // Passes as soon as one stream item of the initial page is flagged as a short
        boolean shortFound = false;
        for (final InfoItem item : extractor.getInitialPage().getItems()) {
            if (item instanceof StreamInfoItem
                    && ((StreamInfoItem) item).isShortFormContent()) {
                shortFound = true;
                break;
            }
        }
        assertTrue(shortFound);
    }
}
}

View file

@ -0,0 +1,82 @@
{
"request": {
"httpMethod": "GET",
"url": "https://www.youtube.com/sw.js",
"headers": {
"Origin": [
"https://www.youtube.com"
],
"Referer": [
"https://www.youtube.com"
],
"Accept-Language": [
"en-GB, en;q\u003d0.9"
]
},
"localization": {
"languageCode": "en",
"countryCode": "GB"
}
},
"response": {
"responseCode": 200,
"responseMessage": "",
"responseHeaders": {
"access-control-allow-credentials": [
"true"
],
"access-control-allow-origin": [
"https://www.youtube.com"
],
"alt-svc": [
"h3\u003d\":443\"; ma\u003d2592000,h3-29\u003d\":443\"; ma\u003d2592000,h3-Q050\u003d\":443\"; ma\u003d2592000,h3-Q046\u003d\":443\"; ma\u003d2592000,h3-Q043\u003d\":443\"; ma\u003d2592000,quic\u003d\":443\"; ma\u003d2592000; v\u003d\"46,43\""
],
"cache-control": [
"private, max-age\u003d0"
],
"content-type": [
"text/javascript; charset\u003dutf-8"
],
"cross-origin-opener-policy-report-only": [
"same-origin; report-to\u003d\"youtube_main\""
],
"date": [
"Tue, 18 Oct 2022 22:13:02 GMT"
],
"expires": [
"Tue, 18 Oct 2022 22:13:02 GMT"
],
"p3p": [
"CP\u003d\"This is not a P3P policy! See http://support.google.com/accounts/answer/151657?hl\u003den-GB for more info.\""
],
"permissions-policy": [
"ch-ua-arch\u003d*, ch-ua-bitness\u003d*, ch-ua-full-version\u003d*, ch-ua-full-version-list\u003d*, ch-ua-model\u003d*, ch-ua-wow64\u003d*, ch-ua-platform\u003d*, ch-ua-platform-version\u003d*"
],
"report-to": [
"{\"group\":\"youtube_main\",\"max_age\":2592000,\"endpoints\":[{\"url\":\"https://csp.withgoogle.com/csp/report-to/youtube_main\"}]}"
],
"server": [
"ESF"
],
"set-cookie": [
"YSC\u003dtvxMn34iTRM; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dWed, 22-Jan-2020 22:13:02 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"CONSENT\u003dPENDING+265; expires\u003dThu, 17-Oct-2024 22:13:02 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
],
"strict-transport-security": [
"max-age\u003d31536000"
],
"x-content-type-options": [
"nosniff"
],
"x-frame-options": [
"SAMEORIGIN"
],
"x-xss-protection": [
"0"
]
},
"responseBody": "\n self.addEventListener(\u0027install\u0027, event \u003d\u003e {\n event.waitUntil(self.skipWaiting());\n });\n self.addEventListener(\u0027activate\u0027, event \u003d\u003e {\n event.waitUntil(\n self.clients.claim().then(() \u003d\u003e self.registration.unregister()));\n });\n ",
"latestUrl": "https://www.youtube.com/sw.js"
}
}