Merge pull request #964 from AudricV/yt-support-handles-and-all-channel-usernames

[YouTube] Support handles and all channel usernames
This commit is contained in:
AudricV 2022-11-04 12:09:15 +01:00 committed by GitHub
commit eb07d70a2c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 171 additions and 152 deletions

View file

@ -1,3 +1,23 @@
/*
* Created by Christian Schabesberger on 25.07.16.
*
* Copyright (C) Christian Schabesberger 2018 <chris.schabesberger@mailbox.org>
* YoutubeChannelLinkHandlerFactory.java is part of NewPipe Extractor.
*
* NewPipe Extractor is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe Extractor is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe Extractor. If not, see <https://www.gnu.org/licenses/>.
*/
package org.schabi.newpipe.extractor.services.youtube.linkHandler;
import java.util.regex.Pattern;
@ -6,36 +26,21 @@ import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nonnull;
import java.net.URL;
import java.util.List;
/*
* Created by Christian Schabesberger on 25.07.16.
*
* Copyright (C) Christian Schabesberger 2018 <chrźis.schabesberger@mailbox.org>
* YoutubeChannelLinkHandlerFactory.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
import static org.schabi.newpipe.extractor.utils.Utils.isBlank;
public final class YoutubeChannelLinkHandlerFactory extends ListLinkHandlerFactory {
private static final YoutubeChannelLinkHandlerFactory INSTANCE
= new YoutubeChannelLinkHandlerFactory();
private static final Pattern EXCLUDED_SEGMENTS =
Pattern.compile("playlist|watch|attribution_link|watch_popup|embed|feed|select_site");
private static final Pattern EXCLUDED_SEGMENTS = Pattern.compile(
// CHECKSTYLE:OFF
"playlist|watch|attribution_link|watch_popup|embed|feed|select_site|account|reporthistory|redirect");
// CHECKSTYLE:ON
private YoutubeChannelLinkHandlerFactory() {
// Intentionally empty. The constructor is private, so instances can only be created from
// inside this class; the one creation visible here is the static INSTANCE field.
}
@ -45,10 +50,10 @@ public final class YoutubeChannelLinkHandlerFactory extends ListLinkHandlerFacto
}
/**
* Returns URL to channel from an ID
* Returns the URL to a channel from an ID.
*
* @param id Channel ID including e.g. 'channel/'
* @return URL to channel
* @param id the channel ID including e.g. 'channel/'
* @return the URL to the channel
*/
@Override
public String getUrl(final String id,
@ -58,16 +63,26 @@ public final class YoutubeChannelLinkHandlerFactory extends ListLinkHandlerFacto
}
/**
* Returns true if path conform to
* custom short channel URLs like youtube.com/yourcustomname
* Checks whether the given path conforms to custom short channel URLs like
* {@code youtube.com/yourcustomname}.
*
* @param splitPath path segments array
* @return true - if value conform to short channel URL, false - not
* @param splitPath the path segments array
* @return whether the value conforms to short channel URLs
*/
private boolean isCustomShortChannelUrl(final String[] splitPath) {
private boolean isCustomShortChannelUrl(@Nonnull final String[] splitPath) {
    // A custom short channel URL has exactly one path segment.
    if (splitPath.length != 1) {
        return false;
    }
    // That single segment must not be one of the excluded (non-channel) segments.
    final String onlySegment = splitPath[0];
    final boolean excluded = EXCLUDED_SEGMENTS.matcher(onlySegment).matches();
    return !excluded;
}
/**
 * Tells whether the given path segments describe a handle URL such as
 * {@code youtube.com/@yourhandle}.
 *
 * @param splitPath the path segments array
 * @return {@code true} if there is at least one segment and the first one starts with
 *         an at sign ({@code "@"}), {@code false} otherwise
 */
private boolean isHandle(@Nonnull final String[] splitPath) {
    // No segments at all: there is nothing that could be a handle.
    if (splitPath.length == 0) {
        return false;
    }
    // Only the first segment is inspected; any following segments are ignored.
    final String firstSegment = splitPath[0];
    return firstSegment.startsWith("@");
}
@Override
public String getId(final String url) throws ParsingException {
try {
@ -77,35 +92,38 @@ public final class YoutubeChannelLinkHandlerFactory extends ListLinkHandlerFacto
if (!Utils.isHTTP(urlObj) || !(YoutubeParsingHelper.isYoutubeURL(urlObj)
|| YoutubeParsingHelper.isInvidioURL(urlObj)
|| YoutubeParsingHelper.isHooktubeURL(urlObj))) {
throw new ParsingException("the URL given is not a Youtube-URL");
throw new ParsingException("The URL given is not a YouTube URL");
}
// remove leading "/"
// Remove leading "/"
path = path.substring(1);
String[] splitPath = path.split("/");
// Handle custom short channel URLs like youtube.com/yourcustomname
if (isCustomShortChannelUrl(splitPath)) {
if (isHandle(splitPath)) {
// Handle YouTube handle URLs like youtube.com/@yourhandle
return splitPath[0];
} else if (isCustomShortChannelUrl(splitPath)) {
// Handle custom short channel URLs like youtube.com/yourcustomname
path = "c/" + path;
splitPath = path.split("/");
}
if (!path.startsWith("user/")
&& !path.startsWith("channel/")
if (!path.startsWith("user/") && !path.startsWith("channel/")
&& !path.startsWith("c/")) {
throw new ParsingException("the URL given is neither a channel nor an user");
throw new ParsingException(
"The given URL is not a channel, a user or a handle URL");
}
final String id = splitPath[1];
if (id == null || !id.matches("[A-Za-z0-9_-]+")) {
throw new ParsingException("The given id is not a Youtube-Video-ID");
if (isBlank(id)) {
throw new ParsingException("The given ID is not a YouTube channel or user ID");
}
return splitPath[0] + "/" + id;
} catch (final Exception exception) {
throw new ParsingException("Error could not parse url :" + exception.getMessage(),
exception);
} catch (final Exception e) {
throw new ParsingException("Could not parse URL :" + e.getMessage(), e);
}
}

View file

@ -155,7 +155,7 @@ public class YoutubeChannelExtractorTest {
YoutubeTestsUtils.ensureStateless();
NewPipe.init(DownloaderFactory.getDownloader(RESOURCE_PATH + "gronkh"));
extractor = (YoutubeChannelExtractor) YouTube
.getChannelExtractor("http://www.youtube.com/user/Gronkh");
.getChannelExtractor("http://www.youtube.com/@Gronkh");
extractor.fetchPage();
}
@ -185,7 +185,7 @@ public class YoutubeChannelExtractorTest {
@Test
public void testOriginalUrl() throws ParsingException {
assertEquals("http://www.youtube.com/user/Gronkh", extractor.getOriginalUrl());
assertEquals("http://www.youtube.com/@Gronkh", extractor.getOriginalUrl());
}
/*//////////////////////////////////////////////////////////////////////////

View file

@ -25,11 +25,12 @@ public class YoutubeChannelLinkHandlerFactoryTest {
}
@Test
public void acceptUrlTest() throws ParsingException {
void acceptUrlTest() throws ParsingException {
assertTrue(linkHandler.acceptUrl("https://www.youtube.com/user/Gronkh"));
assertTrue(linkHandler.acceptUrl("https://www.youtube.com/user/Netzkino/videos"));
assertTrue(linkHandler.acceptUrl("https://www.youtube.com/c/creatoracademy"));
assertTrue(linkHandler.acceptUrl("https://www.youtube.com/c/%EB%85%B8%EB%A7%88%EB%93%9C%EC%BD%94%EB%8D%94NomadCoders"));
assertTrue(linkHandler.acceptUrl("https://youtube.com/DIMENSI0N"));
@ -49,6 +50,7 @@ public class YoutubeChannelLinkHandlerFactoryTest {
assertTrue(linkHandler.acceptUrl("https://invidio.us/channel/UClq42foiSgl7sSpLupnugGA/videos?disable_polymer=1"));
assertTrue(linkHandler.acceptUrl("https://www.youtube.com/watchismo"));
assertTrue(linkHandler.acceptUrl("https://www.youtube.com/@YouTube"));
// do not accept URLs which are not channels
assertFalse(linkHandler.acceptUrl("https://www.youtube.com/watch?v=jZViOEv90dI&t=100"));
@ -62,14 +64,13 @@ public class YoutubeChannelLinkHandlerFactoryTest {
}
@Test
public void getIdFromUrl() throws ParsingException {
void getIdFromUrl() throws ParsingException {
assertEquals("user/Gronkh", linkHandler.fromUrl("https://www.youtube.com/user/Gronkh").getId());
assertEquals("user/Netzkino", linkHandler.fromUrl("https://www.youtube.com/user/Netzkino/videos").getId());
assertEquals("channel/UClq42foiSgl7sSpLupnugGA", linkHandler.fromUrl("https://www.youtube.com/channel/UClq42foiSgl7sSpLupnugGA").getId());
assertEquals("channel/UClq42foiSgl7sSpLupnugGA", linkHandler.fromUrl("https://www.youtube.com/channel/UClq42foiSgl7sSpLupnugGA/videos?disable_polymer=1").getId());
assertEquals("user/Gronkh", linkHandler.fromUrl("https://hooktube.com/user/Gronkh").getId());
assertEquals("user/Netzkino", linkHandler.fromUrl("https://hooktube.com/user/Netzkino/videos").getId());
@ -84,5 +85,9 @@ public class YoutubeChannelLinkHandlerFactoryTest {
assertEquals("c/creatoracademy", linkHandler.fromUrl("https://www.youtube.com/c/creatoracademy").getId());
assertEquals("c/YouTubeCreators", linkHandler.fromUrl("https://www.youtube.com/c/YouTubeCreators").getId());
assertEquals("c/%EB%85%B8%EB%A7%88%EB%93%9C%EC%BD%94%EB%8D%94NomadCoders", linkHandler.fromUrl("https://www.youtube.com/c/%EB%85%B8%EB%A7%88%EB%93%9C%EC%BD%94%EB%8D%94NomadCoders").getId());
assertEquals("@Gronkh", linkHandler.fromUrl("https://www.youtube.com/@Gronkh?ucbcb=1").getId());
assertEquals("@YouTubeCreators", linkHandler.fromUrl("https://www.youtube.com/@YouTubeCreators/shorts").getId());
}
}

View file

@ -41,10 +41,10 @@
"same-origin; report-to\u003d\"youtube_main\""
],
"date": [
"Wed, 02 Nov 2022 17:40:36 GMT"
"Wed, 02 Nov 2022 23:12:52 GMT"
],
"expires": [
"Wed, 02 Nov 2022 17:40:36 GMT"
"Wed, 02 Nov 2022 23:12:52 GMT"
],
"p3p": [
"CP\u003d\"This is not a P3P policy! See http://support.google.com/accounts/answer/151657?hl\u003den-GB for more info.\""
@ -59,9 +59,9 @@
"ESF"
],
"set-cookie": [
"YSC\u003dIPHKPblTox0; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dThu, 06-Feb-2020 17:40:36 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"CONSENT\u003dPENDING+350; expires\u003dFri, 01-Nov-2024 17:40:36 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
"YSC\u003daFOfH_xu8k4; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dThu, 06-Feb-2020 23:12:52 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"CONSENT\u003dPENDING+976; expires\u003dFri, 01-Nov-2024 23:12:52 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
],
"strict-transport-security": [
"max-age\u003d31536000"

View file

@ -305,11 +305,7 @@
111,
109,
47,
117,
115,
101,
114,
47,
64,
71,
114,
111,
@ -338,10 +334,10 @@
"application/json; charset\u003dUTF-8"
],
"date": [
"Wed, 02 Nov 2022 17:40:38 GMT"
"Wed, 02 Nov 2022 23:12:53 GMT"
],
"expires": [
"Wed, 02 Nov 2022 17:40:38 GMT"
"Wed, 02 Nov 2022 23:12:53 GMT"
],
"p3p": [
"CP\u003d\"This is not a P3P policy! See g.co/p3phelp for more info.\""
@ -350,7 +346,7 @@
"scaffolding on HTTPServer2"
],
"set-cookie": [
"CONSENT\u003dPENDING+067; expires\u003dFri, 01-Nov-2024 17:40:38 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
"CONSENT\u003dPENDING+288; expires\u003dFri, 01-Nov-2024 23:12:53 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
],
"vary": [
"Origin",
@ -367,7 +363,7 @@
"0"
]
},
"responseBody": "{\"responseContext\":{\"visitorData\":\"CgtRX2dsZzVrWGRZQSiW14qbBg%3D%3D\",\"serviceTrackingParams\":[{\"service\":\"CSI\",\"params\":[{\"key\":\"c\",\"value\":\"WEB\"},{\"key\":\"cver\",\"value\":\"2.20221101.00.00\"},{\"key\":\"yt_li\",\"value\":\"0\"},{\"key\":\"ResolveUrl_rid\",\"value\":\"0x1a56ddef412e73c5\"}]},{\"service\":\"GFEEDBACK\",\"params\":[{\"key\":\"logged_in\",\"value\":\"0\"},{\"key\":\"e\",\"value\":\"1714240,9453586,9453587,23804281,23882502,23918597,23934970,23946420,23966208,23983296,23986022,23998056,24001373,24002022,24002025,24004644,24007246,24034168,24036948,24077241,24080738,24120819,24135310,24140247,24152443,24161116,24162920,24164186,24166867,24169501,24175559,24181174,24184445,24185614,24187043,24187377,24191629,24199724,24211178,24216872,24219359,24219713,24224266,24229161,24241378,24248092,24248955,24249296,24254502,24255543,24255545,24260783,24262346,24263273,24263796,24265820,24267564,24267570,24268142,24274310,24276618,24278596,24279196,24279628,24280997,24283093,24283556,24286005,24286017,24287169,24287327,24287604,24287795,24288045,24288912,24290971,24291857,24292955,24292977,24299747,24390675,24391541,24391851,24392269,24392403,24392421,24393382,24394397,24396645,24396819,24398124,24398991,24399052,24399918,24400658,24401557,24406381,24406984,24407199,24410009,39322399,39322504,39322574\"}]},{\"service\":\"GUIDED_HELP\",\"params\":[{\"key\":\"logged_in\",\"value\":\"0\"}]},{\"service\":\"ECATCHER\",\"params\":[{\"key\":\"client.version\",\"value\":\"2.20221101\"},{\"key\":\"client.name\",\"value\":\"WEB\"},{\"key\":\"client.fexp\",\"value\":\"24283556,24249296,24185614,24263273,24274310,1714240,24262346,24263796,24260783,24267564,24187043,23986022,24406381,24299747,24399052,24248092,24211178,24291857,24152443,24135310,24407199,24191629,24229161,24396819,24241378,24391541,24292977,24164186,24169501,24276618,24400658,24280997,24401557,24120819,24288045,39322574,24224266,24034168,23934970,24002022,24181174,24287
795,24175559,23882502,24283093,24001373,24292955,24290971,24161116,9453587,24393382,24410009,24391851,24288912,24394397,24392269,24002025,24268142,24399918,24390675,24278596,24216872,24286005,24219713,24080738,23918597,39322504,24279196,24392403,24396645,9453586,24279628,39322399,24265820,24254502,24036948,24162920,24398991,24255545,24166867,23998056,24007246,24392421,24255543,24398124,24286017,24140247,23804281,24287169,24287327,23983296,24406984,24199724,23966208,24184445,24219359,24267570,24248955,23946420,24187377,24077241,24004644,24287604\"}]}],\"mainAppWebResponseContext\":{\"loggedOut\":true},\"webResponseContextExtensionData\":{\"hasDecorated\":true}},\"endpoint\":{\"clickTrackingParams\":\"IhMI1s3RtYWQ-wIV99URCB21lwMbMghleHRlcm5hbA\u003d\u003d\",\"commandMetadata\":{\"webCommandMetadata\":{\"url\":\"/youtubei/v1/navigation/resolve_url\",\"webPageType\":\"WEB_PAGE_TYPE_CHANNEL\",\"rootVe\":3611,\"apiUrl\":\"/youtubei/v1/browse\"},\"resolveUrlCommandMetadata\":{\"isVanityUrl\":true}},\"browseEndpoint\":{\"browseId\":\"UCYJ61XIK64sp6ZFFS8sctxw\",\"params\":\"EgC4AQDyBgQKAjIA\"}}}",
"responseBody": "{\"responseContext\":{\"visitorData\":\"CgtieTdiV1lXeWFmTSj18oubBg%3D%3D\",\"serviceTrackingParams\":[{\"service\":\"CSI\",\"params\":[{\"key\":\"c\",\"value\":\"WEB\"},{\"key\":\"cver\",\"value\":\"2.20221101.00.00\"},{\"key\":\"yt_li\",\"value\":\"0\"},{\"key\":\"ResolveUrl_rid\",\"value\":\"0x01203331c856c87e\"}]},{\"service\":\"GFEEDBACK\",\"params\":[{\"key\":\"logged_in\",\"value\":\"0\"},{\"key\":\"e\",\"value\":\"1714254,23804281,23882502,23918597,23934970,23940247,23946420,23966208,23983296,23986015,23998056,24001373,24002022,24002025,24004644,24007246,24034168,24036948,24077241,24080738,24120819,24135310,24140247,24152443,24161116,24162919,24164186,24166867,24169501,24181174,24185614,24187043,24187377,24191629,24199724,24211178,24218780,24219713,24224266,24224808,24229161,24241378,24248091,24254502,24255543,24255545,24256985,24260783,24262346,24262775,24263796,24265820,24267564,24267570,24268142,24273932,24278596,24279196,24279628,24280221,24283093,24283556,24286003,24286017,24286291,24287169,24287327,24287795,24288045,24288912,24290842,24290971,24291857,24292955,24297748,24298082,24299548,24299747,24390376,24390675,24390916,24391541,24392399,24393382,24394397,24396645,24396818,24398124,24398981,24400943,24401137,24401291,24401557,24406381,24406605,24406984,24407200,24408325,39322399,39322504,39322574\"}]},{\"service\":\"GUIDED_HELP\",\"params\":[{\"key\":\"logged_in\",\"value\":\"0\"}]},{\"service\":\"ECATCHER\",\"params\":[{\"key\":\"client.version\",\"value\":\"2.20221101\"},{\"key\":\"client.name\",\"value\":\"WEB\"},{\"key\":\"client.fexp\",\"value\":\"24135310,24080738,24255543,24267570,23918597,24406605,24164186,24396818,24398124,23804281,24406984,24401137,24286291,23983296,24219713,23966208,24287327,24287169,24291857,24398981,24248091,24152443,24286003,24140247,24286017,24390675,24290842,24036948,24224808,24396645,1714254,24218780,39322399,24161116,24299747,23940247,24401291,24400943,24278596,24256985,24001373,23946420,24268142,242
98082,24290971,24077241,24292955,24408325,24229161,24169501,24401557,24391541,24241378,24390916,24297748,24224266,24002022,24280221,23934970,24407200,24034168,24262775,39322574,24181174,23882502,24211178,24120819,24265820,24288045,39322504,24254502,24288912,24166867,24255545,24393382,24279628,24394397,24002025,24279196,24273932,24191629,24283093,24263796,24187043,24406381,24390376,24267564,24260783,24287795,24392399,24004644,24007246,23986015,24299548,24162919,24187377,24283556,24262346,23998056,24185614,24199724\"}]}],\"mainAppWebResponseContext\":{\"loggedOut\":true},\"webResponseContextExtensionData\":{\"hasDecorated\":true}},\"endpoint\":{\"clickTrackingParams\":\"IhMIz4TI18-Q-wIV5YE4Ch01jgrjMghleHRlcm5hbA\u003d\u003d\",\"commandMetadata\":{\"webCommandMetadata\":{\"url\":\"/youtubei/v1/navigation/resolve_url\",\"webPageType\":\"WEB_PAGE_TYPE_CHANNEL\",\"rootVe\":3611,\"apiUrl\":\"/youtubei/v1/browse\"},\"resolveUrlCommandMetadata\":{\"isVanityUrl\":true}},\"browseEndpoint\":{\"browseId\":\"UCYJ61XIK64sp6ZFFS8sctxw\",\"params\":\"EgC4AQDyBgQKAjIA\"}}}",
"latestUrl": "https://www.youtube.com/youtubei/v1/navigation/resolve_url?key\u003dAIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8\u0026prettyPrint\u003dfalse"
}
}