Merge pull request #184 from Stypox/duplicate-subscription-fix
Fix inconsistency in youtube channel urls
This commit is contained in:
		
						commit
						430da57350
					
				
					 10 changed files with 68 additions and 25 deletions
				
			
		|  | @ -47,6 +47,7 @@ import java.util.ArrayList; | ||||||
| 
 | 
 | ||||||
| @SuppressWarnings("WeakerAccess") | @SuppressWarnings("WeakerAccess") | ||||||
| public class YoutubeChannelExtractor extends ChannelExtractor { | public class YoutubeChannelExtractor extends ChannelExtractor { | ||||||
|  |     /*package-private*/ static final String CHANNEL_URL_BASE = "https://www.youtube.com/channel/"; | ||||||
|     private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id="; |     private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id="; | ||||||
|     private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000"; |     private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000"; | ||||||
| 
 | 
 | ||||||
|  | @ -72,7 +73,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { | ||||||
|     @Override |     @Override | ||||||
|     public String getUrl() throws ParsingException { |     public String getUrl() throws ParsingException { | ||||||
|         try { |         try { | ||||||
|             return "https://www.youtube.com/channel/" + getId(); |             return CHANNEL_URL_BASE + getId(); | ||||||
|         } catch (ParsingException e) { |         } catch (ParsingException e) { | ||||||
|             return super.getUrl(); |             return super.getUrl(); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  | @ -5,6 +5,9 @@ import org.schabi.newpipe.extractor.channel.ChannelInfoItemExtractor; | ||||||
| import org.schabi.newpipe.extractor.exceptions.ParsingException; | import org.schabi.newpipe.extractor.exceptions.ParsingException; | ||||||
| import org.schabi.newpipe.extractor.utils.Utils; | import org.schabi.newpipe.extractor.utils.Utils; | ||||||
| 
 | 
 | ||||||
|  | import java.util.regex.Matcher; | ||||||
|  | import java.util.regex.Pattern; | ||||||
|  | 
 | ||||||
| /* | /* | ||||||
|  * Created by Christian Schabesberger on 12.02.17. |  * Created by Christian Schabesberger on 12.02.17. | ||||||
|  * |  * | ||||||
|  | @ -53,8 +56,20 @@ public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor | ||||||
| 
 | 
 | ||||||
|     @Override |     @Override | ||||||
|     public String getUrl() throws ParsingException { |     public String getUrl() throws ParsingException { | ||||||
|         return el.select("a[class*=\"yt-uix-tile-link\"]").first() |         String buttonTrackingUrl = el.select("button[class*=\"yt-uix-button\"]").first() | ||||||
|                 .attr("abs:href"); |                 .attr("abs:data-href"); | ||||||
|  | 
 | ||||||
|  |         Pattern channelIdPattern = Pattern.compile("(?:.*?)\\%252Fchannel\\%252F([A-Za-z0-9\\-\\_]+)(?:.*)"); | ||||||
|  |         Matcher match = channelIdPattern.matcher(buttonTrackingUrl); | ||||||
|  | 
 | ||||||
|  |         if (match.matches()) { | ||||||
|  |             return YoutubeChannelExtractor.CHANNEL_URL_BASE + match.group(1); | ||||||
|  |         } else { | ||||||
|  |             // fallback method just in case youtube changes things; it should never run, and tests will fail if it does | ||||||
|  |             // provides a url with "/user/NAME", which is inconsistent with the stream and channel extractors | ||||||
|  |             return el.select("a[class*=\"yt-uix-tile-link\"]").first() | ||||||
|  |                     .attr("abs:href"); | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     @Override |     @Override | ||||||
|  |  | ||||||
|  | @ -50,7 +50,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { | ||||||
|         try { |         try { | ||||||
|             return doc.select("div[id=pl-header] h1[class=pl-header-title]").first().text(); |             return doc.select("div[id=pl-header] h1[class=pl-header-title]").first().text(); | ||||||
|         } catch (Exception e) { |         } catch (Exception e) { | ||||||
|             throw new ParsingException("Could not get playlist name"); |             throw new ParsingException("Could not get playlist name", e); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -59,7 +59,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { | ||||||
|         try { |         try { | ||||||
|             return doc.select("div[id=pl-header] div[class=pl-header-thumb] img").first().attr("abs:src"); |             return doc.select("div[id=pl-header] div[class=pl-header-thumb] img").first().attr("abs:src"); | ||||||
|         } catch (Exception e) { |         } catch (Exception e) { | ||||||
|             throw new ParsingException("Could not get playlist thumbnail"); |             throw new ParsingException("Could not get playlist thumbnail", e); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -72,9 +72,11 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { | ||||||
|     @Override |     @Override | ||||||
|     public String getUploaderUrl() throws ParsingException { |     public String getUploaderUrl() throws ParsingException { | ||||||
|         try { |         try { | ||||||
|             return doc.select("ul[class=\"pl-header-details\"] li").first().select("a").first().attr("abs:href"); |             return YoutubeChannelExtractor.CHANNEL_URL_BASE + | ||||||
|  |                     doc.select("button[class*=\"yt-uix-subscription-button\"]") | ||||||
|  |                             .first().attr("data-channel-external-id"); | ||||||
|         } catch (Exception e) { |         } catch (Exception e) { | ||||||
|             throw new ParsingException("Could not get playlist uploader name"); |             throw new ParsingException("Could not get playlist uploader url", e); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -83,7 +85,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { | ||||||
|         try { |         try { | ||||||
|             return doc.select("span[class=\"qualified-channel-title-text\"]").first().select("a").first().text(); |             return doc.select("span[class=\"qualified-channel-title-text\"]").first().select("a").first().text(); | ||||||
|         } catch (Exception e) { |         } catch (Exception e) { | ||||||
|             throw new ParsingException("Could not get playlist uploader name"); |             throw new ParsingException("Could not get playlist uploader name", e); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -92,7 +94,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { | ||||||
|         try { |         try { | ||||||
|             return doc.select("div[id=gh-banner] img[class=channel-header-profile-image]").first().attr("abs:src"); |             return doc.select("div[id=gh-banner] img[class=channel-header-profile-image]").first().attr("abs:src"); | ||||||
|         } catch (Exception e) { |         } catch (Exception e) { | ||||||
|             throw new ParsingException("Could not get playlist uploader avatar"); |             throw new ParsingException("Could not get playlist uploader avatar", e); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -248,6 +250,8 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { | ||||||
| 
 | 
 | ||||||
|                 @Override |                 @Override | ||||||
|                 public String getUploaderUrl() throws ParsingException { |                 public String getUploaderUrl() throws ParsingException { | ||||||
|  |                     // this url is not always in the form "/channel/..." | ||||||
|  |                     // sometimes Youtube provides urls in the form "/user/..." | ||||||
|                     return getUploaderLink().attr("abs:href"); |                     return getUploaderLink().attr("abs:href"); | ||||||
|                 } |                 } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -107,6 +107,8 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { | ||||||
| 
 | 
 | ||||||
|     @Override |     @Override | ||||||
|     public String getUploaderUrl() throws ParsingException { |     public String getUploaderUrl() throws ParsingException { | ||||||
|  |         // this url is not always in the form "/channel/..." | ||||||
|  |         // sometimes Youtube provides urls in the form "/user/..." | ||||||
|         try { |         try { | ||||||
|             try { |             try { | ||||||
|                 return item.select("div[class=\"yt-lockup-byline\"]").first() |                 return item.select("div[class=\"yt-lockup-byline\"]").first() | ||||||
|  | @ -119,7 +121,7 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { | ||||||
|                     .text().split(" - ")[0]; |                     .text().split(" - ")[0]; | ||||||
|         } catch (Exception e) { |         } catch (Exception e) { | ||||||
|             System.out.println(item.html()); |             System.out.println(item.html()); | ||||||
|             throw new ParsingException("Could not get uploader", e); |             throw new ParsingException("Could not get uploader url", e); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -126,6 +126,8 @@ public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> { | ||||||
|                     } |                     } | ||||||
| 
 | 
 | ||||||
|                     private Element getUploaderLink() { |                     private Element getUploaderLink() { | ||||||
|  |                         // this url is not always in the form "/channel/..." | ||||||
|  |                         // sometimes Youtube provides urls in the form "/user/..." | ||||||
|                         Element uploaderEl = el.select("div[class*=\"yt-lockup-byline \"]").first(); |                         Element uploaderEl = el.select("div[class*=\"yt-lockup-byline \"]").first(); | ||||||
|                         return uploaderEl.select("a").first(); |                         return uploaderEl.select("a").first(); | ||||||
|                     } |                     } | ||||||
|  |  | ||||||
|  | @ -1,5 +1,6 @@ | ||||||
| package org.schabi.newpipe.extractor.services.soundcloud; | package org.schabi.newpipe.extractor.services.soundcloud; | ||||||
| 
 | 
 | ||||||
|  | import org.hamcrest.CoreMatchers; | ||||||
| import org.junit.BeforeClass; | import org.junit.BeforeClass; | ||||||
| import org.junit.Ignore; | import org.junit.Ignore; | ||||||
| import org.junit.Test; | import org.junit.Test; | ||||||
|  | @ -119,14 +120,14 @@ public class SoundcloudPlaylistExtractorTest { | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     public static class RandomHouseDanceMusic implements BasePlaylistExtractorTest { |     public static class RandomHouseMusic implements BasePlaylistExtractorTest { | ||||||
|         private static SoundcloudPlaylistExtractor extractor; |         private static SoundcloudPlaylistExtractor extractor; | ||||||
| 
 | 
 | ||||||
|         @BeforeClass |         @BeforeClass | ||||||
|         public static void setUp() throws Exception { |         public static void setUp() throws Exception { | ||||||
|             NewPipe.init(Downloader.getInstance(), new Localization("GB", "en")); |             NewPipe.init(Downloader.getInstance(), new Localization("GB", "en")); | ||||||
|             extractor = (SoundcloudPlaylistExtractor) SoundCloud |             extractor = (SoundcloudPlaylistExtractor) SoundCloud | ||||||
|                     .getPlaylistExtractor("https://soundcloud.com/hunter-leader/sets/house-electro-dance-music-2"); |                     .getPlaylistExtractor("https://soundcloud.com/micky96/sets/house"); | ||||||
|             extractor.fetchPage(); |             extractor.fetchPage(); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  | @ -141,22 +142,22 @@ public class SoundcloudPlaylistExtractorTest { | ||||||
| 
 | 
 | ||||||
|         @Test |         @Test | ||||||
|         public void testName() { |         public void testName() { | ||||||
|             assertEquals("House, Electro , Dance Music 2", extractor.getName()); |             assertEquals("House", extractor.getName()); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         @Test |         @Test | ||||||
|         public void testId() { |         public void testId() { | ||||||
|             assertEquals("310980722", extractor.getId()); |             assertEquals("123062856", extractor.getId()); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         @Test |         @Test | ||||||
|         public void testUrl() throws Exception { |         public void testUrl() throws Exception { | ||||||
|             assertEquals("https://soundcloud.com/hunter-leader/sets/house-electro-dance-music-2", extractor.getUrl()); |             assertEquals("https://soundcloud.com/micky96/sets/house", extractor.getUrl()); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         @Test |         @Test | ||||||
|         public void testOriginalUrl() throws Exception { |         public void testOriginalUrl() throws Exception { | ||||||
|             assertEquals("https://soundcloud.com/hunter-leader/sets/house-electro-dance-music-2", extractor.getOriginalUrl()); |             assertEquals("https://soundcloud.com/micky96/sets/house", extractor.getOriginalUrl()); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         /*////////////////////////////////////////////////////////////////////////// |         /*////////////////////////////////////////////////////////////////////////// | ||||||
|  | @ -182,7 +183,7 @@ public class SoundcloudPlaylistExtractorTest { | ||||||
|             assertIsSecureUrl(extractor.getThumbnailUrl()); |             assertIsSecureUrl(extractor.getThumbnailUrl()); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         @Ignore |         @Ignore("not implemented") | ||||||
|         @Test |         @Test | ||||||
|         public void testBannerUrl() { |         public void testBannerUrl() { | ||||||
|             assertIsSecureUrl(extractor.getBannerUrl()); |             assertIsSecureUrl(extractor.getBannerUrl()); | ||||||
|  | @ -192,12 +193,12 @@ public class SoundcloudPlaylistExtractorTest { | ||||||
|         public void testUploaderUrl() { |         public void testUploaderUrl() { | ||||||
|             final String uploaderUrl = extractor.getUploaderUrl(); |             final String uploaderUrl = extractor.getUploaderUrl(); | ||||||
|             assertIsSecureUrl(uploaderUrl); |             assertIsSecureUrl(uploaderUrl); | ||||||
|             assertTrue(uploaderUrl, uploaderUrl.contains("hunter-leader")); |             assertThat(uploaderUrl, CoreMatchers.containsString("micky96")); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         @Test |         @Test | ||||||
|         public void testUploaderName() { |         public void testUploaderName() { | ||||||
|             assertEquals("Gosu", extractor.getUploaderName()); |             assertEquals("_mickyyy", extractor.getUploaderName()); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         @Test |         @Test | ||||||
|  | @ -266,6 +267,7 @@ public class SoundcloudPlaylistExtractorTest { | ||||||
|         // ListExtractor |         // ListExtractor | ||||||
|         //////////////////////////////////////////////////////////////////////////*/ |         //////////////////////////////////////////////////////////////////////////*/ | ||||||
| 
 | 
 | ||||||
|  |         @Ignore | ||||||
|         @Test |         @Test | ||||||
|         public void testRelatedItems() throws Exception { |         public void testRelatedItems() throws Exception { | ||||||
|             defaultTestRelatedItems(extractor, SoundCloud.getServiceId()); |             defaultTestRelatedItems(extractor, SoundCloud.getServiceId()); | ||||||
|  | @ -287,6 +289,7 @@ public class SoundcloudPlaylistExtractorTest { | ||||||
|         // PlaylistExtractor |         // PlaylistExtractor | ||||||
|         //////////////////////////////////////////////////////////////////////////*/ |         //////////////////////////////////////////////////////////////////////////*/ | ||||||
| 
 | 
 | ||||||
|  |         @Ignore | ||||||
|         @Test |         @Test | ||||||
|         public void testThumbnailUrl() { |         public void testThumbnailUrl() { | ||||||
|             assertIsSecureUrl(extractor.getThumbnailUrl()); |             assertIsSecureUrl(extractor.getThumbnailUrl()); | ||||||
|  |  | ||||||
|  | @ -100,7 +100,7 @@ public class YoutubePlaylistExtractorTest { | ||||||
| 
 | 
 | ||||||
|         @Test |         @Test | ||||||
|         public void testUploaderUrl() throws Exception { |         public void testUploaderUrl() throws Exception { | ||||||
|             assertTrue(extractor.getUploaderUrl().contains("youtube.com")); |             assertEquals("https://www.youtube.com/channel/UCs72iRpTEuwV3y6pdWYLgiw", extractor.getUploaderUrl()); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         @Test |         @Test | ||||||
|  | @ -185,8 +185,8 @@ public class YoutubePlaylistExtractorTest { | ||||||
|         public void testMoreRelatedItems() throws Exception { |         public void testMoreRelatedItems() throws Exception { | ||||||
|             ListExtractor.InfoItemsPage<StreamInfoItem> currentPage |             ListExtractor.InfoItemsPage<StreamInfoItem> currentPage | ||||||
|                     = defaultTestMoreItems(extractor, ServiceList.YouTube.getServiceId()); |                     = defaultTestMoreItems(extractor, ServiceList.YouTube.getServiceId()); | ||||||
|             // Test for 2 more levels |  | ||||||
| 
 | 
 | ||||||
|  |             // test for 2 more levels | ||||||
|             for (int i = 0; i < 2; i++) { |             for (int i = 0; i < 2; i++) { | ||||||
|                 currentPage = extractor.getPage(currentPage.getNextPageUrl()); |                 currentPage = extractor.getPage(currentPage.getNextPageUrl()); | ||||||
|                 defaultTestListOfItems(YouTube.getServiceId(), currentPage.getItems(), currentPage.getErrors()); |                 defaultTestListOfItems(YouTube.getServiceId(), currentPage.getItems(), currentPage.getErrors()); | ||||||
|  | @ -214,7 +214,7 @@ public class YoutubePlaylistExtractorTest { | ||||||
| 
 | 
 | ||||||
|         @Test |         @Test | ||||||
|         public void testUploaderUrl() throws Exception { |         public void testUploaderUrl() throws Exception { | ||||||
|             assertTrue(extractor.getUploaderUrl().contains("youtube.com")); |             assertEquals("https://www.youtube.com/channel/UCHSPWoY1J5fbDVbcnyeqwdw", extractor.getUploaderUrl()); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         @Test |         @Test | ||||||
|  |  | ||||||
|  | @ -81,7 +81,7 @@ public class YoutubeStreamExtractorDefaultTest { | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         @Test |         @Test | ||||||
|         public void testGetFullLinksInDescriptlion() throws ParsingException { |         public void testGetFullLinksInDescription() throws ParsingException { | ||||||
|             assertTrue(extractor.getDescription().contains("http://adele.com")); |             assertTrue(extractor.getDescription().contains("http://adele.com")); | ||||||
|             assertFalse(extractor.getDescription().contains("http://smarturl.it/SubscribeAdele?IQi...")); |             assertFalse(extractor.getDescription().contains("http://smarturl.it/SubscribeAdele?IQi...")); | ||||||
|         } |         } | ||||||
|  | @ -111,7 +111,7 @@ public class YoutubeStreamExtractorDefaultTest { | ||||||
| 
 | 
 | ||||||
|         @Test |         @Test | ||||||
|         public void testGetUploaderUrl() throws ParsingException { |         public void testGetUploaderUrl() throws ParsingException { | ||||||
|             assertTrue(extractor.getUploaderUrl().length() > 0); |             assertEquals("https://www.youtube.com/channel/UCsRM0YB_dabtEPGPTKo-gcw", extractor.getUploaderUrl()); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         @Test |         @Test | ||||||
|  |  | ||||||
|  | @ -1,5 +1,6 @@ | ||||||
| package org.schabi.newpipe.extractor.services.youtube.search; | package org.schabi.newpipe.extractor.services.youtube.search; | ||||||
| 
 | 
 | ||||||
|  | import org.hamcrest.CoreMatchers; | ||||||
| import org.junit.BeforeClass; | import org.junit.BeforeClass; | ||||||
| import org.junit.Ignore; | import org.junit.Ignore; | ||||||
| import org.junit.Test; | import org.junit.Test; | ||||||
|  | @ -63,4 +64,19 @@ public class YoutubeSearchExtractorChannelOnlyTest extends YoutubeSearchExtracto | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|  |     @Test | ||||||
|  |     public void testChannelUrl() { | ||||||
|  |         for(InfoItem item : itemsPage.getItems()) { | ||||||
|  |             if (item instanceof ChannelInfoItem) { | ||||||
|  |                 ChannelInfoItem channel = (ChannelInfoItem) item; | ||||||
|  | 
 | ||||||
|  |                 if (channel.getSubscriberCount() > 5e7) { // the real PewDiePie | ||||||
|  |                     assertEquals("https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw", item.getUrl()); | ||||||
|  |                 } else { | ||||||
|  |                     assertThat(item.getUrl(), CoreMatchers.startsWith("https://www.youtube.com/channel/")); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -73,7 +73,7 @@ public class YoutubeSearchExtractorDefaultTest extends YoutubeSearchExtractorBas | ||||||
|         assertTrue((firstInfoItem instanceof ChannelInfoItem) |         assertTrue((firstInfoItem instanceof ChannelInfoItem) | ||||||
|                 || (secondInfoItem instanceof ChannelInfoItem)); |                 || (secondInfoItem instanceof ChannelInfoItem)); | ||||||
|         assertEquals("name", "PewDiePie", channelItem.getName()); |         assertEquals("name", "PewDiePie", channelItem.getName()); | ||||||
|         assertEquals("url","https://www.youtube.com/user/PewDiePie", channelItem.getUrl()); |         assertEquals("url", "https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw", channelItem.getUrl()); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     @Test |     @Test | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue