From 41333b97b9471316cf0f395db59196e6571fc776 Mon Sep 17 00:00:00 2001 From: ping Date: Tue, 12 May 2015 22:35:16 +0800 Subject: [PATCH 1/4] [qqmusic] Add support for charts / top lists --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/qqmusic.py | 55 ++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index de19dfd7a..8ec0c1032 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -414,6 +414,7 @@ from .qqmusic import ( QQMusicIE, QQMusicSingerIE, QQMusicAlbumIE, + QQMusicToplistIE, ) from .quickvid import QuickVidIE from .r7 import R7IE diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 174c8e0ae..d4a85d8c3 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -9,6 +9,7 @@ from .common import InfoExtractor from ..utils import ( strip_jsonp, unescapeHTML, + js_to_json, ) from ..compat import compat_urllib_request @@ -168,3 +169,57 @@ class QQMusicAlbumIE(QQPlaylistBaseIE): album_page, 'album details', default=None) return self.playlist_result(entries, mid, album_name, album_detail) + + +class QQMusicToplistIE(QQPlaylistBaseIE): + _VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)' + + _TESTS = [{ + 'url': 'http://y.qq.com/#type=toplist&p=global_12', + 'info_dict': { + 'id': 'global_12', + 'title': 'itunes榜', + }, + 'playlist_count': 10, + }, { + 'url': 'http://y.qq.com/#type=toplist&p=top_6', + 'info_dict': { + 'id': 'top_6', + 'title': 'QQ音乐巅峰榜·欧美', + }, + 'playlist_count': 100, + }] + + @staticmethod + def strip_qq_jsonp(code): + return js_to_json(re.sub(r'^MusicJsonCallback\((.*?)\)/\*.+?\*/$', r'\1', code)) + + def _real_extract(self, url): + list_id = self._match_id(url) + + list_type = list_id.split("_")[0] + num_id = list_id.split("_")[1] + + list_page = self._download_webpage("http://y.qq.com/y/static/toplist/index/%s.html" % 
list_id, list_id, 'Download toplist page') + entries = [] + if list_type == 'top': + list = self._download_json( + "http://y.qq.com/y/static/toplist/json/top/%s/1.js" % num_id, + list_id, note='Retrieve toplist json', errnote='Unable to get toplist json', transform_source=self.strip_qq_jsonp) + + for song in list['l']: + s = song['s'] + song_mid = s.split("|")[20] + entries.append(self.url_result( + 'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic', + song_mid)) + + elif list_type == 'global': + entries = self.get_entries_from_page(list_page) + + list_name = self._html_search_regex( + r'

<h2 id="top_name">([^\']+)</h2>

', list_page, 'top list name', + default=None) + list_desc = None + + return self.playlist_result(entries, list_id, list_name, list_desc) \ No newline at end of file From b480e7874b45862eae343ab8484aa43381cd28fa Mon Sep 17 00:00:00 2001 From: ping Date: Tue, 12 May 2015 22:41:37 +0800 Subject: [PATCH 2/4] [qqmusic] Fix code formatting --- youtube_dl/extractor/qqmusic.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index d4a85d8c3..bca4a8f90 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -200,12 +200,15 @@ class QQMusicToplistIE(QQPlaylistBaseIE): list_type = list_id.split("_")[0] num_id = list_id.split("_")[1] - list_page = self._download_webpage("http://y.qq.com/y/static/toplist/index/%s.html" % list_id, list_id, 'Download toplist page') + list_page = self._download_webpage( + "http://y.qq.com/y/static/toplist/index/%s.html" % list_id, + list_id, 'Download toplist page') entries = [] if list_type == 'top': list = self._download_json( "http://y.qq.com/y/static/toplist/json/top/%s/1.js" % num_id, - list_id, note='Retrieve toplist json', errnote='Unable to get toplist json', transform_source=self.strip_qq_jsonp) + list_id, note='Retrieve toplist json', errnote='Unable to get toplist json', + transform_source=self.strip_qq_jsonp) for song in list['l']: s = song['s'] @@ -222,4 +225,5 @@ class QQMusicToplistIE(QQPlaylistBaseIE): default=None) list_desc = None - return self.playlist_result(entries, list_id, list_name, list_desc) \ No newline at end of file + return self.playlist_result(entries, list_id, list_name, list_desc) + \ No newline at end of file From fd4eefed39595850b864d3be9711224e4e8e9dd4 Mon Sep 17 00:00:00 2001 From: ping Date: Wed, 13 May 2015 01:14:02 +0800 Subject: [PATCH 3/4] [qqmusic] Fix extraction for global list --- youtube_dl/extractor/qqmusic.py | 34 ++++++++++++++++++++------------- 1 file changed, 21 
insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index bca4a8f90..3401dcaef 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -188,6 +188,13 @@ class QQMusicToplistIE(QQPlaylistBaseIE): 'title': 'QQ音乐巅峰榜·欧美', }, 'playlist_count': 100, + }, { + 'url': 'http://y.qq.com/#type=toplist&p=global_5', + 'info_dict': { + 'id': 'global_5', + 'title': '韩国mnet排行榜', + }, + 'playlist_count': 50, }] @staticmethod @@ -203,22 +210,23 @@ class QQMusicToplistIE(QQPlaylistBaseIE): list_page = self._download_webpage( "http://y.qq.com/y/static/toplist/index/%s.html" % list_id, list_id, 'Download toplist page') + entries = [] + jsonp_url = "" if list_type == 'top': - list = self._download_json( - "http://y.qq.com/y/static/toplist/json/top/%s/1.js" % num_id, - list_id, note='Retrieve toplist json', errnote='Unable to get toplist json', - transform_source=self.strip_qq_jsonp) - - for song in list['l']: - s = song['s'] - song_mid = s.split("|")[20] - entries.append(self.url_result( - 'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic', - song_mid)) - + jsonp_url = "http://y.qq.com/y/static/toplist/json/top/%s/1.js" % num_id elif list_type == 'global': - entries = self.get_entries_from_page(list_page) + jsonp_url = "http://y.qq.com/y/static/toplist/json/global/%s/1_1.js" % num_id + + list = self._download_json(jsonp_url, list_id, note='Retrieve toplist json', + errnote='Unable to get toplist json', transform_source=self.strip_qq_jsonp) + + for song in list['l']: + s = song['s'] + song_mid = s.split("|")[20] + entries.append(self.url_result( + 'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic', + song_mid)) list_name = self._html_search_regex( r'

<h2 id="top_name">([^\']+)</h2>

', list_page, 'top list name', From 86ec1e487c4908f4d0d0ece512007a2e5fedc593 Mon Sep 17 00:00:00 2001 From: ping Date: Wed, 13 May 2015 01:37:56 +0800 Subject: [PATCH 4/4] [qqmusic] Code fixes --- youtube_dl/extractor/qqmusic.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 3401dcaef..bae2ce31a 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -212,10 +212,9 @@ class QQMusicToplistIE(QQPlaylistBaseIE): list_id, 'Download toplist page') entries = [] - jsonp_url = "" if list_type == 'top': jsonp_url = "http://y.qq.com/y/static/toplist/json/top/%s/1.js" % num_id - elif list_type == 'global': + else: jsonp_url = "http://y.qq.com/y/static/toplist/json/global/%s/1_1.js" % num_id list = self._download_json(jsonp_url, list_id, note='Retrieve toplist json',