[xiami] Improve extraction (Closes #9079)
* Switch to JSON source
* Add abstract IE for playlists
* Extract more track-related metadata
This commit is contained in:
parent
89c0dc9a5f
commit
4e0c0c1508
2 changed files with 96 additions and 99 deletions
|
@ -942,7 +942,7 @@ from .xhamster import (
|
||||||
XHamsterEmbedIE,
|
XHamsterEmbedIE,
|
||||||
)
|
)
|
||||||
from .xiami import (
|
from .xiami import (
|
||||||
XiamiIE,
|
XiamiSongIE,
|
||||||
XiamiAlbumIE,
|
XiamiAlbumIE,
|
||||||
XiamiArtistIE,
|
XiamiArtistIE,
|
||||||
XiamiCollectionIE
|
XiamiCollectionIE
|
||||||
|
|
|
@ -1,50 +1,42 @@
|
||||||
# -*- coding: utf-8 -*-
|
# coding: utf-8
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
|
||||||
xpath_element,
|
|
||||||
xpath_text,
|
|
||||||
xpath_with_ns,
|
|
||||||
int_or_none,
|
|
||||||
ExtractorError
|
|
||||||
)
|
|
||||||
from ..compat import compat_urllib_parse_unquote
|
from ..compat import compat_urllib_parse_unquote
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
class XiamiBaseIE(InfoExtractor):
|
class XiamiBaseIE(InfoExtractor):
|
||||||
|
_API_BASE_URL = 'http://www.xiami.com/song/playlist/cat/json/id'
|
||||||
|
|
||||||
_XML_BASE_URL = 'http://www.xiami.com/song/playlist/id'
|
def _extract_track(self, track, track_id=None):
|
||||||
_NS_MAP = {'xm': 'http://xspf.org/ns/0/'}
|
title = track['title']
|
||||||
|
track_url = self._decrypt(track['location'])
|
||||||
|
|
||||||
def _extract_track(self, track):
|
subtitles = {}
|
||||||
artist = xpath_text(track, xpath_with_ns('xm:artist', self._NS_MAP), default='')
|
lyrics_url = track.get('lyric_url') or track.get('lyric')
|
||||||
artist = artist.split(';')
|
if lyrics_url and lyrics_url.startswith('http'):
|
||||||
|
subtitles['origin'] = [{'url': lyrics_url}]
|
||||||
|
|
||||||
ret = {
|
return {
|
||||||
'id': xpath_text(track, xpath_with_ns('xm:song_id', self._NS_MAP)),
|
'id': track.get('song_id') or track_id,
|
||||||
'title': xpath_text(track, xpath_with_ns('xm:title', self._NS_MAP)),
|
'url': track_url,
|
||||||
'album': xpath_text(track, xpath_with_ns('xm:album_name', self._NS_MAP)),
|
'title': title,
|
||||||
'artist': ';'.join(artist) if artist else None,
|
'thumbnail': track.get('pic') or track.get('album_pic'),
|
||||||
'creator': artist[0] if artist else None,
|
'duration': int_or_none(track.get('length')),
|
||||||
'url': self._decrypt(xpath_text(track, xpath_with_ns('xm:location', self._NS_MAP))),
|
'creator': track.get('artist', '').split(';')[0],
|
||||||
'thumbnail': xpath_text(track, xpath_with_ns('xm:pic', self._NS_MAP), default=None),
|
'track': title,
|
||||||
'duration': int_or_none(xpath_text(track, xpath_with_ns('xm:length', self._NS_MAP))),
|
'album': track.get('album_name'),
|
||||||
|
'artist': track.get('artist'),
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
lyrics_url = xpath_text(track, xpath_with_ns('xm:lyric', self._NS_MAP))
|
def _extract_tracks(self, item_id, typ=None):
|
||||||
if lyrics_url and lyrics_url.endswith('.lrc'):
|
playlist = self._download_json(
|
||||||
ret['description'] = self._download_webpage(lyrics_url, ret['id'])
|
'%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if typ else ''), item_id)
|
||||||
return ret
|
return [
|
||||||
|
self._extract_track(track, item_id)
|
||||||
def _extract_xml(self, _id, typ=''):
|
for track in playlist['data']['trackList']]
|
||||||
playlist = self._download_xml('%s/%s%s' % (self._XML_BASE_URL, _id, typ), _id)
|
|
||||||
tracklist = xpath_element(playlist, xpath_with_ns('./xm:trackList', self._NS_MAP))
|
|
||||||
|
|
||||||
if not len(tracklist):
|
|
||||||
raise ExtractorError('No track found')
|
|
||||||
return [self._extract_track(track) for track in tracklist]
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _decrypt(origin):
|
def _decrypt(origin):
|
||||||
|
@ -67,70 +59,82 @@ class XiamiBaseIE(InfoExtractor):
|
||||||
return compat_urllib_parse_unquote(ans).replace('^', '0')
|
return compat_urllib_parse_unquote(ans).replace('^', '0')
|
||||||
|
|
||||||
|
|
||||||
class XiamiIE(XiamiBaseIE):
|
class XiamiSongIE(XiamiBaseIE):
|
||||||
IE_NAME = 'xiami:song'
|
IE_NAME = 'xiami:song'
|
||||||
IE_DESC = '虾米音乐'
|
IE_DESC = '虾米音乐'
|
||||||
_VALID_URL = r'http://www\.xiami\.com/song/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P<id>[0-9]+)'
|
||||||
_TESTS = [
|
_TESTS = [{
|
||||||
{
|
|
||||||
'url': 'http://www.xiami.com/song/1775610518',
|
'url': 'http://www.xiami.com/song/1775610518',
|
||||||
'md5': '521dd6bea40fd5c9c69f913c232cb57e',
|
'md5': '521dd6bea40fd5c9c69f913c232cb57e',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1775610518',
|
'id': '1775610518',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Woman',
|
'title': 'Woman',
|
||||||
'creator': 'HONNE',
|
|
||||||
'album': 'Woman',
|
|
||||||
'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
|
'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
|
||||||
'description': 'md5:052ec7de41ca19f67e7fd70a1bfc4e0b',
|
'duration': 265,
|
||||||
}
|
'creator': 'HONNE',
|
||||||
|
'track': 'Woman',
|
||||||
|
'album': 'Woman',
|
||||||
|
'artist': 'HONNE',
|
||||||
|
'subtitles': {
|
||||||
|
'origin': [{
|
||||||
|
'ext': 'lrc',
|
||||||
|
}],
|
||||||
},
|
},
|
||||||
{
|
}
|
||||||
|
}, {
|
||||||
'url': 'http://www.xiami.com/song/1775256504',
|
'url': 'http://www.xiami.com/song/1775256504',
|
||||||
'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc',
|
'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1775256504',
|
'id': '1775256504',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': '悟空',
|
'title': '悟空',
|
||||||
|
'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
|
||||||
|
'duration': 200,
|
||||||
'creator': '戴荃',
|
'creator': '戴荃',
|
||||||
|
'track': '悟空',
|
||||||
'album': '悟空',
|
'album': '悟空',
|
||||||
'description': 'md5:206e67e84f9bed1d473d04196a00b990',
|
'artist': '戴荃',
|
||||||
}
|
'subtitles': {
|
||||||
|
'origin': [{
|
||||||
|
'ext': 'lrc',
|
||||||
|
}],
|
||||||
},
|
},
|
||||||
]
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
_id = self._match_id(url)
|
return self._extract_tracks(self._match_id(url))[0]
|
||||||
return self._extract_xml(_id)[0]
|
|
||||||
|
|
||||||
|
|
||||||
class XiamiAlbumIE(XiamiBaseIE):
|
class XiamiPlaylistBaseIE(XiamiBaseIE):
|
||||||
|
def _real_extract(self, url):
|
||||||
|
item_id = self._match_id(url)
|
||||||
|
return self.playlist_result(self._extract_tracks(item_id, self._TYPE), item_id)
|
||||||
|
|
||||||
|
|
||||||
|
class XiamiAlbumIE(XiamiPlaylistBaseIE):
|
||||||
IE_NAME = 'xiami:album'
|
IE_NAME = 'xiami:album'
|
||||||
IE_DESC = '虾米音乐 - 专辑'
|
IE_DESC = '虾米音乐 - 专辑'
|
||||||
_VALID_URL = r'http://www\.xiami\.com/album/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P<id>[0-9]+)'
|
||||||
_TESTS = [
|
_TYPE = '1'
|
||||||
{
|
_TESTS = [{
|
||||||
'url': 'http://www.xiami.com/album/2100300444',
|
'url': 'http://www.xiami.com/album/2100300444',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2100300444',
|
'id': '2100300444',
|
||||||
},
|
},
|
||||||
'playlist_count': 10,
|
'playlist_count': 10,
|
||||||
},
|
}, {
|
||||||
{
|
|
||||||
'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9',
|
'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}
|
}]
|
||||||
]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
_id = self._match_id(url)
|
|
||||||
return self.playlist_result(self._extract_xml(_id, '/type/1'), _id)
|
|
||||||
|
|
||||||
|
|
||||||
class XiamiArtistIE(XiamiBaseIE):
|
class XiamiArtistIE(XiamiPlaylistBaseIE):
|
||||||
IE_NAME = 'xiami:artist'
|
IE_NAME = 'xiami:artist'
|
||||||
IE_DESC = '虾米音乐 - 歌手'
|
IE_DESC = '虾米音乐 - 歌手'
|
||||||
_VALID_URL = r'http://www\.xiami\.com/artist/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P<id>[0-9]+)'
|
||||||
|
_TYPE = '2'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp',
|
'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -139,23 +143,16 @@ class XiamiArtistIE(XiamiBaseIE):
|
||||||
'playlist_count': 20,
|
'playlist_count': 20,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
_id = self._match_id(url)
|
|
||||||
return self.playlist_result(self._extract_xml(_id, '/type/2'), _id)
|
|
||||||
|
|
||||||
|
class XiamiCollectionIE(XiamiPlaylistBaseIE):
|
||||||
class XiamiCollectionIE(XiamiBaseIE):
|
|
||||||
IE_NAME = 'xiami:collection'
|
IE_NAME = 'xiami:collection'
|
||||||
IE_DESC = '虾米音乐 - 精选集'
|
IE_DESC = '虾米音乐 - 精选集'
|
||||||
_VALID_URL = r'http://www\.xiami\.com/collect/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P<id>[0-9]+)'
|
||||||
|
_TYPE = '3'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr',
|
'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '156527391',
|
'id': '156527391',
|
||||||
},
|
},
|
||||||
'playlist_count': 26,
|
'playlist_mincount': 29,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
_id = self._match_id(url)
|
|
||||||
return self.playlist_result(self._extract_xml(_id, '/type/3'), _id)
|
|
||||||
|
|
Loading…
Reference in a new issue