from __future__ import unicode_literals import re import json from .common import InfoExtractor from ..compat import ( compat_str, ) from ..utils import ExtractorError class MySpaceIE(InfoExtractor): _VALID_URL = r'https?://myspace\.com/([^/]+)/(?Pvideo/[^/]+/|music/song/.*?)(?P\d+)' _TESTS = [ { 'url': 'https://myspace.com/coldplay/video/viva-la-vida/100008689', 'info_dict': { 'id': '100008689', 'ext': 'flv', 'title': 'Viva La Vida', 'description': 'The official Viva La Vida video, directed by Hype Williams', 'uploader': 'Coldplay', 'uploader_id': 'coldplay', }, 'params': { # rtmp download 'skip_download': True, }, }, # song { 'url': 'https://myspace.com/spiderbags/music/song/darkness-in-my-heart-39008454-27041242', 'info_dict': { 'id': '39008454', 'ext': 'flv', 'title': 'Darkness In My Heart', 'uploader_id': 'spiderbags', }, 'params': { # rtmp download 'skip_download': True, }, }, ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) player_url = self._search_regex( r'playerSwf":"([^"?]*)', webpage, 'player URL') if mobj.group('mediatype').startswith('music/song'): # songs don't store any useful info in the 'context' variable song_data = self._search_regex( r'''