[cinemassacre] Keep both extraction approaches and make more robust (Closes #4109)

This commit is contained in:
Sergey M․ 2014-11-05 21:32:46 +07:00
parent 81b22aee8b
commit dab647a7b6

View file

@ -42,11 +42,12 @@ class CinemassacreIE(InfoExtractor):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage) mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<full_video_id>(?:Cinemassacre-)?(?P<video_id>.+?)))"', webpage)
if not mobj: if not mobj:
raise ExtractorError('Can\'t extract embed url and video id') raise ExtractorError('Can\'t extract embed url and video id')
playerdata_url = mobj.group('embed_url') playerdata_url = mobj.group('embed_url')
video_id = mobj.group('video_id') video_id = mobj.group('video_id')
full_video_id = mobj.group('full_video_id')
video_title = self._html_search_regex( video_title = self._html_search_regex(
r'<title>(?P<title>.+?)\|', webpage, 'title') r'<title>(?P<title>.+?)\|', webpage, 'title')
@ -60,37 +61,52 @@ class CinemassacreIE(InfoExtractor):
vidurl = self._search_regex( vidurl = self._search_regex(
r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/') r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/')
videolist_url = self._search_regex( videolist_url = None
r"file\s*:\s*'(http.+?/jwplayer\.smil)'", playerdata, 'jwplayer.smil')
videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
formats = [] mobj = re.search(r"'videoserver'\s*:\s*'(?P<videoserver>[^']+)'", playerdata)
baseurl = vidurl[:vidurl.rfind('/')+1] if mobj:
for video in videolist.findall('.//video'): videoserver = mobj.group('videoserver')
src = video.get('src') mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata)
if not src: vidid = mobj.group('vidid') if mobj else full_video_id
continue videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)
file_ = src.partition(':')[-1] else:
width = int_or_none(video.get('width')) mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata)
height = int_or_none(video.get('height')) if mobj:
bitrate = int_or_none(video.get('system-bitrate')) videolist_url = mobj.group('smil')
format = {
'url': baseurl + file_, if videolist_url:
'format_id': src.rpartition('.')[0].rpartition('_')[-1], videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
} formats = []
if width or height: baseurl = vidurl[:vidurl.rfind('/')+1]
format.update({ for video in videolist.findall('.//video'):
'tbr': bitrate // 1000 if bitrate else None, src = video.get('src')
'width': width, if not src:
'height': height, continue
}) file_ = src.partition(':')[-1]
else: width = int_or_none(video.get('width'))
format.update({ height = int_or_none(video.get('height'))
'abr': bitrate // 1000 if bitrate else None, bitrate = int_or_none(video.get('system-bitrate'))
'vcodec': 'none', format = {
}) 'url': baseurl + file_,
formats.append(format) 'format_id': src.rpartition('.')[0].rpartition('_')[-1],
self._sort_formats(formats) }
if width or height:
format.update({
'tbr': bitrate // 1000 if bitrate else None,
'width': width,
'height': height,
})
else:
format.update({
'abr': bitrate // 1000 if bitrate else None,
'vcodec': 'none',
})
formats.append(format)
self._sort_formats(formats)
else:
formats = [{
'url': vidurl,
}]
return { return {
'id': video_id, 'id': video_id,