Add an extractor for Clipsyndicate (closes #1744)
This commit is contained in:
parent
b03d0d064c
commit
3862402ff3
2 changed files with 53 additions and 0 deletions
|
@ -21,6 +21,7 @@ from .canalplus import CanalplusIE
|
|||
from .canalc2 import Canalc2IE
|
||||
from .cinemassacre import CinemassacreIE
|
||||
from .clipfish import ClipfishIE
|
||||
from .clipsyndicate import ClipsyndicateIE
|
||||
from .cnn import CNNIE
|
||||
from .collegehumor import CollegeHumorIE
|
||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||
|
|
52
youtube_dl/extractor/clipsyndicate.py
Normal file
52
youtube_dl/extractor/clipsyndicate.py
Normal file
|
@ -0,0 +1,52 @@
|
|||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
find_xpath_attr,
|
||||
)
|
||||
|
||||
|
||||
class ClipsyndicateIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
|
||||
u'md5': u'4d7d549451bad625e0ff3d7bd56d776c',
|
||||
u'info_dict': {
|
||||
u'id': u'4629301',
|
||||
u'ext': u'mp4',
|
||||
u'title': u'Brick Briscoe',
|
||||
u'duration': 612,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
js_player = self._download_webpage(
|
||||
'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
|
||||
video_id, u'Downlaoding player')
|
||||
# it includes a required token
|
||||
flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars')
|
||||
|
||||
playlist_page = self._download_webpage(
|
||||
'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
|
||||
video_id, u'Downloading video info')
|
||||
# Fix broken xml
|
||||
playlist_page = re.sub('&', '&', playlist_page)
|
||||
pdoc = xml.etree.ElementTree.fromstring(playlist_page.encode('utf-8'))
|
||||
|
||||
track_doc = pdoc.find('trackList/track')
|
||||
def find_param(name):
|
||||
node = find_xpath_attr(track_doc, './/param', 'name', name)
|
||||
if node is not None:
|
||||
return node.attrib['value']
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': find_param('title'),
|
||||
'url': track_doc.find('location').text,
|
||||
'thumbnail': find_param('thumbnail'),
|
||||
'duration': int(find_param('duration')),
|
||||
}
|
Loading…
Reference in a new issue