[huffpost] Add support
This commit is contained in:
parent
0f2999fe2b
commit
db1f388878
5 changed files with 83 additions and 3 deletions
|
@ -1,3 +1,5 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from .common import FileDownloader
|
||||
from .hls import HlsFD
|
||||
from .http import HttpFD
|
||||
|
@ -12,10 +14,11 @@ from ..utils import (
|
|||
def get_suitable_downloader(info_dict):
|
||||
"""Get the downloader class that can handle the info dict."""
|
||||
url = info_dict['url']
|
||||
protocol = info_dict.get('protocol')
|
||||
|
||||
if url.startswith('rtmp'):
|
||||
return RtmpFD
|
||||
if determine_ext(url) == u'm3u8':
|
||||
if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'):
|
||||
return HlsFD
|
||||
if url.startswith('mms') or url.startswith('rtsp'):
|
||||
return MplayerFD
|
||||
|
|
|
@ -83,6 +83,7 @@ from .googlesearch import GoogleSearchIE
|
|||
from .hark import HarkIE
|
||||
from .hotnewhiphop import HotNewHipHopIE
|
||||
from .howcast import HowcastIE
|
||||
from .huffpost import HuffPostIE
|
||||
from .hypem import HypemIE
|
||||
from .ign import IGNIE, OneUPIE
|
||||
from .imdb import (
|
||||
|
|
|
@ -71,7 +71,7 @@ class InfoExtractor(object):
|
|||
* player_url SWF Player URL (used for rtmpdump).
|
||||
* protocol The protocol that will be used for the actual
|
||||
download, lower-case.
|
||||
"http", "https", "rtsp", "rtmp" or so.
|
||||
"http", "https", "rtsp", "rtmp", "m3u8" or so.
|
||||
* preference Order number of this format. If this field is
|
||||
present and not None, the formats get sorted
|
||||
by this field.
|
||||
|
|
|
@ -332,10 +332,16 @@ class GenericIE(InfoExtractor):
|
|||
|
||||
# Look for embedded Facebook player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https://www.facebook.com/video/embed.+?)\1', webpage)
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Facebook')
|
||||
|
||||
# Look for embedded Huffington Post player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live.huffingtonpost\.com/.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'HuffPost')
|
||||
|
||||
# Start with something easy: JW Player in SWFObject
|
||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||
if mobj is None:
|
||||
|
|
70
youtube_dl/extractor/huffpost.py
Normal file
70
youtube_dl/extractor/huffpost.py
Normal file
|
@ -0,0 +1,70 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class HuffPostIE(InfoExtractor):
    """Extractor for Huffington Post live segments.

    Matches segment pages (``/r/segment/<slug>/<id>``) and embedded players
    (``/HPLEmbedPlayer/?segmentId=<id>``) on ``live.huffingtonpost.com`` and
    ``embed.live.huffingtonpost.com``.
    """

    IE_DESC = 'Huffington Post'
    _VALID_URL = r'''(?x)
        https?://(embed\.)?live\.huffingtonpost\.com/
        (?:
            r/segment/[^/]+/|
            HPLEmbedPlayer/\?segmentId=
        )
        (?P<id>[0-9a-f]+)'''

    # NOTE(review): 'md5', 'title' and 'description' are still 'TODO'
    # placeholders; fill in the real values before relying on this test.
    _TEST = {
        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
        'file': '52dd3e4b02a7602131000677.mp4',
        'md5': 'TODO',
        'info_dict': {
            'title': 'TODO',
            'description': 'TODO',
            'duration': 1549,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the segment referenced by ``url``.

        The segment id is taken from the ``id`` group of ``_VALID_URL`` and
        used to download the segment's JSON description; everything returned
        is read from the ``data`` member of that JSON document.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``duration``, ``upload_date`` and ``thumbnails``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
        data = self._download_json(api_url, video_id)['data']

        video_title = data['title']
        duration = parse_duration(data['running_time'])
        upload_date = unified_strdate(data['schedule']['started_at'])

        # Distinct loop names are used below so that the ``url`` parameter
        # is never rebound while the metadata is being collected.
        thumbnails = []
        for thumb_url in data['images'].values():
            # Thumbnails are optional metadata: skip empty entries instead of
            # letting re.match() raise TypeError on a null value.
            if not thumb_url:
                continue
            # Only images whose name carries a "-<width>x<height>." marker
            # are kept; that marker is reported as the resolution.
            m = re.match(r'.*-([0-9]+x[0-9]+)\.', thumb_url)
            if not m:
                continue
            thumbnails.append({
                'url': thumb_url,
                'resolution': m.group(1),
            })

        formats = [{
            'format': key,
            'format_id': key.replace('/', '.'),
            'ext': 'mp4',
            'url': src_url,
            # Keys starting with "audio/" are flagged as having no video.
            'vcodec': 'none' if key.startswith('audio/') else None,
        } for key, src_url in data['sources']['live'].items()]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'duration': duration,
            'upload_date': upload_date,
            'thumbnails': thumbnails,
        }
|
Loading…
Reference in a new issue