Merge remote-tracking branch 'origin/master'

This commit is contained in:
Philipp Hagemeister 2015-02-16 04:09:10 +01:00
commit 5bfd430f81
16 changed files with 163 additions and 48 deletions

View file

@ -110,3 +110,4 @@ Shaya Goldberg
Paul Hartmann Paul Hartmann
Frans de Jonge Frans de Jonge
Robin de Rooij Robin de Rooij
Ryan Schmidt

View file

@ -138,7 +138,7 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles() subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 5) self.assertTrue(len(subtitles.keys()) >= 6)
def test_list_subtitles(self): def test_list_subtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server') self.DL.expect_warning('Automatic Captions not supported by this server')
@ -247,7 +247,7 @@ class TestVimeoSubtitles(BaseTestSubtitles):
def test_subtitles(self): def test_subtitles(self):
self.DL.params['writesubtitles'] = True self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles() subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4') self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
def test_subtitles_lang(self): def test_subtitles_lang(self):
self.DL.params['writesubtitles'] = True self.DL.params['writesubtitles'] = True
@ -334,7 +334,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
self.DL.params['allsubtitles'] = True self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles() subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['cs'])) self.assertEqual(set(subtitles.keys()), set(['cs']))
self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4') self.assertTrue(len(subtitles['cs']) > 20000)
def test_nosubtitles(self): def test_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles') self.DL.expect_warning('video doesn\'t have subtitles')

View file

@ -189,6 +189,7 @@ from .hellporno import HellPornoIE
from .helsinki import HelsinkiIE from .helsinki import HelsinkiIE
from .hentaistigma import HentaiStigmaIE from .hentaistigma import HentaiStigmaIE
from .historicfilms import HistoricFilmsIE from .historicfilms import HistoricFilmsIE
from .history import HistoryIE
from .hitbox import HitboxIE, HitboxLiveIE from .hitbox import HitboxIE, HitboxLiveIE
from .hornbunny import HornBunnyIE from .hornbunny import HornBunnyIE
from .hostingbulk import HostingBulkIE from .hostingbulk import HostingBulkIE

View file

@ -50,7 +50,7 @@ class BambuserIE(InfoExtractor):
'duration': int(info['length']), 'duration': int(info['length']),
'view_count': int(info['views_total']), 'view_count': int(info['views_total']),
'uploader': info['username'], 'uploader': info['username'],
'uploader_id': info['uid'], 'uploader_id': info['owner']['uid'],
} }

View file

@ -273,7 +273,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
formats, subtitles = self._download_media_selector(programme_id) formats, subtitles = self._download_media_selector(programme_id)
return programme_id, title, description, duration, formats, subtitles return programme_id, title, description, duration, formats, subtitles
except ExtractorError as ee: except ExtractorError as ee:
if not isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
raise raise
# fallback to legacy playlist # fallback to legacy playlist

View file

@ -9,7 +9,7 @@ class BeegIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
_TEST = { _TEST = {
'url': 'http://beeg.com/5416503', 'url': 'http://beeg.com/5416503',
'md5': '634526ae978711f6b748fe0dd6c11f57', 'md5': '1bff67111adb785c51d1b42959ec10e5',
'info_dict': { 'info_dict': {
'id': '5416503', 'id': '5416503',
'ext': 'mp4', 'ext': 'mp4',

View file

@ -16,7 +16,7 @@ from ..utils import (
class CamdemyIE(InfoExtractor): class CamdemyIE(InfoExtractor):
_VALID_URL = r'http://www.camdemy.com/media/(?P<id>\d+)' _VALID_URL = r'http://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
# single file # single file
'url': 'http://www.camdemy.com/media/5181/', 'url': 'http://www.camdemy.com/media/5181/',

View file

@ -665,7 +665,7 @@ class InfoExtractor(object):
return RATING_TABLE.get(rating.lower(), None) return RATING_TABLE.get(rating.lower(), None)
def _family_friendly_search(self, html): def _family_friendly_search(self, html):
# See http://schema.org/VideoObj # See http://schema.org/VideoObject
family_friendly = self._html_search_meta('isFamilyFriendly', html) family_friendly = self._html_search_meta('isFamilyFriendly', html)
if not family_friendly: if not family_friendly:

View file

@ -15,7 +15,7 @@ class DrTuberIE(InfoExtractor):
'id': '1740434', 'id': '1740434',
'display_id': 'hot-perky-blonde-naked-golf', 'display_id': 'hot-perky-blonde-naked-golf',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Hot Perky Blonde Naked Golf', 'title': 'hot perky blonde naked golf',
'like_count': int, 'like_count': int,
'dislike_count': int, 'dislike_count': int,
'comment_count': int, 'comment_count': int,
@ -36,7 +36,8 @@ class DrTuberIE(InfoExtractor):
r'<source src="([^"]+)"', webpage, 'video URL') r'<source src="([^"]+)"', webpage, 'video URL')
title = self._html_search_regex( title = self._html_search_regex(
r'<title>([^<]+)\s*-\s*Free', webpage, 'title') [r'class="hd_title" style="[^"]+">([^<]+)</h1>', r'<title>([^<]+) - \d+'],
webpage, 'title')
thumbnail = self._html_search_regex( thumbnail = self._html_search_regex(
r'poster="([^"]+)"', r'poster="([^"]+)"',

View file

@ -1,52 +1,71 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none from ..utils import int_or_none
class FirstTVIE(InfoExtractor): class FirstTVIE(InfoExtractor):
IE_NAME = 'firsttv' IE_NAME = '1tv'
IE_DESC = 'Видеоархив - Первый канал' IE_DESC = 'Первый канал'
_VALID_URL = r'http://(?:www\.)?1tv\.ru/videoarchive/(?P<id>\d+)' _VALID_URL = r'http://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)'
_TEST = { _TESTS = [{
'url': 'http://www.1tv.ru/videoarchive/73390', 'url': 'http://www.1tv.ru/videoarchive/73390',
'md5': '3de6390cf0cca4a5eae1d1d83895e5ad', 'md5': '777f525feeec4806130f4f764bc18a4f',
'info_dict': { 'info_dict': {
'id': '73390', 'id': '73390',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Олимпийские канатные дороги', 'title': 'Олимпийские канатные дороги',
'description': 'md5:cc730d2bf4215463e37fff6a1e277b13', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': 'http://img1.1tv.ru/imgsize640x360/PR20140210114657.JPG', 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
'duration': 149, 'duration': 149,
'like_count': int,
'dislike_count': int,
}, },
'skip': 'Only works from Russia', 'skip': 'Only works from Russia',
} }, {
'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930',
'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
'info_dict': {
'id': '35930',
'ext': 'mp4',
'title': 'Наедине со всеми. Людмила Сенчина',
'description': 'md5:89553aed1d641416001fe8d450f06cb9',
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
'duration': 2694,
},
'skip': 'Only works from Russia',
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id, 'Downloading page') webpage = self._download_webpage(url, video_id, 'Downloading page')
video_url = self._html_search_regex( video_url = self._html_search_regex(
r'''(?s)jwplayer\('flashvideoportal_1'\)\.setup\({.*?'file': '([^']+)'.*?}\);''', webpage, 'video URL') r'''(?s)(?:jwplayer\('flashvideoportal_1'\)\.setup\({|var\s+playlistObj\s*=).*?'file'\s*:\s*'([^']+)'.*?}\);''',
webpage, 'video URL')
title = self._html_search_regex( title = self._html_search_regex(
r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', webpage, 'title') [r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
r"'title'\s*:\s*'([^']+)'"], webpage, 'title')
description = self._html_search_regex( description = self._html_search_regex(
r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>', webpage, 'description', fatal=False) r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>',
webpage, 'description', default=None) or self._html_search_meta(
'description', webpage, 'description')
thumbnail = self._og_search_thumbnail(webpage) thumbnail = self._og_search_thumbnail(webpage)
duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False) duration = self._og_search_property(
'video:duration', webpage,
'video duration', fatal=False)
like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]', like_count = self._html_search_regex(
webpage, 'like count', fatal=False) r'title="Понравилось".*?/></label> \[(\d+)\]',
dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]', webpage, 'like count', default=None)
webpage, 'dislike count', fatal=False) dislike_count = self._html_search_regex(
r'title="Не понравилось".*?/></label> \[(\d+)\]',
webpage, 'dislike count', default=None)
return { return {
'id': video_id, 'id': video_id,

View file

@ -0,0 +1,31 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import smuggle_url
class HistoryIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?history\.com/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
_TESTS = [{
'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
'md5': '6fe632d033c92aa10b8d4a9be047a7c5',
'info_dict': {
'id': 'bLx5Dv5Aka1G',
'ext': 'mp4',
'title': "Bet You Didn't Know: Valentine's Day",
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
},
'add_ie': ['ThePlatform'],
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
webpage, 'video url')
return self.url_result(smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}))

View file

@ -1,7 +1,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
@ -52,9 +51,9 @@ class NBCIE(InfoExtractor):
class NBCNewsIE(InfoExtractor): class NBCNewsIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://www\.nbcnews\.com/ _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
((video/.+?/(?P<id>\d+))| (?:video/.+?/(?P<id>\d+)|
(feature/[^/]+/(?P<title>.+))) (?:feature|nightly-news)/[^/]+/(?P<title>.+))
''' '''
_TESTS = [ _TESTS = [
@ -89,6 +88,16 @@ class NBCNewsIE(InfoExtractor):
'description': 'md5:757988edbaae9d7be1d585eb5d55cc04', 'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
}, },
}, },
{
'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
'md5': 'b5dda8cddd8650baa0dcb616dd2cf60d',
'info_dict': {
'id': 'sekXqyTVnmN3',
'ext': 'mp4',
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
},
},
] ]
def _real_extract(self, url): def _real_extract(self, url):
@ -107,13 +116,13 @@ class NBCNewsIE(InfoExtractor):
'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text, 'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
} }
else: else:
# "feature" pages use theplatform.com # "feature" and "nightly-news" pages use theplatform.com
title = mobj.group('title') title = mobj.group('title')
webpage = self._download_webpage(url, title) webpage = self._download_webpage(url, title)
bootstrap_json = self._search_regex( bootstrap_json = self._search_regex(
r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json', r'var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
flags=re.MULTILINE) webpage, 'bootstrap json', flags=re.MULTILINE)
bootstrap = json.loads(bootstrap_json) bootstrap = self._parse_json(bootstrap_json, video_id)
info = bootstrap['results'][0]['video'] info = bootstrap['results'][0]['video']
mpxid = info['mpxId'] mpxid = info['mpxId']

View file

@ -1,14 +1,30 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import unicode_literals from __future__ import unicode_literals
import hashlib
import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
)
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
) )
def _get_api_key(api_path):
if api_path.endswith('?'):
api_path = api_path[:-1]
api_key = 'fb5f58a820353bd7095de526253c14fd'
a = '{0:}{1:}{2:}'.format(api_key, api_path, int(round(time.time() / 24 / 3600)))
return hashlib.md5(a.encode('ascii')).hexdigest()
class StreamCZIE(InfoExtractor): class StreamCZIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<id>[0-9]+)'
_API_URL = 'http://www.stream.cz/API'
_TESTS = [{ _TESTS = [{
'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti', 'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
@ -36,8 +52,11 @@ class StreamCZIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
data = self._download_json( api_path = '/episode/%s' % video_id
'http://www.stream.cz/API/episode/%s' % video_id, video_id)
req = compat_urllib_request.Request(self._API_URL + api_path)
req.add_header('Api-Password', _get_api_key(api_path))
data = self._download_json(req, video_id)
formats = [] formats = []
for quality, video in enumerate(data['video_qualities']): for quality, video in enumerate(data['video_qualities']):

View file

@ -52,7 +52,7 @@ class SunPornoIE(InfoExtractor):
formats = [] formats = []
quality = qualities(['mp4', 'flv']) quality = qualities(['mp4', 'flv'])
for video_url in re.findall(r'<source src="([^"]+)"', webpage): for video_url in re.findall(r'<(?:source|video) src="([^"]+)"', webpage):
video_ext = determine_ext(video_url) video_ext = determine_ext(video_url)
formats.append({ formats.append({
'url': video_url, 'url': video_url,

View file

@ -2,6 +2,11 @@ from __future__ import unicode_literals
import re import re
import json import json
import time
import hmac
import binascii
import hashlib
from .subtitles import SubtitlesInfoExtractor from .subtitles import SubtitlesInfoExtractor
from ..compat import ( from ..compat import (
@ -11,6 +16,7 @@ from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
xpath_with_ns, xpath_with_ns,
unsmuggle_url,
) )
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'}) _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
@ -18,7 +24,7 @@ _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language
class ThePlatformIE(SubtitlesInfoExtractor): class ThePlatformIE(SubtitlesInfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
(?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/ (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)? (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
|theplatform:)(?P<id>[^/\?&]+)''' |theplatform:)(?P<id>[^/\?&]+)'''
@ -38,9 +44,33 @@ class ThePlatformIE(SubtitlesInfoExtractor):
}, },
} }
@staticmethod
def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
flags = '10' if include_qs else '00'
expiration_date = '%x' % (int(time.time()) + life)
def str_to_hex(str):
return binascii.b2a_hex(str.encode('ascii')).decode('ascii')
def hex_to_str(hex):
return binascii.a2b_hex(hex)
relative_path = url.split('http://link.theplatform.com/s/')[1].split('?')[0]
clear_text = hex_to_str(flags + expiration_date + str_to_hex(relative_path))
checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
return '%s&sig=%s' % (url, sig)
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
provider_id = mobj.group('provider_id')
video_id = mobj.group('id') video_id = mobj.group('id')
if not provider_id:
provider_id = 'dJ5BDC'
if mobj.group('config'): if mobj.group('config'):
config_url = url + '&form=json' config_url = url + '&form=json'
config_url = config_url.replace('swf/', 'config/') config_url = config_url.replace('swf/', 'config/')
@ -48,8 +78,12 @@ class ThePlatformIE(SubtitlesInfoExtractor):
config = self._download_json(config_url, video_id, 'Downloading config') config = self._download_json(config_url, video_id, 'Downloading config')
smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m' smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
else: else:
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?' smil_url = ('http://link.theplatform.com/s/{0}/{1}/meta.smil?'
'format=smil&mbr=true'.format(video_id)) 'format=smil&mbr=true'.format(provider_id, video_id))
sig = smuggled_data.get('sig')
if sig:
smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
meta = self._download_xml(smil_url, video_id) meta = self._download_xml(smil_url, video_id)
try: try:
@ -62,7 +96,7 @@ class ThePlatformIE(SubtitlesInfoExtractor):
else: else:
raise ExtractorError(error_msg, expected=True) raise ExtractorError(error_msg, expected=True)
info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id) info_url = 'http://link.theplatform.com/s/{0}/{1}?format=preview'.format(provider_id, video_id)
info_json = self._download_webpage(info_url, video_id) info_json = self._download_webpage(info_url, video_id)
info = json.loads(info_json) info = json.loads(info_json)

View file

@ -138,7 +138,7 @@ class FFmpegPostProcessor(PostProcessor):
if self._downloader.params.get('verbose', False): if self._downloader.params.get('verbose', False):
self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd)) self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
stdout, stderr = p.communicate() stdout, stderr = p.communicate()
if p.returncode != 0: if p.returncode != 0:
stderr = stderr.decode('utf-8', 'replace') stderr = stderr.decode('utf-8', 'replace')
@ -178,8 +178,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
encodeArgument('-show_streams'), encodeArgument('-show_streams'),
encodeFilename(self._ffmpeg_filename_argument(path), True)] encodeFilename(self._ffmpeg_filename_argument(path), True)]
if self._downloader.params.get('verbose', False): if self._downloader.params.get('verbose', False):
self._downloader.to_screen('[debug] ffprobe command line: %s' % shell_quote(cmd)) self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd)))
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE) handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE)
output = handle.communicate()[0] output = handle.communicate()[0]
if handle.wait() != 0: if handle.wait() != 0:
return None return None