Merge remote-tracking branch 'upstream/master'
This commit is contained in:
commit
99859d436c
10 changed files with 127 additions and 18 deletions
|
@ -26,9 +26,9 @@ tests = [
|
|||
# 85
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
|
||||
".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
|
||||
# 84
|
||||
# 84 - vflh9ybst 2013/08/23 (sporadic)
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
|
||||
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"),
|
||||
"yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"),
|
||||
# 83
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
|
||||
".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
|
||||
|
|
|
@ -29,6 +29,7 @@ from .gametrailers import GametrailersIE
|
|||
from .generic import GenericIE
|
||||
from .googleplus import GooglePlusIE
|
||||
from .googlesearch import GoogleSearchIE
|
||||
from .hark import HarkIE
|
||||
from .hotnewhiphop import HotNewHipHopIE
|
||||
from .howcast import HowcastIE
|
||||
from .hypem import HypemIE
|
||||
|
@ -57,6 +58,7 @@ from .pornotube import PornotubeIE
|
|||
from .rbmaradio import RBMARadioIE
|
||||
from .redtube import RedTubeIE
|
||||
from .ringtv import RingTVIE
|
||||
from .ro220 import Ro220IE
|
||||
from .roxwel import RoxwelIE
|
||||
from .rtlnow import RTLnowIE
|
||||
from .sina import SinaIE
|
||||
|
@ -116,12 +118,14 @@ _ALL_CLASSES = [
|
|||
]
|
||||
_ALL_CLASSES.append(GenericIE)
|
||||
|
||||
|
||||
def gen_extractors():
    """Build one instance of each supported extractor class.

    The returned list follows the order of _ALL_CLASSES, and that order is
    significant: the first extractor that matches a URL is the one that
    handles it.
    """
    extractors = []
    for extractor_class in _ALL_CLASSES:
        extractors.append(extractor_class())
    return extractors
|
||||
|
||||
|
||||
def get_info_extractor(ie_name):
    """Look up an info extractor class by name.

    The class is fetched from this module's globals under the key
    ``ie_name + 'IE'``; a KeyError propagates if no such name exists.
    """
    class_name = ie_name + 'IE'
    module_namespace = globals()
    return module_namespace[class_name]
|
||||
|
|
|
@ -12,8 +12,8 @@ class C56IE(InfoExtractor):
|
|||
|
||||
_TEST ={
|
||||
u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
|
||||
u'file': u'93440716.mp4',
|
||||
u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
|
||||
u'file': u'93440716.flv',
|
||||
u'md5': u'e59995ac63d0457783ea05f93f12a866',
|
||||
u'info_dict': {
|
||||
u'title': u'网事知多少 第32期:车怒',
|
||||
},
|
||||
|
|
|
@ -21,7 +21,7 @@ class DailymotionIE(InfoExtractor):
|
|||
u'file': u'x33vw9.mp4',
|
||||
u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
|
||||
u'info_dict': {
|
||||
u"uploader": u"Alex and Van .",
|
||||
u"uploader": u"Amphora Alex and Van .",
|
||||
u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,12 +7,14 @@ from .common import InfoExtractor
|
|||
from ..utils import (
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
from .brightcove import BrightcoveIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
IE_DESC = u'Generic downloader that works on some sites'
|
||||
_VALID_URL = r'.*'
|
||||
|
@ -124,7 +126,7 @@ class GenericIE(InfoExtractor):
|
|||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
|
||||
self.report_extraction(video_id)
|
||||
# Look for BrigthCove:
|
||||
# Look for BrightCove:
|
||||
m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
|
||||
if m_brightcove is not None:
|
||||
self.to_screen(u'Brightcove video detected.')
|
||||
|
@ -161,6 +163,10 @@ class GenericIE(InfoExtractor):
|
|||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
|
||||
video_url = compat_urllib_parse.unquote(mobj.group(1))
|
||||
if video_url.startswith('//'):
|
||||
video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url
|
||||
if '://' not in video_url:
|
||||
video_url = url + ('' if url.endswith('/') else '/') + video_url
|
||||
video_id = os.path.basename(video_url)
|
||||
|
||||
# here's a fun little line of code for you:
|
||||
|
|
35
youtube_dl/extractor/hark.py
Normal file
35
youtube_dl/extractor/hark.py
Normal file
|
@ -0,0 +1,35 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
|
||||
class HarkIE(InfoExtractor):
    """Information extractor for audio clips on www.hark.com."""

    # Group 1 captures the clip id: the slug text before the first '-'.
    _VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+'
    _TEST = {
        u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
        u'file': u'mmbzyhkgny.mp3',
        u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
        u'info_dict': {
            u"title": u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' On May 23, 2013 ",
        }
    }

    def _real_extract(self, url):
        """Extract the audio URL and title for a hark.com clip.

        Downloads the clip's ``homepage_embed`` page, takes the first
        ``src="....mp3"`` attribute as the media URL and derives the title
        from the page's <title> element with the site suffix removed.

        Returns a dict with the keys 'id', 'url', 'title' and 'ext'.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        embed_url = "http://www.hark.com/clips/%s/homepage_embed" %(video_id)
        webpage = self._download_webpage(embed_url, video_id)

        # The dot is escaped so that only a real ".mp3" extension matches;
        # an unescaped '.' would accept any character before "mp3" and the
        # re-appended '.mp3' would then yield a wrong URL.
        final_url = self._search_regex(r'src="(.+?)\.mp3"',
                                webpage, 'video url')+'.mp3'
        # Strip either of the two site suffixes seen in the page title.
        title = self._html_search_regex(r'<title>(.+?)</title>',
                                webpage, 'video title').replace(' Sound Clip and Quote - Hark','').replace(
                                'Sound Clip , Quote, MP3, and Ringtone - Hark','')

        return {'id': video_id,
                'url' : final_url,
                'title': title,
                'ext': determine_ext(final_url),
                }
|
42
youtube_dl/extractor/ro220.py
Normal file
42
youtube_dl/extractor/ro220.py
Normal file
|
@ -0,0 +1,42 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
compat_parse_qs,
|
||||
)
|
||||
|
||||
|
||||
class Ro220IE(InfoExtractor):
    """Information extractor for videos on 220.ro."""

    IE_NAME = '220.ro'
    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
    _TEST = {
        u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
        u'file': u'LYV6doKo7f.mp4',
        u'md5': u'03af18b73a07b4088753930db7a34add',
        u'info_dict': {
            u"title": u"Luati-le Banii sez 4 ep 1",
            u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
        }
    }

    def _real_extract(self, url):
        """Read the video metadata from the page's flashVars parameter.

        The page is downloaded, the value of its ``flashVars`` <param> is
        parsed as a query string, and the first value of each needed key
        is copied into the returned info dict.
        """
        video_id = re.match(self._VALID_URL, url).group('video_id')

        page = self._download_webpage(url, video_id)
        raw_flash_vars = self._search_regex(
            r'<param name="flashVars" value="([^"]+)"',
            page, u'flashVars')
        parsed_vars = compat_parse_qs(raw_flash_vars)

        def first_value(key):
            # compat_parse_qs maps each key to a list; only the first entry is used.
            return parsed_vars[key][0]

        media_url = first_value('videoURL')
        title = first_value('title')
        description = clean_html(first_value('desc'))
        thumbnail = first_value('preview')

        return {
            '_type': 'video',
            'id': video_id,
            'ext': 'mp4',
            'url': media_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }
|
|
@ -8,8 +8,8 @@ from ..utils import (
|
|||
)
|
||||
|
||||
class RTLnowIE(InfoExtractor):
|
||||
"""Information Extractor for RTLnow, RTL2now and VOXnow"""
|
||||
_VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl(?:(?P<is_rtl2>2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
|
||||
"""Information Extractor for RTL NOW, RTL2 NOW, SUPER RTL NOW and VOX NOW"""
|
||||
_VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
|
||||
_TESTS = [{
|
||||
u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
|
||||
u'file': u'90419.flv',
|
||||
|
@ -48,6 +48,19 @@ class RTLnowIE(InfoExtractor):
|
|||
u'params': {
|
||||
u'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
u'url': u'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
|
||||
u'file': u'99205.flv',
|
||||
u'info_dict': {
|
||||
u'upload_date': u'20080928',
|
||||
u'title': u'Medicopter 117 - Angst!',
|
||||
u'description': u'Angst!',
|
||||
u'thumbnail': u'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg'
|
||||
},
|
||||
u'params': {
|
||||
u'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self,url):
|
||||
|
|
|
@ -427,7 +427,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
elif len(s) == 85:
|
||||
return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
|
||||
elif len(s) == 84:
|
||||
return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
|
||||
return s[5:40] + s[3] + s[41:48] + s[0] + s[49:84]
|
||||
elif len(s) == 83:
|
||||
return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
|
||||
elif len(s) == 82:
|
||||
|
|
|
@ -476,7 +476,7 @@ def formatSeconds(secs):
|
|||
def make_HTTPS_handler(opts):
|
||||
if sys.version_info < (3,2):
|
||||
# Python's 2.x handler is very simplistic
|
||||
return compat_urllib_request.HTTPSHandler()
|
||||
return YoutubeDLHandlerHTTPS()
|
||||
else:
|
||||
import ssl
|
||||
context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
|
||||
|
@ -485,7 +485,7 @@ def make_HTTPS_handler(opts):
|
|||
context.verify_mode = (ssl.CERT_NONE
|
||||
if opts.no_check_certificate
|
||||
else ssl.CERT_REQUIRED)
|
||||
return compat_urllib_request.HTTPSHandler(context=context)
|
||||
return YoutubeDLHandlerHTTPS(context=context)
|
||||
|
||||
class ExtractorError(Exception):
|
||||
"""Error during info extraction."""
|
||||
|
@ -569,7 +569,8 @@ class ContentTooShortError(Exception):
|
|||
self.downloaded = downloaded
|
||||
self.expected = expected
|
||||
|
||||
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
||||
|
||||
class YoutubeDLHandler_Template: # Old-style class, like HTTPHandler
|
||||
"""Handler for HTTP requests and responses.
|
||||
|
||||
This class, when installed with an OpenerDirector, automatically adds
|
||||
|
@ -602,7 +603,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
|||
ret.code = code
|
||||
return ret
|
||||
|
||||
def http_request(self, req):
|
||||
def _http_request(self, req):
|
||||
for h, v in std_headers.items():
|
||||
if h in req.headers:
|
||||
del req.headers[h]
|
||||
|
@ -618,7 +619,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
|||
del req.headers['Youtubedl-user-agent']
|
||||
return req
|
||||
|
||||
def http_response(self, req, resp):
|
||||
def _http_response(self, req, resp):
|
||||
old_resp = resp
|
||||
# gzip
|
||||
if resp.headers.get('Content-encoding', '') == 'gzip':
|
||||
|
@ -632,8 +633,16 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
|||
resp.msg = old_resp.msg
|
||||
return resp
|
||||
|
||||
https_request = http_request
|
||||
https_response = http_response
|
||||
|
||||
class YoutubeDLHandler(YoutubeDLHandler_Template, compat_urllib_request.HTTPHandler):
    """HTTP handler that exposes the template's hooks under the http_* names."""

    # Both hooks come straight from the shared template implementation.
    http_request, http_response = (
        YoutubeDLHandler_Template._http_request,
        YoutubeDLHandler_Template._http_response,
    )
|
||||
|
||||
|
||||
class YoutubeDLHandlerHTTPS(YoutubeDLHandler_Template, compat_urllib_request.HTTPSHandler):
    """HTTPS handler that exposes the template's hooks under the https_* names."""

    # Both hooks come straight from the shared template implementation.
    https_request, https_response = (
        YoutubeDLHandler_Template._http_request,
        YoutubeDLHandler_Template._http_response,
    )
|
||||
|
||||
|
||||
def unified_strdate(date_str):
|
||||
"""Return a string with the date in the format YYYYMMDD"""
|
||||
|
|
Loading…
Reference in a new issue