Merge remote-tracking branch 'upstream/master'
This commit is contained in:
commit
85f03346eb
12 changed files with 101 additions and 28 deletions
|
@ -4,8 +4,12 @@ __youtube-dl()
|
|||
COMPREPLY=()
|
||||
cur="${COMP_WORDS[COMP_CWORD]}"
|
||||
opts="{{flags}}"
|
||||
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater"
|
||||
|
||||
if [[ ${cur} == * ]] ; then
|
||||
if [[ ${cur} =~ : ]]; then
|
||||
COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
|
||||
return 0
|
||||
elif [[ ${cur} == * ]] ; then
|
||||
COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
|
||||
return 0
|
||||
fi
|
||||
|
|
|
@ -20,15 +20,15 @@ tests = [
|
|||
# 87
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
|
||||
"uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
|
||||
# 86 - vflh9ybst 2013/08/23
|
||||
# 86 - vflHOr_nV 2013/08/30
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
|
||||
"yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"),
|
||||
"?;}|[{=+._)(*&^%$#@!MNBqCXZASDFGHJKLPOIUYTREWQ<987654321mnbvcxzasdfghjklpoiuytrew"),
|
||||
# 85
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
|
||||
".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
|
||||
# 84 - vflh9ybst 2013/08/23 (sporadic)
|
||||
# 84 - vflg0g8PQ 2013/08/29 (sporadic)
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
|
||||
"yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"),
|
||||
">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
|
||||
# 83
|
||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
|
||||
".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
|
||||
|
|
|
@ -127,12 +127,11 @@ def generator(test_case):
|
|||
info_dict = json.load(infof)
|
||||
for (info_field, expected) in tc.get('info_dict', {}).items():
|
||||
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
||||
self.assertEqual(expected, 'md5:' + md5(info_dict.get(info_field)))
|
||||
got = 'md5:' + md5(info_dict.get(info_field))
|
||||
else:
|
||||
got = info_dict.get(info_field)
|
||||
self.assertEqual(
|
||||
expected, got,
|
||||
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
|
||||
self.assertEqual(expected, got,
|
||||
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
|
||||
|
||||
# If checkable fields are missing from the test case, print the info_dict
|
||||
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
|
||||
|
|
|
@ -59,6 +59,7 @@ from .myvideo import MyVideoIE
|
|||
from .nba import NBAIE
|
||||
from .nbc import NBCNewsIE
|
||||
from .ooyala import OoyalaIE
|
||||
from .orf import ORFIE
|
||||
from .pbs import PBSIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .pornotube import PornotubeIE
|
||||
|
|
|
@ -150,7 +150,7 @@ class InfoExtractor(object):
|
|||
if m:
|
||||
encoding = m.group(1)
|
||||
else:
|
||||
m = re.search(br'<meta[^>]+charset="?([^"]+)[ /">]',
|
||||
m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
|
||||
webpage_bytes[:1024])
|
||||
if m:
|
||||
encoding = m.group(1).decode('ascii')
|
||||
|
|
|
@ -13,7 +13,7 @@ class IGNIE(InfoExtractor):
|
|||
Some videos of it.ign.com are also supported
|
||||
"""
|
||||
|
||||
_VALID_URL = r'https?://.+?\.ign\.com/(?:videos|show_videos)(/.+)?/(?P<name_or_id>.+)'
|
||||
_VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles)(/.+)?/(?P<name_or_id>.+)'
|
||||
IE_NAME = u'ign.com'
|
||||
|
||||
_CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
|
||||
|
@ -41,7 +41,11 @@ class IGNIE(InfoExtractor):
|
|||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
name_or_id = mobj.group('name_or_id')
|
||||
page_type = mobj.group('type')
|
||||
webpage = self._download_webpage(url, name_or_id)
|
||||
if page_type == 'articles':
|
||||
video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url')
|
||||
return self.url_result(video_url, ie='IGN')
|
||||
video_id = self._find_video_id(webpage)
|
||||
result = self._get_video_info(video_id)
|
||||
description = self._html_search_regex(self._DESCRIPTION_RE,
|
||||
|
@ -68,7 +72,7 @@ class IGNIE(InfoExtractor):
|
|||
class OneUPIE(IGNIE):
|
||||
"""Extractor for 1up.com, it uses the ign videos system."""
|
||||
|
||||
_VALID_URL = r'https?://gamevideos.1up.com/video/id/(?P<name_or_id>.+)'
|
||||
_VALID_URL = r'https?://gamevideos.1up.com/(?P<type>video)/id/(?P<name_or_id>.+)'
|
||||
IE_NAME = '1up.com'
|
||||
|
||||
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
|
||||
|
|
|
@ -25,23 +25,21 @@ class TechTVMITIE(InfoExtractor):
|
|||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(
|
||||
raw_page = self._download_webpage(
|
||||
'http://techtv.mit.edu/videos/%s' % video_id, video_id)
|
||||
embed_page = self._download_webpage(
|
||||
'http://techtv.mit.edu/embeds/%s/' % video_id, video_id,
|
||||
note=u'Downloading embed page')
|
||||
clean_page = re.compile(u'<!--.*?-->', re.S).sub(u'', raw_page)
|
||||
|
||||
base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)',
|
||||
embed_page, u'base url')
|
||||
formats_json = self._search_regex(r'bitrates: (\[.+?\])', embed_page,
|
||||
raw_page, u'base url')
|
||||
formats_json = self._search_regex(r'bitrates: (\[.+?\])', raw_page,
|
||||
u'video formats')
|
||||
formats = json.loads(formats_json)
|
||||
formats = sorted(formats, key=lambda f: f['bitrate'])
|
||||
|
||||
title = get_element_by_id('edit-title', webpage)
|
||||
description = clean_html(get_element_by_id('edit-description', webpage))
|
||||
title = get_element_by_id('edit-title', clean_page)
|
||||
description = clean_html(get_element_by_id('edit-description', clean_page))
|
||||
thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'',
|
||||
embed_page, u'thumbnail', flags=re.DOTALL)
|
||||
raw_page, u'thumbnail', flags=re.DOTALL)
|
||||
|
||||
return {'id': video_id,
|
||||
'title': title,
|
||||
|
|
67
youtube_dl/extractor/orf.py
Normal file
67
youtube_dl/extractor/orf.py
Normal file
|
@ -0,0 +1,67 @@
|
|||
# coding: utf-8
|
||||
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urlparse,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
)
|
||||
|
||||
class ORFIE(InfoExtractor):
    """Information extractor for ORF TVthek pages (tvthek.orf.at).

    The page is expected to embed two pieces of data that are combined here:
    an ``ORF.flashXML`` string holding an XML configuration with the stream
    URLs of every playlist item, and a JSON ``playlist`` array holding the id
    and title of every item.
    """

    # The dots of the host name are escaped so that they only match a
    # literal '.' instead of any character.
    _VALID_URL = r'https?://tvthek\.orf\.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://tvthek.orf.at/programs/1171769-Wetter-ZIB/episodes/6557323-Wetter',
        u'file': u'6566957.flv',
        u'info_dict': {
            u'title': u'Wetter',
            u'description': u'Christa Kummer, Marcus Wadsak und Kollegen präsentieren abwechselnd ihre täglichen Wetterprognosen für Österreich.\r \r Mehr Wetter unter wetter.ORF.at',
        },
        u'params': {
            # It uses rtmp
            u'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Return a list with one video info dict per playlist item of *url*.

        Raises ExtractorError when an item offers none of the quality
        variants that are probed.
        """
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')
        webpage = self._download_webpage(url, playlist_id)

        # The XML configuration is embedded as a quoted string; parse_qs is
        # used to decode it before it is handed to the XML parser.
        flash_xml = self._search_regex(
            r"ORF\.flashXML = '(.+?)'", webpage, u'flash xml')
        flash_xml = compat_urlparse.parse_qs('xml=' + flash_xml)['xml'][0]
        flash_config = xml.etree.ElementTree.fromstring(flash_xml.encode('utf-8'))

        # The JSON playlist sits inside a single-quoted string with its
        # double quotes backslash-escaped; undo that before parsing.
        playlist_json = self._search_regex(
            r'playlist\': \'(\[.*?\])\'', webpage, u'playlist').replace(r'\"', '"')
        playlist = json.loads(playlist_json)

        videos = []
        ns = '{http://tempuri.org/XMLSchema.xsd}'
        xpath = '%(ns)sPlaylist/%(ns)sItems/%(ns)sItem' % {'ns': ns}
        # Page-wide description, used when an item has none of its own.
        webpage_description = self._og_search_description(webpage)
        # XML items and JSON entries are paired by position; the playlist
        # entries in the HTML are numbered starting at 1.
        for (i, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1):
            # Get best quality url: probe the variants in order of
            # preference and keep the first one that exists.
            rtmp_url = None
            for q in ['Q6A', 'Q4A', 'Q1A']:
                video_url = find_xpath_attr(item, '%sVideoUrl' % ns, 'quality', q)
                if video_url is not None:
                    rtmp_url = video_url.text
                    break
            if rtmp_url is None:
                raise ExtractorError(u'Couldn\'t get video url: %s' % info['id'])
            description = self._html_search_regex(
                r'id="playlist_entry_%s".*?<p>(.*?)</p>' % i, webpage,
                u'description', default=webpage_description, flags=re.DOTALL)
            videos.append({
                '_type': 'video',
                'id': info['id'],
                'title': info['title'],
                'url': rtmp_url,
                'ext': 'flv',
                'description': description,
            })

        return videos
|
|
@ -11,7 +11,7 @@ class UnistraIE(InfoExtractor):
|
|||
u'md5': u'736f605cfdc96724d55bb543ab3ced24',
|
||||
u'info_dict': {
|
||||
u'title': u'M!ss Yella',
|
||||
u'description': u'md5:75e8439a3e2981cd5d4b6db232e8fdfc',
|
||||
u'description': u'md5:104892c71bd48e55d70b902736b81bbf',
|
||||
},
|
||||
}
|
||||
|
||||
|
|
|
@ -346,7 +346,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
u"info_dict": {
|
||||
u"upload_date": u"20120506",
|
||||
u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
|
||||
u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c",
|
||||
u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
|
||||
u"uploader": u"Icona Pop",
|
||||
u"uploader_id": u"IconaPop"
|
||||
}
|
||||
|
@ -434,11 +434,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
elif len(s) == 87:
|
||||
return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
|
||||
elif len(s) == 86:
|
||||
return s[5:40] + s[3] + s[41:48] + s[0] + s[49:86]
|
||||
return s[81:73:-1] + s[84] + s[72:58:-1] + s[0] + s[57:35:-1] + s[85] + s[34:0:-1]
|
||||
elif len(s) == 85:
|
||||
return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
|
||||
elif len(s) == 84:
|
||||
return s[5:40] + s[3] + s[41:48] + s[0] + s[49:84]
|
||||
return s[81:36:-1] + s[0] + s[35:2:-1]
|
||||
elif len(s) == 83:
|
||||
return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
|
||||
elif len(s) == 82:
|
||||
|
@ -1184,7 +1184,7 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
|
|||
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
|
||||
IE_NAME = u'youtube:favorites'
|
||||
IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
|
||||
_VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?'
|
||||
_VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
|
||||
_LOGIN_REQUIRED = True
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
|
|
@ -213,7 +213,7 @@ if sys.version_info >= (2,7):
|
|||
def find_xpath_attr(node, xpath, key, val):
    """Find the first element matching ``xpath[@key='val']`` below *node*.

    *key* and *val* are interpolated into the expression without escaping,
    so both are first restricted to a safe character set: ASCII letters for
    *key*; ASCII letters, digits, '@' and whitespace for *val*.

    Returns the result of ``node.find`` for the built expression; for
    ElementTree elements that is None when nothing matches.
    Raises AssertionError when *key* or *val* contains other characters.
    """
    assert re.match(r'^[a-zA-Z]+$', key)
    # Digits have to be accepted: attribute values such as 'Q6A' are looked
    # up.  A letters-only check on val would reject them.
    assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
    expr = xpath + u"[@%s='%s']" % (key, val)
    return node.find(expr)
|
||||
else:
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
|
||||
__version__ = '2013.08.28.1'
|
||||
__version__ = '2013.08.30'
|
||||
|
|
Loading…
Reference in a new issue