parent
63c9b2c1d9
commit
4e262a8838
2 changed files with 41 additions and 5 deletions
|
@ -296,9 +296,11 @@ class InfoExtractor(object):
|
||||||
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
|
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
|
||||||
return (content, urlh)
|
return (content, urlh)
|
||||||
|
|
||||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None):
|
||||||
content_type = urlh.headers.get('Content-Type', '')
|
content_type = urlh.headers.get('Content-Type', '')
|
||||||
webpage_bytes = urlh.read()
|
webpage_bytes = urlh.read()
|
||||||
|
if prefix is not None:
|
||||||
|
webpage_bytes = prefix + webpage_bytes
|
||||||
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
|
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
|
||||||
if m:
|
if m:
|
||||||
encoding = m.group(1)
|
encoding = m.group(1)
|
||||||
|
|
|
@ -452,7 +452,23 @@ class GenericIE(InfoExtractor):
|
||||||
'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
|
'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 2,
|
'playlist_mincount': 2,
|
||||||
|
},
|
||||||
|
# Direct link with incorrect MIME type
|
||||||
|
{
|
||||||
|
'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
|
||||||
|
'md5': '4ccbebe5f36706d85221f204d7eb5913',
|
||||||
|
'info_dict': {
|
||||||
|
'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
|
||||||
|
'id': '5_Lennart_Poettering_-_Systemd',
|
||||||
|
'ext': 'webm',
|
||||||
|
'title': '5_Lennart_Poettering_-_Systemd',
|
||||||
|
'upload_date': '20141120',
|
||||||
|
},
|
||||||
|
'expected_warnings': [
|
||||||
|
'URL could be a direct video link, returning it as such.'
|
||||||
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
def report_following_redirect(self, new_url):
|
def report_following_redirect(self, new_url):
|
||||||
|
@ -606,10 +622,28 @@ class GenericIE(InfoExtractor):
|
||||||
if not self._downloader.params.get('test', False) and not is_intentional:
|
if not self._downloader.params.get('test', False) and not is_intentional:
|
||||||
self._downloader.report_warning('Falling back on generic information extractor.')
|
self._downloader.report_warning('Falling back on generic information extractor.')
|
||||||
|
|
||||||
if full_response:
|
if not full_response:
|
||||||
webpage = self._webpage_read_content(full_response, url, video_id)
|
full_response = self._request_webpage(url, video_id)
|
||||||
else:
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
# Maybe it's a direct link to a video?
|
||||||
|
# Be careful not to download the whole thing!
|
||||||
|
first_bytes = full_response.read(512)
|
||||||
|
if not re.match(r'^\s*<', first_bytes.decode('utf-8', 'replace')):
|
||||||
|
self._downloader.report_warning(
|
||||||
|
'URL could be a direct video link, returning it as such.')
|
||||||
|
upload_date = unified_strdate(
|
||||||
|
head_response.headers.get('Last-Modified'))
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': os.path.splitext(url_basename(url))[0],
|
||||||
|
'direct': True,
|
||||||
|
'url': url,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
}
|
||||||
|
|
||||||
|
webpage = self._webpage_read_content(
|
||||||
|
full_response, url, video_id, prefix=first_bytes)
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
# Is it an RSS feed?
|
# Is it an RSS feed?
|
||||||
|
|
Loading…
Reference in a new issue