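Add a `fatal` parameter (default: True) to _request_webpage and the _download_webpage* functions.
When a caller passes fatal=False, a failed download is reported as a warning and the function returns False instead of raising ExtractorError.
This allows us to simplify the calls in the youtube extractor even further.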
This commit is contained in:
parent
baa7b1978b
commit
7cc3570e53
2 changed files with 57 additions and 40 deletions
|
@ -154,27 +154,38 @@ class InfoExtractor(object):
|
||||||
def IE_NAME(self):
|
def IE_NAME(self):
|
||||||
return type(self).__name__[:-2]
|
return type(self).__name__[:-2]
|
||||||
|
|
||||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None):
|
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
||||||
""" Returns the response handle """
|
""" Returns the response handle """
|
||||||
if note is None:
|
if note is None:
|
||||||
self.report_download_webpage(video_id)
|
self.report_download_webpage(video_id)
|
||||||
elif note is not False:
|
elif note is not False:
|
||||||
self.to_screen(u'%s: %s' % (video_id, note))
|
if video_id is None:
|
||||||
|
self.to_screen(u'%s' % (note,))
|
||||||
|
else:
|
||||||
|
self.to_screen(u'%s: %s' % (video_id, note))
|
||||||
try:
|
try:
|
||||||
return self._downloader.urlopen(url_or_request)
|
return self._downloader.urlopen(url_or_request)
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
if errnote is None:
|
if errnote is None:
|
||||||
errnote = u'Unable to download webpage'
|
errnote = u'Unable to download webpage'
|
||||||
raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err)
|
errmsg = u'%s: %s' % (errnote, compat_str(err))
|
||||||
|
if fatal:
|
||||||
|
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
|
||||||
|
else:
|
||||||
|
self._downloader.report_warning(errmsg)
|
||||||
|
return False
|
||||||
|
|
||||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
|
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
||||||
""" Returns a tuple (page content as string, URL handle) """
|
""" Returns a tuple (page content as string, URL handle) """
|
||||||
|
|
||||||
# Strip hashes from the URL (#1038)
|
# Strip hashes from the URL (#1038)
|
||||||
if isinstance(url_or_request, (compat_str, str)):
|
if isinstance(url_or_request, (compat_str, str)):
|
||||||
url_or_request = url_or_request.partition('#')[0]
|
url_or_request = url_or_request.partition('#')[0]
|
||||||
|
|
||||||
urlh = self._request_webpage(url_or_request, video_id, note, errnote)
|
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal)
|
||||||
|
if urlh is False:
|
||||||
|
assert not fatal
|
||||||
|
return False
|
||||||
content_type = urlh.headers.get('Content-Type', '')
|
content_type = urlh.headers.get('Content-Type', '')
|
||||||
webpage_bytes = urlh.read()
|
webpage_bytes = urlh.read()
|
||||||
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
|
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
|
||||||
|
@ -209,9 +220,14 @@ class InfoExtractor(object):
|
||||||
content = webpage_bytes.decode(encoding, 'replace')
|
content = webpage_bytes.decode(encoding, 'replace')
|
||||||
return (content, urlh)
|
return (content, urlh)
|
||||||
|
|
||||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
|
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
||||||
""" Returns the data of the page as a string """
|
""" Returns the data of the page as a string """
|
||||||
return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
|
res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
|
||||||
|
if res is False:
|
||||||
|
return res
|
||||||
|
else:
|
||||||
|
content, _ = res
|
||||||
|
return content
|
||||||
|
|
||||||
def _download_xml(self, url_or_request, video_id,
|
def _download_xml(self, url_or_request, video_id,
|
||||||
note=u'Downloading XML', errnote=u'Unable to download XML'):
|
note=u'Downloading XML', errnote=u'Unable to download XML'):
|
||||||
|
|
|
@ -42,19 +42,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
# If True it will raise an error if no login info is provided
|
# If True it will raise an error if no login info is provided
|
||||||
_LOGIN_REQUIRED = False
|
_LOGIN_REQUIRED = False
|
||||||
|
|
||||||
def report_lang(self):
|
|
||||||
"""Report attempt to set language."""
|
|
||||||
self.to_screen(u'Setting language')
|
|
||||||
|
|
||||||
def _set_language(self):
|
def _set_language(self):
|
||||||
request = compat_urllib_request.Request(self._LANG_URL)
|
return bool(self._download_webpage(
|
||||||
try:
|
self._LANG_URL, None,
|
||||||
self.report_lang()
|
note=u'Setting language', errnote='unable to set language',
|
||||||
self._download_webpage(self._LANG_URL, None, False)
|
fatal=False))
|
||||||
except ExtractorError as err:
|
|
||||||
self._downloader.report_warning(u'unable to set language: %s' % compat_str(err.cause))
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
(username, password) = self._get_login_info()
|
||||||
|
@ -64,8 +56,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
login_page = self._download_webpage(self._LOGIN_URL, None, False,
|
login_page = self._download_webpage(
|
||||||
u'Unable to fetch login page')
|
self._LOGIN_URL, None,
|
||||||
|
note=u'Downloading login page',
|
||||||
|
errnote=u'unable to fetch login page', fatal=False)
|
||||||
|
if login_page is False:
|
||||||
|
return
|
||||||
|
|
||||||
galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
|
galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
|
||||||
login_page, u'Login GALX parameter')
|
login_page, u'Login GALX parameter')
|
||||||
|
@ -95,26 +91,28 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
# chokes on unicode
|
# chokes on unicode
|
||||||
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
|
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
|
||||||
login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
|
login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
|
||||||
request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
|
|
||||||
try:
|
req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
|
||||||
self.report_login()
|
login_results = self._download_webpage(
|
||||||
login_results = self._download_webpage(request, None, False)
|
req, None,
|
||||||
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
|
note=u'Logging in', errnote=u'unable to log in', fatal=False)
|
||||||
self._downloader.report_warning(u'unable to log in: bad username or password')
|
if login_results is False:
|
||||||
return False
|
return False
|
||||||
except ExtractorError as err:
|
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
|
||||||
self._downloader.report_warning(u'unable to log in: %s' % compat_str(err.cause))
|
self._downloader.report_warning(u'unable to log in: bad username or password')
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _confirm_age(self):
|
def _confirm_age(self):
|
||||||
age_form = {
|
age_form = {
|
||||||
'next_url': '/',
|
'next_url': '/',
|
||||||
'action_confirm': 'Confirm',
|
'action_confirm': 'Confirm',
|
||||||
}
|
}
|
||||||
request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
|
req = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
|
||||||
self.report_age_confirmation()
|
|
||||||
self._download_webpage(request, None, False, u'Unable to confirm age')
|
self._download_webpage(
|
||||||
|
req, None,
|
||||||
|
note=u'Confirming age', errnote=u'Unable to confirm age')
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
|
@ -1736,11 +1734,14 @@ class YoutubeSearchIE(SearchInfoExtractor):
|
||||||
|
|
||||||
while (50 * pagenum) < limit:
|
while (50 * pagenum) < limit:
|
||||||
result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
|
result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
|
||||||
data = self._download_webpage(result_url, u'query "%s"' % query,
|
data_json = self._download_webpage(
|
||||||
u'Downloading page %s' % pagenum, u'Unable to download API page')
|
result_url, video_id=u'query "%s"' % query,
|
||||||
api_response = json.loads(data)['data']
|
note=u'Downloading page %s' % (pagenum + 1),
|
||||||
|
errnote=u'Unable to download API page')
|
||||||
|
data = json.loads(data_json)
|
||||||
|
api_response = data['data']
|
||||||
|
|
||||||
if not 'items' in api_response:
|
if 'items' not in api_response:
|
||||||
raise ExtractorError(u'[youtube] No video results')
|
raise ExtractorError(u'[youtube] No video results')
|
||||||
|
|
||||||
new_ids = list(video['id'] for video in api_response['items'])
|
new_ids = list(video['id'] for video in api_response['items'])
|
||||||
|
|
Loading…
Reference in a new issue