[utils] Encode hostnames before passing to urllib
With an IDN (Internationalized Domain Name) and a proxy, non-ASCII URLs are passed down to urllib/urllib2, causing a UnicodeEncodeError. Fixes #8890.
This commit is contained in:
parent
7da2c87119
commit
efbed08dc2
2 changed files with 11 additions and 0 deletions
|
@@ -1,4 +1,5 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
# Allow direct execution
|
# Allow direct execution
|
||||||
|
@@ -120,5 +121,14 @@ class TestProxy(unittest.TestCase):
|
||||||
response = ydl.urlopen(req).read().decode('utf-8')
|
response = ydl.urlopen(req).read().decode('utf-8')
|
||||||
self.assertEqual(response, 'cn: {0}'.format(url))
|
self.assertEqual(response, 'cn: {0}'.format(url))
|
||||||
|
|
||||||
|
def test_proxy_with_idn(self):
|
||||||
|
ydl = YoutubeDL({
|
||||||
|
'proxy': 'localhost:{0}'.format(self.port),
|
||||||
|
})
|
||||||
|
url = 'http://中文.tw/'
|
||||||
|
response = ydl.urlopen(url).read().decode('utf-8')
|
||||||
|
# b'xn--fiq228c' is '中文'.encode('idna')
|
||||||
|
self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@@ -1746,6 +1746,7 @@ def escape_url(url):
|
||||||
"""Escape URL as suggested by RFC 3986"""
|
"""Escape URL as suggested by RFC 3986"""
|
||||||
url_parsed = compat_urllib_parse_urlparse(url)
|
url_parsed = compat_urllib_parse_urlparse(url)
|
||||||
return url_parsed._replace(
|
return url_parsed._replace(
|
||||||
|
netloc=url_parsed.netloc.encode('idna').decode('ascii'),
|
||||||
path=escape_rfc3986(url_parsed.path),
|
path=escape_rfc3986(url_parsed.path),
|
||||||
params=escape_rfc3986(url_parsed.params),
|
params=escape_rfc3986(url_parsed.params),
|
||||||
query=escape_rfc3986(url_parsed.query),
|
query=escape_rfc3986(url_parsed.query),
|
||||||
|
|
Loading…
Reference in a new issue