[extractor/common] Ensure response handle is not prematurely closed before it can be read if it matches expected_status (resolves #17195, closes #17846, resolves #17447)
This commit is contained in:
		
							parent
							
								
									cf0db4d997
								
							
						
					
					
						commit
						95e42d7336
					
				
					 5 changed files with 57 additions and 22 deletions
				
			
		| 
						 | 
					@ -7,6 +7,7 @@ import json
 | 
				
			||||||
import os.path
 | 
					import os.path
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
import types
 | 
					import types
 | 
				
			||||||
 | 
					import ssl
 | 
				
			||||||
import sys
 | 
					import sys
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import youtube_dl.extractor
 | 
					import youtube_dl.extractor
 | 
				
			||||||
| 
						 | 
					@ -244,3 +245,12 @@ def expect_warnings(ydl, warnings_re):
 | 
				
			||||||
            real_warning(w)
 | 
					            real_warning(w)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ydl.report_warning = _report_warning
 | 
					    ydl.report_warning = _report_warning
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def http_server_port(httpd):
 | 
				
			||||||
 | 
					    if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
 | 
				
			||||||
 | 
					        # In Jython SSLSocket is not a subclass of socket.socket
 | 
				
			||||||
 | 
					        sock = httpd.socket.sock
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        sock = httpd.socket
 | 
				
			||||||
 | 
					    return sock.getsockname()[1]
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -9,11 +9,30 @@ import sys
 | 
				
			||||||
import unittest
 | 
					import unittest
 | 
				
			||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 | 
					sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from test.helper import FakeYDL, expect_dict, expect_value
 | 
					from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
 | 
				
			||||||
from youtube_dl.compat import compat_etree_fromstring
 | 
					from youtube_dl.compat import compat_etree_fromstring, compat_http_server
 | 
				
			||||||
from youtube_dl.extractor.common import InfoExtractor
 | 
					from youtube_dl.extractor.common import InfoExtractor
 | 
				
			||||||
from youtube_dl.extractor import YoutubeIE, get_info_extractor
 | 
					from youtube_dl.extractor import YoutubeIE, get_info_extractor
 | 
				
			||||||
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
 | 
					from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
 | 
				
			||||||
 | 
					import threading
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TEAPOT_RESPONSE_STATUS = 418
 | 
				
			||||||
 | 
					TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
 | 
				
			||||||
 | 
					    def log_message(self, format, *args):
 | 
				
			||||||
 | 
					        pass
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def do_GET(self):
 | 
				
			||||||
 | 
					        if self.path == '/teapot':
 | 
				
			||||||
 | 
					            self.send_response(TEAPOT_RESPONSE_STATUS)
 | 
				
			||||||
 | 
					            self.send_header('Content-Type', 'text/html; charset=utf-8')
 | 
				
			||||||
 | 
					            self.end_headers()
 | 
				
			||||||
 | 
					            self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            assert False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class TestIE(InfoExtractor):
 | 
					class TestIE(InfoExtractor):
 | 
				
			||||||
| 
						 | 
					@ -743,6 +762,25 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
 | 
				
			||||||
                for i in range(len(entries)):
 | 
					                for i in range(len(entries)):
 | 
				
			||||||
                    expect_dict(self, entries[i], expected_entries[i])
 | 
					                    expect_dict(self, entries[i], expected_entries[i])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_response_with_expected_status_returns_content(self):
 | 
				
			||||||
 | 
					        # Checks for mitigations against the effects of
 | 
				
			||||||
 | 
					        # <https://bugs.python.org/issue15002> that affect Python 3.4.1+, which
 | 
				
			||||||
 | 
					        # manifest as `_download_webpage`, `_download_xml`, `_download_json`,
 | 
				
			||||||
 | 
					        # or the underlying `_download_webpage_handle` returning no content
 | 
				
			||||||
 | 
					        # when a response matches `expected_status`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        httpd = compat_http_server.HTTPServer(
 | 
				
			||||||
 | 
					            ('127.0.0.1', 0), InfoExtractorTestRequestHandler)
 | 
				
			||||||
 | 
					        port = http_server_port(httpd)
 | 
				
			||||||
 | 
					        server_thread = threading.Thread(target=httpd.serve_forever)
 | 
				
			||||||
 | 
					        server_thread.daemon = True
 | 
				
			||||||
 | 
					        server_thread.start()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        (content, urlh) = self.ie._download_webpage_handle(
 | 
				
			||||||
 | 
					            'http://127.0.0.1:%d/teapot' % port, None,
 | 
				
			||||||
 | 
					            expected_status=TEAPOT_RESPONSE_STATUS)
 | 
				
			||||||
 | 
					        self.assertEqual(content, TEAPOT_RESPONSE_BODY)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if __name__ == '__main__':
 | 
					if __name__ == '__main__':
 | 
				
			||||||
    unittest.main()
 | 
					    unittest.main()
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -9,26 +9,16 @@ import sys
 | 
				
			||||||
import unittest
 | 
					import unittest
 | 
				
			||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 | 
					sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from test.helper import try_rm
 | 
					from test.helper import http_server_port, try_rm
 | 
				
			||||||
from youtube_dl import YoutubeDL
 | 
					from youtube_dl import YoutubeDL
 | 
				
			||||||
from youtube_dl.compat import compat_http_server
 | 
					from youtube_dl.compat import compat_http_server
 | 
				
			||||||
from youtube_dl.downloader.http import HttpFD
 | 
					from youtube_dl.downloader.http import HttpFD
 | 
				
			||||||
from youtube_dl.utils import encodeFilename
 | 
					from youtube_dl.utils import encodeFilename
 | 
				
			||||||
import ssl
 | 
					 | 
				
			||||||
import threading
 | 
					import threading
 | 
				
			||||||
 | 
					
 | 
				
			||||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
 | 
					TEST_DIR = os.path.dirname(os.path.abspath(__file__))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def http_server_port(httpd):
 | 
					 | 
				
			||||||
    if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
 | 
					 | 
				
			||||||
        # In Jython SSLSocket is not a subclass of socket.socket
 | 
					 | 
				
			||||||
        sock = httpd.socket.sock
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        sock = httpd.socket
 | 
					 | 
				
			||||||
    return sock.getsockname()[1]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
TEST_SIZE = 10 * 1024
 | 
					TEST_SIZE = 10 * 1024
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -8,6 +8,7 @@ import sys
 | 
				
			||||||
import unittest
 | 
					import unittest
 | 
				
			||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 | 
					sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from test.helper import http_server_port
 | 
				
			||||||
from youtube_dl import YoutubeDL
 | 
					from youtube_dl import YoutubeDL
 | 
				
			||||||
from youtube_dl.compat import compat_http_server, compat_urllib_request
 | 
					from youtube_dl.compat import compat_http_server, compat_urllib_request
 | 
				
			||||||
import ssl
 | 
					import ssl
 | 
				
			||||||
| 
						 | 
					@ -16,15 +17,6 @@ import threading
 | 
				
			||||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
 | 
					TEST_DIR = os.path.dirname(os.path.abspath(__file__))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def http_server_port(httpd):
 | 
					 | 
				
			||||||
    if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
 | 
					 | 
				
			||||||
        # In Jython SSLSocket is not a subclass of socket.socket
 | 
					 | 
				
			||||||
        sock = httpd.socket.sock
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        sock = httpd.socket
 | 
					 | 
				
			||||||
    return sock.getsockname()[1]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
 | 
					class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
 | 
				
			||||||
    def log_message(self, format, *args):
 | 
					    def log_message(self, format, *args):
 | 
				
			||||||
        pass
 | 
					        pass
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -606,6 +606,11 @@ class InfoExtractor(object):
 | 
				
			||||||
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 | 
					        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 | 
				
			||||||
            if isinstance(err, compat_urllib_error.HTTPError):
 | 
					            if isinstance(err, compat_urllib_error.HTTPError):
 | 
				
			||||||
                if self.__can_accept_status_code(err, expected_status):
 | 
					                if self.__can_accept_status_code(err, expected_status):
 | 
				
			||||||
 | 
					                    # Retain reference to error to prevent file object from
 | 
				
			||||||
 | 
					                    # being closed before it can be read. Works around the
 | 
				
			||||||
 | 
					                    # effects of <https://bugs.python.org/issue15002>
 | 
				
			||||||
 | 
					                    # introduced in Python 3.4.1.
 | 
				
			||||||
 | 
					                    err.fp._error = err
 | 
				
			||||||
                    return err.fp
 | 
					                    return err.fp
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            if errnote is False:
 | 
					            if errnote is False:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue