# urllib3/response.py # Copyright 2008-2012 Andrey Petrov and contributors (see CONTRIBUTORS.txt) # # This module is part of urllib3 and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php import logging import zlib from .exceptions import DecodeError from .packages.six import string_types as basestring, binary_type log = logging.getLogger(__name__) class DeflateDecoder(object): def __init__(self): self._first_try = True self._data = binary_type() self._obj = zlib.decompressobj() def __getattr__(self, name): return getattr(self._obj, name) def decompress(self, data): if not self._first_try: return self._obj.decompress(data) self._data += data try: return self._obj.decompress(data) except zlib.error: self._first_try = False self._obj = zlib.decompressobj(-zlib.MAX_WBITS) try: return self.decompress(self._data) finally: self._data = None def _get_decoder(mode): if mode == 'gzip': return zlib.decompressobj(16 + zlib.MAX_WBITS) return DeflateDecoder() class HTTPResponse(object): """ HTTP Response container. Backwards-compatible to httplib's HTTPResponse but the response ``body`` is loaded and decoded on-demand when the ``data`` property is accessed. Extra parameters for behaviour not present in httplib.HTTPResponse: :param preload_content: If True, the response's body will be preloaded during construction. :param decode_content: If True, attempts to decode specific content-encoding's based on headers (like 'gzip' and 'deflate') will be skipped and raw data will be used instead. :param original_response: When this HTTPResponse wrapper is generated from an httplib.HTTPResponse object, it's convenient to include the original for debug purposes. It's otherwise unused. """ CONTENT_DECODERS = ['gzip', 'deflate'] def __init__(self, body='', headers=None, status=0, version=0, reason=None, strict=0, preload_content=True, decode_content=True, original_response=None, pool=None, connection=None): self.headers = headers or {} self.status = status self.version = version self.reason = reason self.strict = strict self.decode_content = decode_content self._decoder = None self._body = body if body and isinstance(body, basestring) else None self._fp = None self._original_response = original_response self._pool = pool self._connection = connection if hasattr(body, 'read'): self._fp = body if preload_content and not self._body: self._body = self.read(decode_content=decode_content) def get_redirect_location(self): """ Should we redirect and where to? :returns: Truthy redirect location string if we got a redirect status code and valid location. ``None`` if redirect status and no location. ``False`` if not a redirect status code. """ if self.status in [301, 302, 303, 307]: return self.headers.get('location') return False def release_conn(self): if not self._pool or not self._connection: return self._pool._put_conn(self._connection) self._connection = None @property def data(self): # For backwords-compat with earlier urllib3 0.4 and earlier. if self._body: return self._body if self._fp: return self.read(cache_content=True) def read(self, amt=None, decode_content=None, cache_content=False): """ Similar to :meth:`httplib.HTTPResponse.read`, but with two additional parameters: ``decode_content`` and ``cache_content``. :param amt: How much of the content to read. If specified, caching is skipped because it doesn't make sense to cache partial content as the full response. :param decode_content: If True, will attempt to decode the body based on the 'content-encoding' header. :param cache_content: If True, will save the returned data such that the same result is returned despite of the state of the underlying file object. This is useful if you want the ``.data`` property to continue working after having ``.read()`` the file object. (Overridden if ``amt`` is set.) """ # Note: content-encoding value should be case-insensitive, per RFC 2616 # Section 3.5 content_encoding = self.headers.get('content-encoding', '').lower() if self._decoder is None: if content_encoding in self.CONTENT_DECODERS: self._decoder = _get_decoder(content_encoding) if decode_content is None: decode_content = self.decode_content if self._fp is None: return flush_decoder = False try: if amt is None: # cStringIO doesn't like amt=None data = self._fp.read() flush_decoder = True else: cache_content = False data = self._fp.read(amt) if amt != 0 and not data: # Platform-specific: Buggy versions of Python. # Close the connection when no data is returned # # This is redundant to what httplib/http.client _should_ # already do. However, versions of python released before # December 15, 2012 (http://bugs.python.org/issue16298) do not # properly close the connection in all cases. There is no harm # in redundantly calling close. self._fp.close() flush_decoder = True try: if decode_content and self._decoder: data = self._decoder.decompress(data) except (IOError, zlib.error): raise DecodeError("Received response with content-encoding: %s, but " "failed to decode it." % content_encoding) if flush_decoder and self._decoder: buf = self._decoder.decompress(binary_type()) data += buf + self._decoder.flush() if cache_content: self._body = data return data finally: if self._original_response and self._original_response.isclosed(): self.release_conn() @classmethod def from_httplib(ResponseCls, r, **response_kw): """ Given an :class:`httplib.HTTPResponse` instance ``r``, return a corresponding :class:`urllib3.response.HTTPResponse` object. Remaining parameters are passed to the HTTPResponse constructor, along with ``original_response=r``. """ # Normalize headers between different versions of Python headers = {} for k, v in r.getheaders(): # Python 3: Header keys are returned capitalised k = k.lower() has_value = headers.get(k) if has_value: # Python 3: Repeating header keys are unmerged. v = ', '.join([has_value, v]) headers[k] = v # HTTPResponse objects in Python 3 don't have a .strict attribute strict = getattr(r, 'strict', 0) return ResponseCls(body=r, headers=headers, status=r.status, version=r.version, reason=r.reason, strict=strict, original_response=r, **response_kw) # Backwards-compatibility methods for httplib.HTTPResponse def getheaders(self): return self.headers def getheader(self, name, default=None): return self.headers.get(name, default)