[extractor/generic] Fix following redirect in Refresh HTTP header on python 2
This commit is contained in:
parent
749b09616d
commit
6c91a5a7f5
1 changed file with 20 additions and 0 deletions
|
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .youtube import YoutubeIE
|
from .youtube import YoutubeIE
|
||||||
|
@@ -230,6 +231,22 @@ class GenericIE(InfoExtractor):
|
||||||
'skip_download': False,
|
'skip_download': False,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# redirect in Refresh HTTP header
|
||||||
|
'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'pO8h3EaFRdo',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
|
||||||
|
'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
|
||||||
|
'upload_date': '20150917',
|
||||||
|
'uploader_id': 'brtvofficial',
|
||||||
|
'uploader': 'Boiler Room',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': False,
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
|
'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
|
||||||
'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
|
'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
|
||||||
|
@@ -1808,6 +1825,9 @@ class GenericIE(InfoExtractor):
|
||||||
# Look also in Refresh HTTP header
|
# Look also in Refresh HTTP header
|
||||||
refresh_header = head_response.headers.get('Refresh')
|
refresh_header = head_response.headers.get('Refresh')
|
||||||
if refresh_header:
|
if refresh_header:
|
||||||
|
# In python 2 response HTTP headers are bytestrings
|
||||||
|
if sys.version_info < (3, 0) and isinstance(refresh_header, str):
|
||||||
|
refresh_header = refresh_header.decode('iso-8859-1')
|
||||||
found = re.search(REDIRECT_REGEX, refresh_header)
|
found = re.search(REDIRECT_REGEX, refresh_header)
|
||||||
if found:
|
if found:
|
||||||
new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
|
new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
|
||||||
|
|
Loading…
Reference in a new issue