From bf1b87cd919f07b7fef204838be73981e122ee11 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 17 Apr 2017 08:48:24 +0100 Subject: [PATCH] [common] Relax JWPlayer regex and remove duplicate urls(#12768) --- youtube_dl/extractor/common.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index dcc9d628a..12e010a0d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2182,7 +2182,7 @@ class InfoExtractor(object): def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json): mobj = re.search( - r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)', + r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\).*?\.setup\s*\((?P<options>[^)]+)\)', webpage) if mobj: try: @@ -2258,11 +2258,17 @@ class InfoExtractor(object): def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None, m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): + urls = [] formats = [] for source in jwplayer_sources_data: - source_url = self._proto_relative_url(source['file']) + source_url = self._proto_relative_url(source.get('file')) + if not source_url: + continue if base_url: source_url = compat_urlparse.urljoin(base_url, source_url) + if source_url in urls: + continue + urls.append(source_url) source_type = source.get('type') or '' ext = mimetype2ext(source_type) or determine_ext(source_url) if source_type == 'hls' or ext == 'm3u8':