[nzz] Relax kaltura regex

2018-11-20 20:50:40 +01:00 · 2018-11-20 20:50:40 +01:00 · 15ed5a2784
commit 15ed5a2784
parent 2e1280ed43
1 changed files with 10 additions and 3 deletions
--- a/youtube_dl/extractor/nzz.py
+++ b/youtube_dl/extractor/nzz.py
@ -11,20 +11,27 @@ from ..utils import (

 class NZZIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P<id>\d+)'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153',
        'info_dict': {
            'id': '9153',
        },
        'playlist_mincount': 6,
-    }
+    }, {
+        'url': 'https://www.nzz.ch/video/nzz-standpunkte/cvp-auf-der-suche-nach-dem-mass-der-mitte-ld.1368112',
+        'info_dict': {
+            'id': '1368112',
+        },
+        'playlist_count': 1,
+    }]

    def _real_extract(self, url):
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        entries = []
-        for player_element in re.findall(r'(<[^>]+class="kalturaPlayer"[^>]*>)', webpage):
+        for player_element in re.findall(
+                r'(<[^>]+class="kalturaPlayer[^"]*"[^>]*>)', webpage):
            player_params = extract_attributes(player_element)
            if player_params.get('data-type') not in ('kaltura_singleArticle',):
                self.report_warning('Unsupported player type')