[popcorntimes] Add extractor (closes #23949)

2020-02-03 06:05:56 +07:00 · 2020-02-03 06:05:56 +07:00 · 7d55b62ff2
commit 7d55b62ff2
parent 0d006fac5c
2 changed files with 100 additions and 0 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -850,6 +850,7 @@ from .polskieradio import (
    PolskieRadioIE,
    PolskieRadioCategoryIE,
 )
+from .popcorntimes import PopcorntimesIE
 from .popcorntv import PopcornTVIE
 from .porn91 import Porn91IE
 from .porncom import PornComIE
--- a/youtube_dl/extractor/popcorntimes.py
+++ b/youtube_dl/extractor/popcorntimes.py
@ -0,0 +1,99 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_b64decode,
+    compat_chr,
+)
+from ..utils import int_or_none
+
+
+class PopcorntimesIE(InfoExtractor):
+    _VALID_URL = r'https?://popcorntimes\.tv/[^/]+/m/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
+    _TEST = {
+        'url': 'https://popcorntimes.tv/de/m/A1XCFvz/haensel-und-gretel-opera-fantasy',
+        'md5': '93f210991ad94ba8c3485950a2453257',
+        'info_dict': {
+            'id': 'A1XCFvz',
+            'display_id': 'haensel-und-gretel-opera-fantasy',
+            'ext': 'mp4',
+            'title': 'Hänsel und Gretel',
+            'description': 'md5:1b8146791726342e7b22ce8125cf6945',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'creator': 'John Paul',
+            'release_date': '19541009',
+            'duration': 4260,
+            'tbr': 5380,
+            'width': 720,
+            'height': 540,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id, display_id = mobj.group('id', 'display_id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        title = self._search_regex(
+            r'<h1>([^<]+)', webpage, 'title',
+            default=None) or self._html_search_meta(
+            'ya:ovs:original_name', webpage, 'title', fatal=True)
+
+        loc = self._search_regex(
+            r'PCTMLOC\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'loc',
+            group='value')
+
+        loc_b64 = ''
+        for c in loc:
+            c_ord = ord(c)
+            if ord('a') <= c_ord <= ord('z') or ord('A') <= c_ord <= ord('Z'):
+                upper = ord('Z') if c_ord <= ord('Z') else ord('z')
+                c_ord += 13
+                if upper < c_ord:
+                    c_ord -= 26
+            loc_b64 += compat_chr(c_ord)
+
+        video_url = compat_b64decode(loc_b64).decode('utf-8')
+
+        description = self._html_search_regex(
+            r'(?s)<div[^>]+class=["\']pt-movie-desc[^>]+>(.+?)</div>', webpage,
+            'description', fatal=False)
+
+        thumbnail = self._search_regex(
+            r'<img[^>]+class=["\']video-preview[^>]+\bsrc=(["\'])(?P<value>(?:(?!\1).)+)\1',
+            webpage, 'thumbnail', default=None,
+            group='value') or self._og_search_thumbnail(webpage)
+
+        creator = self._html_search_meta(
+            'video:director', webpage, 'creator', default=None)
+
+        release_date = self._html_search_meta(
+            'video:release_date', webpage, default=None)
+        if release_date:
+            release_date = release_date.replace('-', '')
+
+        def int_meta(name):
+            return int_or_none(self._html_search_meta(
+                name, webpage, default=None))
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'creator': creator,
+            'release_date': release_date,
+            'duration': int_meta('video:duration'),
+            'tbr': int_meta('ya:ovs:bitrate'),
+            'width': int_meta('og:video:width'),
+            'height': int_meta('og:video:height'),
+            'http_headers': {
+                'Referer': url,
+            },
+        }