[popcorntimes] Add extractor (closes #23949)

2020-02-03 06:05:56 +07:00 · 2020-02-03 06:05:56 +07:00 · 7d55b62ff2
commit 7d55b62ff2
parent 0d006fac5c
2 changed files with 100 additions and 0 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -850,6 +850,7 @@ from .polskieradio import (
    PolskieRadioIE,
    PolskieRadioCategoryIE,
 )
 from .popcorntimes import PopcorntimesIE
 from .popcorntv import PopcornTVIE
 from .porn91 import Porn91IE
 from .porncom import PornComIE
--- a/youtube_dl/extractor/popcorntimes.py
+++ b/youtube_dl/extractor/popcorntimes.py
@ -0,0 +1,99 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..compat import (
    compat_b64decode,
    compat_chr,
 )
 from ..utils import int_or_none
 class PopcorntimesIE(InfoExtractor):
    _VALID_URL = r'https?://popcorntimes\.tv/[^/]+/m/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
    _TEST = {
        'url': 'https://popcorntimes.tv/de/m/A1XCFvz/haensel-und-gretel-opera-fantasy',
        'md5': '93f210991ad94ba8c3485950a2453257',
        'info_dict': {
            'id': 'A1XCFvz',
            'display_id': 'haensel-und-gretel-opera-fantasy',
            'ext': 'mp4',
            'title': 'Hänsel und Gretel',
            'description': 'md5:1b8146791726342e7b22ce8125cf6945',
            'thumbnail': r're:^https?://.*\.jpg$',
            'creator': 'John Paul',
            'release_date': '19541009',
            'duration': 4260,
            'tbr': 5380,
            'width': 720,
            'height': 540,
        },
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id, display_id = mobj.group('id', 'display_id')
        webpage = self._download_webpage(url, display_id)
        title = self._search_regex(
            r'<h1>([^<]+)', webpage, 'title',
            default=None) or self._html_search_meta(
            'ya:ovs:original_name', webpage, 'title', fatal=True)
        loc = self._search_regex(
            r'PCTMLOC\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'loc',
            group='value')
        loc_b64 = ''
        for c in loc:
            c_ord = ord(c)
            if ord('a') <= c_ord <= ord('z') or ord('A') <= c_ord <= ord('Z'):
                upper = ord('Z') if c_ord <= ord('Z') else ord('z')
                c_ord += 13
                if upper < c_ord:
                    c_ord -= 26
            loc_b64 += compat_chr(c_ord)
        video_url = compat_b64decode(loc_b64).decode('utf-8')
        description = self._html_search_regex(
            r'(?s)<div[^>]+class=["\']pt-movie-desc[^>]+>(.+?)</div>', webpage,
            'description', fatal=False)
        thumbnail = self._search_regex(
            r'<img[^>]+class=["\']video-preview[^>]+\bsrc=(["\'])(?P<value>(?:(?!\1).)+)\1',
            webpage, 'thumbnail', default=None,
            group='value') or self._og_search_thumbnail(webpage)
        creator = self._html_search_meta(
            'video:director', webpage, 'creator', default=None)
        release_date = self._html_search_meta(
            'video:release_date', webpage, default=None)
        if release_date:
            release_date = release_date.replace('-', '')
        def int_meta(name):
            return int_or_none(self._html_search_meta(
                name, webpage, default=None))
        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'creator': creator,
            'release_date': release_date,
            'duration': int_meta('video:duration'),
            'tbr': int_meta('ya:ovs:bitrate'),
            'width': int_meta('og:video:width'),
            'height': int_meta('og:video:height'),
            'http_headers': {
                'Referer': url,
            },
        }