[teamcoco] Fix extraction for full episodes (closes #16573)
This commit is contained in:
		
							parent
							
								
									a07879d6b2
								
							
						
					
					
						commit
						e0d42dd4b2
					
				
					 3 changed files with 122 additions and 88 deletions
				
			
		| 
						 | 
				
			
			@ -4,6 +4,10 @@ from __future__ import unicode_literals
 | 
			
		|||
import re
 | 
			
		||||
 | 
			
		||||
from .turner import TurnerBaseIE
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_urllib_parse_urlparse,
 | 
			
		||||
    compat_parse_qs,
 | 
			
		||||
)
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    float_or_none,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
| 
						 | 
				
			
			@ -38,48 +42,22 @@ class TBSIE(TurnerBaseIE):
 | 
			
		|||
    def _real_extract(self, url):
 | 
			
		||||
        site, display_id = re.match(self._VALID_URL, url).groups()
 | 
			
		||||
        webpage = self._download_webpage(url, display_id)
 | 
			
		||||
        video_data = self._parse_json(self._search_regex(
 | 
			
		||||
        drupal_settings = self._parse_json(self._search_regex(
 | 
			
		||||
            r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
 | 
			
		||||
            webpage, 'drupal setting'), display_id)['turner_playlist'][0]
 | 
			
		||||
            webpage, 'drupal setting'), display_id)
 | 
			
		||||
        video_data = drupal_settings['turner_playlist'][0]
 | 
			
		||||
 | 
			
		||||
        media_id = video_data['mediaID']
 | 
			
		||||
        title = video_data['title']
 | 
			
		||||
        tokenizer_query = compat_parse_qs(compat_urllib_parse_urlparse(
 | 
			
		||||
            drupal_settings['ngtv_token_url']).query)
 | 
			
		||||
 | 
			
		||||
        streams_data = self._download_json(
 | 
			
		||||
            'http://medium.ngtv.io/media/%s/tv' % media_id,
 | 
			
		||||
            media_id)['media']['tv']
 | 
			
		||||
        duration = None
 | 
			
		||||
        chapters = []
 | 
			
		||||
        formats = []
 | 
			
		||||
        for supported_type in ('unprotected', 'bulkaes'):
 | 
			
		||||
            stream_data = streams_data.get(supported_type, {})
 | 
			
		||||
            m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
 | 
			
		||||
            if not m3u8_url:
 | 
			
		||||
                continue
 | 
			
		||||
            if stream_data.get('playlistProtection') == 'spe':
 | 
			
		||||
                m3u8_url = self._add_akamai_spe_token(
 | 
			
		||||
                    'http://token.vgtf.net/token/token_spe',
 | 
			
		||||
                    m3u8_url, media_id, {
 | 
			
		||||
                        'url': url,
 | 
			
		||||
                        'site_name': site[:3].upper(),
 | 
			
		||||
                        'auth_required': video_data.get('authRequired') == '1',
 | 
			
		||||
                    })
 | 
			
		||||
            formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
 | 
			
		||||
 | 
			
		||||
            duration = float_or_none(stream_data.get('totalRuntime') or video_data.get('duration'))
 | 
			
		||||
 | 
			
		||||
            if not chapters:
 | 
			
		||||
                for chapter in stream_data.get('contentSegments', []):
 | 
			
		||||
                    start_time = float_or_none(chapter.get('start'))
 | 
			
		||||
                    duration = float_or_none(chapter.get('duration'))
 | 
			
		||||
                    if start_time is None or duration is None:
 | 
			
		||||
                        continue
 | 
			
		||||
                    chapters.append({
 | 
			
		||||
                        'start_time': start_time,
 | 
			
		||||
                        'end_time': start_time + duration,
 | 
			
		||||
                    })
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
        info = self._extract_ngtv_info(
 | 
			
		||||
            media_id, tokenizer_query, {
 | 
			
		||||
                'url': url,
 | 
			
		||||
                'site_name': site[:3].upper(),
 | 
			
		||||
                'auth_required': video_data.get('authRequired') == '1',
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
        thumbnails = []
 | 
			
		||||
        for image_id, image in video_data.get('images', {}).items():
 | 
			
		||||
| 
						 | 
				
			
			@ -98,15 +76,14 @@ class TBSIE(TurnerBaseIE):
 | 
			
		|||
                })
 | 
			
		||||
            thumbnails.append(i)
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
        info.update({
 | 
			
		||||
            'id': media_id,
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'description': strip_or_none(video_data.get('descriptionNoTags') or video_data.get('shortDescriptionNoTags')),
 | 
			
		||||
            'duration': duration,
 | 
			
		||||
            'duration': float_or_none(video_data.get('duration')) or info.get('duration'),
 | 
			
		||||
            'timestamp': int_or_none(video_data.get('created')),
 | 
			
		||||
            'season_number': int_or_none(video_data.get('season')),
 | 
			
		||||
            'episode_number': int_or_none(video_data.get('episode')),
 | 
			
		||||
            'cahpters': chapters,
 | 
			
		||||
            'thumbnails': thumbnails,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
        }
 | 
			
		||||
        })
 | 
			
		||||
        return info
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3,7 +3,7 @@ from __future__ import unicode_literals
 | 
			
		|||
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from .turner import TurnerBaseIE
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
| 
						 | 
				
			
			@ -15,7 +15,7 @@ from ..utils import (
 | 
			
		|||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TeamcocoIE(InfoExtractor):
 | 
			
		||||
class TeamcocoIE(TurnerBaseIE):
 | 
			
		||||
    _VALID_URL = r'https?://teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
 | 
			
		||||
    _TESTS = [
 | 
			
		||||
        {
 | 
			
		||||
| 
						 | 
				
			
			@ -110,6 +110,8 @@ class TeamcocoIE(InfoExtractor):
 | 
			
		|||
          name
 | 
			
		||||
        }
 | 
			
		||||
        duration
 | 
			
		||||
        turnerMediaId
 | 
			
		||||
        turnerMediaAuthToken
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    ... on NotFoundSlug {
 | 
			
		||||
| 
						 | 
				
			
			@ -123,53 +125,65 @@ class TeamcocoIE(InfoExtractor):
 | 
			
		|||
        record = response['record']
 | 
			
		||||
        video_id = record['id']
 | 
			
		||||
 | 
			
		||||
        video_sources = self._graphql_call('''{
 | 
			
		||||
  %s(id: "%s") {
 | 
			
		||||
    src
 | 
			
		||||
  }
 | 
			
		||||
}''', 'RecordVideoSource', video_id) or {}
 | 
			
		||||
 | 
			
		||||
        formats = []
 | 
			
		||||
        get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
 | 
			
		||||
        for format_id, src in video_sources.get('src', {}).items():
 | 
			
		||||
            if not isinstance(src, dict):
 | 
			
		||||
                continue
 | 
			
		||||
            src_url = src.get('src')
 | 
			
		||||
            if not src_url:
 | 
			
		||||
                continue
 | 
			
		||||
            ext = determine_ext(src_url, mimetype2ext(src.get('type')))
 | 
			
		||||
            if format_id == 'hls' or ext == 'm3u8':
 | 
			
		||||
                # compat_urllib_parse.urljoin does not work here
 | 
			
		||||
                if src_url.startswith('/'):
 | 
			
		||||
                    src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
 | 
			
		||||
                formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                    src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
 | 
			
		||||
            else:
 | 
			
		||||
                if src_url.startswith('/mp4:protected/'):
 | 
			
		||||
                    # TODO Correct extraction for these files
 | 
			
		||||
                    continue
 | 
			
		||||
                tbr = int_or_none(self._search_regex(
 | 
			
		||||
                    r'(\d+)k\.mp4', src_url, 'tbr', default=None))
 | 
			
		||||
 | 
			
		||||
                formats.append({
 | 
			
		||||
                    'url': src_url,
 | 
			
		||||
                    'ext': ext,
 | 
			
		||||
                    'tbr': tbr,
 | 
			
		||||
                    'format_id': format_id,
 | 
			
		||||
                    'quality': get_quality(format_id),
 | 
			
		||||
                })
 | 
			
		||||
        if not formats:
 | 
			
		||||
            formats = self._extract_m3u8_formats(
 | 
			
		||||
                record['file']['url'], video_id, 'mp4', fatal=False)
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
        info = {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'display_id': display_id,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
            'title': record['title'],
 | 
			
		||||
            'thumbnail': record.get('thumb', {}).get('preview'),
 | 
			
		||||
            'description': record.get('teaser'),
 | 
			
		||||
            'duration': parse_duration(record.get('duration')),
 | 
			
		||||
            'timestamp': parse_iso8601(record.get('publishOn')),
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        media_id = record.get('turnerMediaId')
 | 
			
		||||
        if media_id:
 | 
			
		||||
            self._initialize_geo_bypass({
 | 
			
		||||
                'countries': ['US'],
 | 
			
		||||
            })
 | 
			
		||||
            info.update(self._extract_ngtv_info(media_id, {
 | 
			
		||||
                'accessToken': record['turnerMediaAuthToken'],
 | 
			
		||||
                'accessTokenType': 'jws',
 | 
			
		||||
            }))
 | 
			
		||||
        else:
 | 
			
		||||
            video_sources = self._graphql_call('''{
 | 
			
		||||
  %s(id: "%s") {
 | 
			
		||||
    src
 | 
			
		||||
  }
 | 
			
		||||
}''', 'RecordVideoSource', video_id) or {}
 | 
			
		||||
 | 
			
		||||
            formats = []
 | 
			
		||||
            get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
 | 
			
		||||
            for format_id, src in video_sources.get('src', {}).items():
 | 
			
		||||
                if not isinstance(src, dict):
 | 
			
		||||
                    continue
 | 
			
		||||
                src_url = src.get('src')
 | 
			
		||||
                if not src_url:
 | 
			
		||||
                    continue
 | 
			
		||||
                ext = determine_ext(src_url, mimetype2ext(src.get('type')))
 | 
			
		||||
                if format_id == 'hls' or ext == 'm3u8':
 | 
			
		||||
                    # compat_urllib_parse.urljoin does not work here
 | 
			
		||||
                    if src_url.startswith('/'):
 | 
			
		||||
                        src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
 | 
			
		||||
                    formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                        src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
 | 
			
		||||
                else:
 | 
			
		||||
                    if src_url.startswith('/mp4:protected/'):
 | 
			
		||||
                        # TODO Correct extraction for these files
 | 
			
		||||
                        continue
 | 
			
		||||
                    tbr = int_or_none(self._search_regex(
 | 
			
		||||
                        r'(\d+)k\.mp4', src_url, 'tbr', default=None))
 | 
			
		||||
 | 
			
		||||
                    formats.append({
 | 
			
		||||
                        'url': src_url,
 | 
			
		||||
                        'ext': ext,
 | 
			
		||||
                        'tbr': tbr,
 | 
			
		||||
                        'format_id': format_id,
 | 
			
		||||
                        'quality': get_quality(format_id),
 | 
			
		||||
                    })
 | 
			
		||||
            if not formats:
 | 
			
		||||
                formats = self._extract_m3u8_formats(
 | 
			
		||||
                    record['file']['url'], video_id, 'mp4', fatal=False)
 | 
			
		||||
            self._sort_formats(formats)
 | 
			
		||||
            info['formats'] = formats
 | 
			
		||||
 | 
			
		||||
        return info
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -9,6 +9,7 @@ from ..utils import (
 | 
			
		|||
    xpath_text,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    float_or_none,
 | 
			
		||||
    parse_duration,
 | 
			
		||||
    xpath_attr,
 | 
			
		||||
    update_url_query,
 | 
			
		||||
| 
						 | 
				
			
			@ -23,14 +24,17 @@ class TurnerBaseIE(AdobePassIE):
 | 
			
		|||
    def _extract_timestamp(self, video_data):
 | 
			
		||||
        return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts'))
 | 
			
		||||
 | 
			
		||||
    def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data):
 | 
			
		||||
    def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data, custom_tokenizer_query=None):
 | 
			
		||||
        secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*'
 | 
			
		||||
        token = self._AKAMAI_SPE_TOKEN_CACHE.get(secure_path)
 | 
			
		||||
        if not token:
 | 
			
		||||
            query = {
 | 
			
		||||
                'path': secure_path,
 | 
			
		||||
                'videoId': content_id,
 | 
			
		||||
            }
 | 
			
		||||
            if custom_tokenizer_query:
 | 
			
		||||
                query.update(custom_tokenizer_query)
 | 
			
		||||
            else:
 | 
			
		||||
                query['videoId'] = content_id
 | 
			
		||||
            if ap_data.get('auth_required'):
 | 
			
		||||
                query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], content_id, ap_data['site_name'], ap_data['site_name'])
 | 
			
		||||
            auth = self._download_xml(
 | 
			
		||||
| 
						 | 
				
			
			@ -188,3 +192,42 @@ class TurnerBaseIE(AdobePassIE):
 | 
			
		|||
            'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
 | 
			
		||||
            'is_live': is_live,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    def _extract_ngtv_info(self, media_id, tokenizer_query, ap_data=None):
 | 
			
		||||
        streams_data = self._download_json(
 | 
			
		||||
            'http://medium.ngtv.io/media/%s/tv' % media_id,
 | 
			
		||||
            media_id)['media']['tv']
 | 
			
		||||
        duration = None
 | 
			
		||||
        chapters = []
 | 
			
		||||
        formats = []
 | 
			
		||||
        for supported_type in ('unprotected', 'bulkaes'):
 | 
			
		||||
            stream_data = streams_data.get(supported_type, {})
 | 
			
		||||
            m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
 | 
			
		||||
            if not m3u8_url:
 | 
			
		||||
                continue
 | 
			
		||||
            if stream_data.get('playlistProtection') == 'spe':
 | 
			
		||||
                m3u8_url = self._add_akamai_spe_token(
 | 
			
		||||
                    'http://token.ngtv.io/token/token_spe',
 | 
			
		||||
                    m3u8_url, media_id, ap_data or {}, tokenizer_query)
 | 
			
		||||
            formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
 | 
			
		||||
 | 
			
		||||
            duration = float_or_none(stream_data.get('totalRuntime'))
 | 
			
		||||
 | 
			
		||||
            if not chapters:
 | 
			
		||||
                for chapter in stream_data.get('contentSegments', []):
 | 
			
		||||
                    start_time = float_or_none(chapter.get('start'))
 | 
			
		||||
                    chapter_duration = float_or_none(chapter.get('duration'))
 | 
			
		||||
                    if start_time is None or chapter_duration is None:
 | 
			
		||||
                        continue
 | 
			
		||||
                    chapters.append({
 | 
			
		||||
                        'start_time': start_time,
 | 
			
		||||
                        'end_time': start_time + chapter_duration,
 | 
			
		||||
                    })
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
            'chapters': chapters,
 | 
			
		||||
            'duration': duration,
 | 
			
		||||
        }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue