Add an extractor for techtalks.tv (closes #1606)

2013-10-17 08:20:58 +02:00 · 2013-10-17 08:20:58 +02:00 · d21ab29200
commit d21ab29200
parent 54ed626cf8
2 changed files with 66 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -115,6 +115,7 @@ from .statigram import StatigramIE
 from .steam import SteamIE
 from .sztvhu import SztvHuIE
 from .teamcoco import TeamcocoIE
 from .techtalks import TechTalksIE
 from .ted import TEDIE
 from .tf1 import TF1IE
 from .thisav import ThisAVIE
--- a/youtube_dl/extractor/techtalks.py
+++ b/youtube_dl/extractor/techtalks.py
@@ -0,0 +1,65 @@
 import re
 from .common import InfoExtractor
 from ..utils import (
    get_element_by_attribute,
    clean_html,
 )
 class TechTalksIE(InfoExtractor):
    """Information extractor for talk pages on techtalks.tv.

    A talk page provides a presenter video and, for some talks, a second
    video showing the slides. Both are addressed through the same RTMP
    connection URL and differ only in their play path.
    """

    # The slash after the numeric id is optional so that URLs copied without
    # it are still accepted; the id must nevertheless be a complete path
    # segment (followed by '/', '?', '#' or the end of the URL).
    _VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)(?:[/?#]|$)'
    _TEST = {
        u'url': u'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
        u'playlist': [
            {
                u'file': u'57758.flv',
                u'info_dict': {
                    u'title': u'Learning Topic Models --- Going beyond SVD',
                },
            },
            {
                u'file': u'57758-slides.flv',
                u'info_dict': {
                    u'title': u'Learning Topic Models --- Going beyond SVD',
                },
            },
        ],
        u'params': {
            # rtmp download
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Extract the presenter video and, when available, the slides video.

        Returns a single info dict when the page has no slides link.
        Otherwise returns a list of two info dicts: the presenter video
        first, then the slides video, whose id is the talk id with a
        ``-slides`` suffix.
        """
        mobj = re.match(self._VALID_URL, url)
        talk_id = mobj.group('id')
        webpage = self._download_webpage(url, talk_id)

        # Every stream of the talk uses this connection URL; only the play
        # path differs between the presenter and the slides video.
        rtmp_url = self._search_regex(
            r'netConnectionUrl: \'(.*?)\'', webpage, u'rtmp url')
        play_path = self._search_regex(
            r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
            webpage, u'presenter play path')
        # NOTE(review): presumably get_element_by_attribute returns None when
        # no element has class "title" -- confirm clean_html tolerates that.
        title = clean_html(get_element_by_attribute('class', 'title', webpage))

        video_info = {
            'id': talk_id,
            'title': title,
            'url': rtmp_url,
            'play_path': play_path,
            'ext': 'flv',
        }

        m_slides = re.search(r'<a class="slides" href=\'(.*?)\'', webpage)
        if m_slides is None:
            # No slides link on the page: the presenter video is the only one
            return video_info

        # The slides video reuses the title and the connection URL
        slides_info = {
            'id': talk_id + '-slides',
            'title': title,
            'url': rtmp_url,
            'play_path': m_slides.group(1),
            'ext': 'flv',
        }
        return [video_info, slides_info]