[camdemy] Add new extractor

Single-file download is implemented; folder extraction is planned.
2015-02-11 16:39:15 +08:00 · 2015-02-11 16:39:15 +08:00 · 8708d76425
commit 8708d76425
parent 054fe3cc40
2 changed files with 79 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -49,6 +49,7 @@ from .brightcove import BrightcoveIE
 from .buzzfeed import BuzzFeedIE
 from .byutv import BYUtvIE
 from .c56 import C56IE
 from .camdemy import CamdemyIE
 from .canal13cl import Canal13clIE
 from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
--- a/youtube_dl/extractor/camdemy.py
+++ b/youtube_dl/extractor/camdemy.py
@@ -0,0 +1,78 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..compat import compat_urlparse
 from ..utils import parse_iso8601
 class CamdemyIE(InfoExtractor):
    """Information extractor for single media pages on www.camdemy.com.

    Title, thumbnail, creator and duration are read from the site's oEmbed
    endpoint; the creation time, view count and description are scraped from
    the media page. The media file name is read from a ``fileList.xml``
    located in a ``video/`` folder resolved relative to the thumbnail URL.
    """

    # Dots are escaped so that only the literal host name is accepted.
    _VALID_URL = r'http://www\.camdemy\.com/media/(?P<id>\d+)'
    _TESTS = [{
        # single file
        'url': 'http://www.camdemy.com/media/5181/',
        'md5': '5a5562b6a98b37873119102e052e311b',
        'info_dict': {
            'id': '5181',
            'ext': 'mp4',
            'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': '',
            'creator': 'ss11spring',
            'upload_date': '20130114',
            'timestamp': 1358154556,
        }
    }, {
        # With non-empty description
        'url': 'http://www.camdemy.com/media/13885',
        'md5': '4576a3bb2581f86c61044822adbd1249',
        'info_dict': {
            'id': '13885',
            'ext': 'mp4',
            'title': 'EverCam + Camdemy QuickStart',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:050b62f71ed62928f8a35f1a41e186c9',
            'creator': 'evercam',
            'upload_date': '20140620',
            'timestamp': 1403271569,
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for the media page at ``url``.

        The title, the thumbnail URL and the file list are required; the
        creation time, view count, creator and duration are optional and
        are reported as ``None`` when they cannot be determined.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)
        oembed_obj = self._download_json(
            'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)

        # The media file sits in a "video/" folder resolved relative to the
        # thumbnail URL; fileList.xml in that folder names the actual file.
        thumb_url = oembed_obj['thumbnail_url']
        video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
        file_list_doc = self._download_xml(
            compat_urlparse.urljoin(video_folder, 'fileList.xml'),
            video_id, 'Filelist XML')
        file_name = file_list_doc.find('./video/item/fileName').text

        # Non-greedy ".*?" binds each label to the nearest following value
        # <div>; a greedy ".*" with DOTALL would pick the last matching one
        # on the page, which may belong to a different label.
        creation_time = self._html_search_regex(
            r"<div class='title'>Posted :</div>.*?<div class='value'>([0-9:\- ]+)<",
            page, 'creation time', fatal=False, flags=re.DOTALL)
        creation_timestamp = None
        if creation_time:
            # The page shows the time without a UTC offset; +08:00 is
            # appended before parsing.
            creation_timestamp = parse_iso8601(
                creation_time + '+08:00', delimiter=' ')

        view_count_str = self._html_search_regex(
            r"<div class='title'>Views :</div>.*?<div class='value'>([0-9,]+)<",
            page, 'view count', fatal=False, flags=re.DOTALL)
        # Strip thousands separators; guard against a separators-only match.
        view_digits = (view_count_str or '').replace(',', '')
        views = int(view_digits) if view_digits else None

        return {
            'id': video_id,
            'url': compat_urlparse.urljoin(video_folder, file_name),
            'title': oembed_obj['title'],
            'thumbnail': thumb_url,
            'description': self._html_search_meta('description', page),
            'creator': oembed_obj.get('author_name'),
            'duration': oembed_obj.get('duration'),
            'timestamp': creation_timestamp,
            'view_count': views,
        }