[roosterteeth] Add extractor

This commit is contained in:
Nehal Patel 2016-06-22 02:58:42 -05:00 committed by Sergey M․
parent 0286b85c79
commit 3121b25639
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D
2 changed files with 113 additions and 0 deletions

View file

@ -680,6 +680,7 @@ from .rice import RICEIE
from .ringtv import RingTVIE from .ringtv import RingTVIE
from .ro220 import Ro220IE from .ro220 import Ro220IE
from .rockstargames import RockstarGamesIE from .rockstargames import RockstarGamesIE
from .roosterteeth import RoosterTeethIE
from .rottentomatoes import RottenTomatoesIE from .rottentomatoes import RottenTomatoesIE
from .roxwel import RoxwelIE from .roxwel import RoxwelIE
from .rtbf import RTBFIE from .rtbf import RTBFIE

View file

@ -0,0 +1,112 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
urlencode_postdata,
)
class RoosterTeethIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/episode/(?P<id>[^/?#&]+)'
_LOGIN_URL = 'https://roosterteeth.com/login'
_NETRC_MACHINE = 'roosterteeth'
_TESTS = [{
'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
'info_dict': {
'id': '26576',
'ext': 'mp4',
'title': 'Million Dollars, But... The Game Announcement',
'thumbnail': 're:^https?://.*\.png$',
'description': 'Introducing Million Dollars, But... The Game! Available for pre-order now at www.MDBGame.com ',
'creator': 'Rooster Teeth',
'series': 'Million Dollars, But...',
'episode': 'Million Dollars, But... The Game Announcement',
'episode_id': '26576',
},
'params': {
'skip_download': True, # m3u8 downloads
},
}, {
'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31',
'only_matching': True,
}, {
'url': 'http://funhaus.roosterteeth.com/episode/funhaus-shorts-2016-austin-sucks-funhaus-shorts',
'only_matching': True,
}, {
'url': 'http://screwattack.roosterteeth.com/episode/death-battle-season-3-mewtwo-vs-shadow',
'only_matching': True,
}, {
'url': 'http://theknow.roosterteeth.com/episode/the-know-game-news-season-1-boring-steam-sales-are-better',
'only_matching': True,
}]
def _login(self):
(username, password) = self._get_login_info()
if username is None or password is None:
return False
# token is required to authenticate request
login_page = self._download_webpage(self._LOGIN_URL, None, 'Getting login token', 'Unable to get login token')
login_form = self._hidden_inputs(login_page)
login_form.update({
'username': username,
'password': password,
})
login_payload = urlencode_postdata(login_form)
# required for proper responses
login_headers = {
'Referer': self._LOGIN_URL,
}
login_request = self._download_webpage(
self._LOGIN_URL, None,
note='Logging in as %s' % username,
data=login_payload,
headers=login_headers)
if 'Authentication failed' in login_request:
raise ExtractorError(
'Login failed (invalid username/password)', expected=True)
def _real_initialize(self):
self._login()
def _real_extract(self, url):
match_id = self._match_id(url)
webpage = self._download_webpage(url, match_id)
episode_id = self._html_search_regex(r"commentControls\('#comment-([0-9]+)'\)", webpage, 'episode id', match_id, False)
self.report_extraction(episode_id)
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'episode title', self._og_search_title(webpage), False)
thumbnail = self._og_search_thumbnail(webpage)
description = self._og_search_description(webpage)
creator = self._html_search_regex(r'<h3>Latest (.+) Gear</h3>', webpage, 'site', 'Rooster Teeth', False)
series = self._html_search_regex(r'<h2>More ([^<]+)</h2>', webpage, 'series', fatal=False)
episode = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'episode title', fatal=False)
if '<div class="non-sponsor">' in webpage:
self.raise_login_required('%s is only available for FIRST members' % title)
if '<div class="golive-gate">' in webpage:
self.raise_login_required('%s is not available yet' % title)
formats = self._extract_m3u8_formats(self._html_search_regex(r"file: '(.+?)m3u8'", webpage, 'm3u8 url') + 'm3u8', episode_id, ext='mp4')
self._sort_formats(formats)
return {
'id': episode_id,
'title': title,
'formats': formats,
'thumbnail': thumbnail,
'description': description,
'creator': creator,
'series': series,
'episode': episode,
'episode_id': episode_id,
}