[tweakers] Add new extractor

This commit is contained in:
robin 2015-02-05 19:55:41 +01:00
parent 1b0f3919c1
commit e3aaace400
2 changed files with 42 additions and 0 deletions

View file

@ -475,6 +475,7 @@ from .tutv import TutvIE
from .tvigle import TvigleIE from .tvigle import TvigleIE
from .tvp import TvpIE, TvpSeriesIE from .tvp import TvpIE, TvpSeriesIE
from .tvplay import TVPlayIE from .tvplay import TVPlayIE
from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE from .twentyfourvideo import TwentyFourVideoIE
from .twitch import ( from .twitch import (
TwitchVideoIE, TwitchVideoIE,

View file

@ -0,0 +1,41 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class TweakersIE(InfoExtractor):
    """Extractor for videos published on tweakers.net.

    The video page URL carries the numeric id and (usually) a slug. The
    actual media URL is scraped from the matching ``/video/player/`` page.
    """

    _VALID_URL = r'https?://tweakers\.net/video/(?P<id>[0-9]+).*'
    _TEST = {
        'url': 'https://tweakers.net/video/9926/new-nintendo-3ds-xl-op-alle-fronten-beter.html',
        'md5': 'f7f7f3027166a7f32f024b4ae6571ced',
        'info_dict': {
            'id': '9926',
            'ext': 'mp4',
            'title': 'New-Nintendo-3Ds-Xl-Op-Alle-Fronten-Beter',
            # TODO: assert more properties once they are extracted.
        }
    }

    # Splits a video URL into its ".../video" prefix, numeric id and optional
    # slug. The slug group is optional so that every URL accepted by
    # _VALID_URL is also accepted here; the dot in ".html" is escaped so a
    # slug such as "what-is-html5" is not cut in the middle.
    _URL_PARTS_RE = (
        r'(?P<base>https?://tweakers\.net/video)/(?P<id>[0-9]+)'
        r'(?:/(?P<slug>[^/?#]+?)(?:\.html)?(?:[/?#]|$))?'
    )

    def _real_extract(self, url):
        """Return the info dict for the video at *url*.

        Downloads the player page belonging to the video and takes the
        first mp4 URL found in it. The title is derived from the URL slug
        (title-cased); when the URL has no slug the video id is used.
        """
        parts = re.match(self._URL_PARTS_RE, url)
        video_id = parts.group('id')
        slug = parts.group('slug')

        if slug:
            title = slug.title()
            player_url = '%s/player/%s/%s.html' % (
                parts.group('base'), video_id, slug)
        else:
            # NOTE(review): the slug-less player URL is an assumption;
            # confirm that tweakers.net serves it before relying on it.
            title = video_id
            player_url = '%s/player/%s.html' % (
                parts.group('base'), video_id)

        player_page = self._download_webpage(player_url, video_id)

        # Restrict the match to a single URL token (no whitespace, quotes or
        # angle brackets) instead of "http.*mp4", which could span unrelated
        # markup on the same line. _search_regex reports a missing match as
        # an extraction error rather than a bare IndexError.
        video_url = self._search_regex(
            r'(https?://[^\s"\'<>]+mp4)', player_page, 'video URL')

        return {
            'id': video_id,
            'ext': 'mp4',
            'title': title,
            'url': video_url,
            # NOTE(review): kept for backward compatibility; verify that
            # 'player_url' is meant to carry an HTML player page URL.
            'player_url': player_url,
        }