[nuevo] Generalize nuevo extractor and add support for trollvids

Supports only the nuevo player for now (most common).

[trollvids] convert duration to an int

[trollvids] added a test

[trollvids] made flake8 shut up

Generalized the Nuevo extractor

Affects: anitube, trollvids, trutube

[nuevo] Complied with the code comments.
This commit is contained in:
Andrew "Akari" Alexeyew 2015-12-02 06:00:47 +02:00 committed by Sergey M․
parent 4fcd9d147d
commit d570746e45
5 changed files with 98 additions and 46 deletions

View file

@ -726,6 +726,7 @@ from .toutv import TouTvIE
from .toypics import ToypicsUserIE, ToypicsIE from .toypics import ToypicsUserIE, ToypicsIE
from .traileraddict import TrailerAddictIE from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE from .trilulilu import TriluliluIE
from .trollvids import TrollvidsIE
from .trutube import TruTubeIE from .trutube import TruTubeIE
from .tube8 import Tube8IE from .tube8 import Tube8IE
from .tubitv import TubiTvIE from .tubitv import TubiTvIE

View file

@ -2,10 +2,10 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .nuevo import NuevoBaseIE
class AnitubeIE(InfoExtractor): class AnitubeIE(NuevoBaseIE):
IE_NAME = 'anitube.se' IE_NAME = 'anitube.se'
_VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
@ -29,31 +29,5 @@ class AnitubeIE(InfoExtractor):
key = self._search_regex( key = self._search_regex(
r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key') r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key')
config_xml = self._download_xml( config_url = 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key
'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key) return self._extract_nuevo(config_url, video_id)
video_title = config_xml.find('title').text
thumbnail = config_xml.find('image').text
duration = float(config_xml.find('duration').text)
formats = []
video_url = config_xml.find('file')
if video_url is not None:
formats.append({
'format_id': 'sd',
'url': video_url.text,
})
video_url = config_xml.find('filehd')
if video_url is not None:
formats.append({
'format_id': 'hd',
'url': video_url.text,
})
return {
'id': video_id,
'title': video_title,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats
}

View file

@ -0,0 +1,37 @@
# encoding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
float_or_none,
xpath_text
)
class NuevoBaseIE(InfoExtractor):
    """Shared base class for extractors of sites using the Nuevo player."""

    def _extract_nuevo(self, config_url, video_id):
        """Download a Nuevo player XML config and turn it into an info dict.

        config_url -- URL of the player's XML configuration document.
        video_id   -- identifier handed to the download helper and returned
                      unchanged under the 'id' key.

        Returns a dict with the keys 'id', 'title', 'thumbnail', 'duration'
        and 'formats'. 'title', 'thumbnail' and 'duration' are whatever the
        <title>, <image> and <duration> elements yield through xpath_text /
        float_or_none, so callers should be prepared for falsy values.
        'formats' holds one entry per non-empty <file> ('sd') and <filehd>
        ('hd') element, in that order, and may be empty.
        """
        # The response is stripped of surrounding whitespace (presumably
        # before XML parsing, judging by the transform_source hook name).
        tree = self._download_xml(
            config_url, video_id, transform_source=lambda s: s.strip())

        title = xpath_text(tree, './title')
        if title:
            title = title.strip()
        thumbnail = xpath_text(tree, './image')
        duration = float_or_none(xpath_text(tree, './duration'))

        formats = []
        for element_name, format_id in (('file', 'sd'), ('filehd', 'hd')):
            element = tree.find(element_name)
            if element is None:
                continue
            # An element that is present but empty has text None; emitting
            # it would yield a format without a usable URL, so skip it.
            media_url = (element.text or '').strip()
            if not media_url:
                continue
            formats.append({
                'format_id': format_id,
                'url': media_url,
            })

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }

View file

@ -0,0 +1,49 @@
# encoding: utf-8
from __future__ import unicode_literals
from .nuevo import NuevoBaseIE
from ..compat import (
compat_urllib_parse_unquote
)
import re
class TrollvidsIE(NuevoBaseIE):
    """Extractor for trollvids.com video pages, built on the Nuevo config."""

    _VALID_URL = r'http://(?:www\.)?trollvids\.com/+video/+(?P<id>[0-9]+)/+(?P<title>[^?&]+)'
    IE_NAME = 'trollvids'

    _TESTS = [
        {
            'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff',
            'md5': '1d53866b2c514b23ed69e4352fdc9839',
            'info_dict': {
                'id': '2349002',
                'ext': 'mp4',
                'title': "【MMD R-18】ガールフレンド carry_me_off",
                'age_limit': 18,
                'duration': 216.78,
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for a trollvids.com video URL.

        The numeric id and the (still percent-encoded) title slug are taken
        from the URL; the media data comes from the site's Nuevo player
        config via _extract_nuevo. 'webpage_url' and 'age_limit' are then
        added, and the title falls back to the decoded URL slug when the
        config did not provide one.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        raw_video_title = mobj.group('title')

        # Rebuild the page URL in one fixed form (no "www.", single slashes,
        # nothing after the slug) instead of echoing the user's input.
        webpage_url = 'http://trollvids.com/video/%s/%s' % (video_id, raw_video_title)
        config_url = 'http://trollvids.com/nuevo/player/config.php?v=%s' % video_id

        info = self._extract_nuevo(config_url, video_id)
        info.update({
            'webpage_url': webpage_url,
            'age_limit': 18,
        })

        # _extract_nuevo always sets the 'title' key, possibly to a falsy
        # value, so test the value rather than the key's presence.
        if not info.get('title'):
            info['title'] = compat_urllib_parse_unquote(raw_video_title)
        return info

View file

@ -1,10 +1,9 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .nuevo import NuevoBaseIE
from ..utils import xpath_text
class TruTubeIE(InfoExtractor): class TruTubeIE(NuevoBaseIE):
_VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-', 'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
@ -22,19 +21,11 @@ class TruTubeIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
config_url = 'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id
config = self._download_xml( info = self._extract_nuevo(config_url, video_id)
'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id,
video_id, transform_source=lambda s: s.strip())
# filehd is always 404 # filehd always 404s
video_url = xpath_text(config, './file', 'video URL', fatal=True) info['formats'] = info['formats'][:1]
title = xpath_text(config, './title', 'title').strip()
thumbnail = xpath_text(config, './image', ' thumbnail')
return { return info
'id': video_id,
'url': video_url,
'title': title,
'thumbnail': thumbnail,
}