[nuevo] Generalize nuevo extractor and add support for trollvids

Supports only the nuevo player for now (most common).

[trollvids] convert duration to an int

[trollvids] added a test

[trollvids] made flake8 shut up

Generalized the Nuevo extractor

Affects: anitube, trollvids, trutube

[nuevo] Addressed the code review comments.
This commit is contained in:
Andrew "Akari" Alexeyew 2015-12-02 06:00:47 +02:00 committed by Sergey M․
parent 4fcd9d147d
commit d570746e45
5 changed files with 98 additions and 46 deletions

View file

@ -726,6 +726,7 @@ from .toutv import TouTvIE
from .toypics import ToypicsUserIE, ToypicsIE
from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE
from .trollvids import TrollvidsIE
from .trutube import TruTubeIE
from .tube8 import Tube8IE
from .tubitv import TubiTvIE

View file

@ -2,10 +2,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .nuevo import NuevoBaseIE
class AnitubeIE(InfoExtractor):
class AnitubeIE(NuevoBaseIE):
IE_NAME = 'anitube.se'
_VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
@ -29,31 +29,5 @@ class AnitubeIE(InfoExtractor):
key = self._search_regex(
r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key')
config_xml = self._download_xml(
'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key)
video_title = config_xml.find('title').text
thumbnail = config_xml.find('image').text
duration = float(config_xml.find('duration').text)
formats = []
video_url = config_xml.find('file')
if video_url is not None:
formats.append({
'format_id': 'sd',
'url': video_url.text,
})
video_url = config_xml.find('filehd')
if video_url is not None:
formats.append({
'format_id': 'hd',
'url': video_url.text,
})
return {
'id': video_id,
'title': video_title,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats
}
config_url = 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key
return self._extract_nuevo(config_url, video_id)

View file

@ -0,0 +1,37 @@
# encoding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
float_or_none,
xpath_text
)
class NuevoBaseIE(InfoExtractor):
    """Shared base class for extractors of sites that use the Nuevo player."""

    def _extract_nuevo(self, config_url, video_id):
        """Download a Nuevo player XML config and build an info dict from it.

        config_url -- URL of the player's XML configuration document
        video_id   -- identifier used for the download and as the result 'id'

        Returns a dict with the keys 'id', 'title', 'thumbnail', 'duration'
        and 'formats'.  All keys are always present; 'formats' holds one
        entry per <file> ('sd') and <filehd> ('hd') element that carries a
        URL, in that order.
        """
        # The response body is stripped of surrounding whitespace before it
        # is handed to the XML parser.
        tree = self._download_xml(
            config_url, video_id, transform_source=lambda s: s.strip())

        title = xpath_text(tree, './title')
        if title:
            title = title.strip()
        thumbnail = xpath_text(tree, './image')
        duration = float_or_none(xpath_text(tree, './duration'))

        formats = []
        for element_name, format_id in (('file', 'sd'), ('filehd', 'hd')):
            element = tree.find(element_name)
            if element is None:
                continue
            media_url = element.text
            # An element that is present but empty has text None; emitting
            # it would yield a format without a usable URL, so skip it.
            if not media_url:
                continue
            formats.append({
                'format_id': format_id,
                'url': media_url,
            })

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }

View file

@ -0,0 +1,49 @@
# encoding: utf-8
from __future__ import unicode_literals
from .nuevo import NuevoBaseIE
from ..compat import (
compat_urllib_parse_unquote
)
import re
class TrollvidsIE(NuevoBaseIE):
    """Extractor for trollvids.com video pages, built on the Nuevo base."""

    _VALID_URL = r'http://(?:www\.)?trollvids\.com/+video/+(?P<id>[0-9]+)/+(?P<title>[^?&]+)'
    IE_NAME = 'trollvids'

    _TESTS = [
        {
            'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff',
            'md5': '1d53866b2c514b23ed69e4352fdc9839',
            'info_dict': {
                'id': '2349002',
                'ext': 'mp4',
                'title': "【MMD R-18】ガールフレンド carry_me_off",
                'age_limit': 18,
                'duration': 216.78,
            },
        },
    ]

    def _real_extract(self, url):
        """Return the info dict for a trollvids.com video page URL.

        The numeric id and the title slug are taken from the URL itself;
        everything else comes from the site's Nuevo player config.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        raw_video_title = mobj.group('title')

        # Rebuild the page URL in a single canonical form: _VALID_URL also
        # accepts an optional "www." prefix and repeated slashes.
        webpage_url = 'http://trollvids.com/video/%s/%s' % (video_id, raw_video_title)
        config_url = 'http://trollvids.com/nuevo/player/config.php?v=%s' % video_id

        info = self._extract_nuevo(config_url, video_id)
        info.update({
            'webpage_url': webpage_url,
            'age_limit': 18,
        })

        # _extract_nuevo always sets the 'title' key (to None when the config
        # has no title), so the value must be tested rather than key
        # membership; otherwise this URL-slug fallback could never run.
        if not info.get('title'):
            info['title'] = compat_urllib_parse_unquote(raw_video_title)

        return info

View file

@ -1,10 +1,9 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import xpath_text
from .nuevo import NuevoBaseIE
class TruTubeIE(InfoExtractor):
class TruTubeIE(NuevoBaseIE):
_VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
@ -22,19 +21,11 @@ class TruTubeIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
config_url = 'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id
config = self._download_xml(
'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id,
video_id, transform_source=lambda s: s.strip())
info = self._extract_nuevo(config_url, video_id)
# filehd is always 404
video_url = xpath_text(config, './file', 'video URL', fatal=True)
title = xpath_text(config, './title', 'title').strip()
thumbnail = xpath_text(config, './image', ' thumbnail')
# filehd always 404s
info['formats'] = info['formats'][:1]
return {
'id': video_id,
'url': video_url,
'title': title,
'thumbnail': thumbnail,
}
return info