Merge remote-tracking branch 'MikeCol/playvid_extract'

This commit is contained in:
Philipp Hagemeister 2014-03-10 20:45:45 +01:00
commit 777ac90791
2 changed files with 86 additions and 0 deletions

View file

@ -175,6 +175,7 @@ from .ooyala import OoyalaIE
from .orf import ORFIE
from .pbs import PBSIE
from .photobucket import PhotobucketIE
from .playvid import PlayvidIE
from .podomatic import PodomaticIE
from .pornhd import PornHdIE
from .pornhub import PornHubIE

View file

@ -0,0 +1,85 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
determine_ext,
)
class PlayvidIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(#|$)'
_TEST = {
'url': 'http://www.playvid.com/watch/agbDDi7WZTV',
'file': 'agbDDi7WZTV.mp4',
'md5': '44930f8afa616efdf9482daf4fe53e1e',
'info_dict': {
'title': 'Michelle Lewin in Miami Beach',
'duration': 240,
'age_limit': 18,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
self.report_extraction(video_id)
video_title = None
duration = None
video_thumbnail = None
formats = []
# most of the information is stored in the flashvars
flashvars_match = re.search(r'flashvars="(.+?)"',webpage)
if flashvars_match:
infos = compat_urllib_parse.unquote(flashvars_match.group(1)).split(r'&amp;')
for info in infos:
videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$',info)
if videovars_match:
key = videovars_match.group(1)
val = videovars_match.group(2)
if key == 'title':
video_title = val.replace('+',' ')
if key == 'duration':
try:
duration = val
except ValueError:
duration = None
if key == 'big_thumb':
video_thumbnail = val
videourl_match = re.match(r'^video_urls\]\[(?P<resolution>\d+)p',key)
if videourl_match:
resolution = int(videourl_match.group('resolution'))
formats.append({
'resolution': resolution, # 360, 480, ...
'ext': determine_ext(val),
'url': val
})
# fatal error, if no download url is found
if len(formats) == 0:
raise ExtractorError,'no video url found'
# Extract title - should be in the flashvars; if not, look elsewhere
if video_title is None:
video_title = self._html_search_regex(
r'<title>(.*?)</title', webpage, 'title')
return {
'id': video_id,
'formats': formats,
'title': video_title,
'thumbnail': video_thumbnail,
'duration': duration,
'description': None,
'age_limit': 18
}