[zaq1] Add new extractor

This commit is contained in:
slocum 2017-04-09 12:02:44 +02:00 committed by Sergey M․
parent 6ec371cd9e
commit 9dac2cec2d
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D
2 changed files with 68 additions and 0 deletions

View file

@ -1300,5 +1300,6 @@ from .youtube import (
YoutubeWatchLaterIE, YoutubeWatchLaterIE,
) )
from .zapiks import ZapiksIE from .zapiks import ZapiksIE
from .zaq1 import Zaq1IE
from .zdf import ZDFIE, ZDFChannelIE from .zdf import ZDFIE, ZDFChannelIE
from .zingmp3 import ZingMp3IE from .zingmp3 import ZingMp3IE

View file

@ -0,0 +1,67 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
unified_strdate,
int_or_none
)
class Zaq1IE(InfoExtractor):
    """Extractor for single-video pages under zaq1.pl/video/<id>.

    The media URL and most metadata are read from ``data-*`` attributes in
    the watch page HTML; the title, upload date and uploader are scraped
    from the surrounding markup.
    """

    # Accept both http and https; the video id is the path segment that
    # follows /video/.
    _VALID_URL = r'https?://(?:www\.)?zaq1\.pl/video/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://zaq1.pl/video/xev0e',
        'md5': '24a5eb3f052e604ae597c4d0d19b351e',
        'info_dict': {
            'id': 'xev0e',
            'title': 'DJ NA WESELE. TANIEC Z FIGURAMI.węgrów/sokołów podlaski/siedlce/mińsk mazowiecki/warszawa',
            'ext': 'mp4',
            'duration': 511,
            'uploader': 'Anonim',
            'upload_date': '20170330',
        }
    }, {
        'url': 'http://zaq1.pl/video/x80nc',
        'md5': '1245973520adc78139928a820959d9c5',
        'info_dict': {
            'id': 'x80nc',
            'title': 'DIY Inspiration Challenge #86 | koraliki | gwiazdka na choinkę z koralików i drutu',
            'ext': 'mp4',
            'duration': 438,
            'uploader': 'Anonim',
            'upload_date': '20170404',
        }
    }, {
        # URL-matching check only: the https form must be recognised too.
        'url': 'https://zaq1.pl/video/xev0e',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the watch page at *url* and build the info dict.

        Only the video id (taken from the URL), the title and the media URL
        are mandatory; extraction fails if one of them cannot be found.
        Every other field is optional and is left as None when the page
        does not provide it.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'(?s)<h1>\s*<span.+class="watch-title".+title="([^"]+)">\1\s*</span>\s*</h1>',
            webpage, 'title')

        # The data-* attributes are looked up in the whole page. Isolating
        # the player <div> first with a greedy DOTALL pattern would span
        # from the first <div to the last </div> anyway, and would make the
        # extraction depend on the element id for no benefit.
        video_url = self._search_regex(
            r'data-video-url="(http[^"]+)"', webpage, 'video url')

        def optional_data_attr(attr, name):
            # Optional metadata: a missing attribute yields None silently.
            return self._search_regex(
                r'data-%s="([^"]+)"' % attr, webpage, name, default=None)

        ext = optional_data_attr('file-extension', 'ext')
        duration = int_or_none(optional_data_attr('duration', 'duration'))
        thumbnail = optional_data_attr('photo-url', 'thumbnail')

        # Upload date and uploader are optional as well: report a warning
        # instead of aborting when the page layout changes.
        upload_date = unified_strdate(self._search_regex(
            r'<strong\s+class="watch-time-text">\s*Opublikowany\s+([0-9]{4}-[0-9]{2}-[0-9]{2})',
            webpage, 'upload date', fatal=False))
        # The lazy capture keeps trailing whitespace before </a> out of the
        # uploader name.
        uploader = self._search_regex(
            r'<div\s+id="watch7-user-header">.*Wideo dodał:\s*<a[^>]*>\s*([^<]+?)\s*</a>',
            webpage, 'uploader', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'formats': [{
                'url': video_url,
                'ext': ext,
                # The watch page URL is sent as Referer with the media
                # request (presumably required by the host -- confirm).
                'http_headers': {'Referer': url},
            }],
            'thumbnail': thumbnail,
            'uploader': uploader,
            'upload_date': upload_date,
            'duration': duration,
        }