[safari] Add support for new URL schema (closes #16614)

This commit is contained in:
Sergey M․ 2018-06-03 00:52:22 +07:00
parent 1ea559c445
commit 003fe73ccf
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@ -74,7 +74,14 @@ class SafariBaseIE(InfoExtractor):
class SafariIE(SafariBaseIE): class SafariIE(SafariBaseIE):
IE_NAME = 'safari' IE_NAME = 'safari'
IE_DESC = 'safaribooksonline.com online video' IE_DESC = 'safaribooksonline.com online video'
_VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?#&]+)\.html' _VALID_URL = r'''(?x)
https?://
(?:www\.)?safaribooksonline\.com/
(?:
library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html|
videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+)
)
'''
_TESTS = [{ _TESTS = [{
'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
@ -94,22 +101,41 @@ class SafariIE(SafariBaseIE):
}, { }, {
'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html', 'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00',
'only_matching': True,
}] }]
_PARTNER_ID = '1926081'
_UICONF_ID = '29375172'
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = '%s/%s' % (mobj.group('course_id'), mobj.group('part'))
webpage = self._download_webpage(url, video_id) reference_id = mobj.group('reference_id')
reference_id = self._search_regex( if reference_id:
r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1', video_id = reference_id
webpage, 'kaltura reference id', group='id') partner_id = self._PARTNER_ID
partner_id = self._search_regex( ui_id = self._UICONF_ID
r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1', else:
webpage, 'kaltura widget id', group='id') video_id = '%s-%s' % (mobj.group('course_id'), mobj.group('part'))
ui_id = self._search_regex(
r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, urlh = self._download_webpage_handle(url, video_id)
webpage, 'kaltura uiconf id', group='id')
mobj = re.match(self._VALID_URL, urlh.geturl())
reference_id = mobj.group('reference_id')
if not reference_id:
reference_id = self._search_regex(
r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
webpage, 'kaltura reference id', group='id')
partner_id = self._search_regex(
r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
webpage, 'kaltura widget id', default=self._PARTNER_ID,
group='id')
ui_id = self._search_regex(
r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
webpage, 'kaltura uiconf id', default=self._UICONF_ID,
group='id')
query = { query = {
'wid': '_%s' % partner_id, 'wid': '_%s' % partner_id,
@ -159,10 +185,15 @@ class SafariCourseIE(SafariBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?: (?:
(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)| (?:www\.)?safaribooksonline\.com/
(?:
library/view/[^/]+|
api/v1/book|
videos/[^/]+
)|
techbus\.safaribooksonline\.com techbus\.safaribooksonline\.com
) )
/(?P<id>[^/]+)/?(?:[#?]|$) /(?P<id>[^/]+)
''' '''
_TESTS = [{ _TESTS = [{
@ -179,8 +210,16 @@ class SafariCourseIE(SafariBaseIE):
}, { }, {
'url': 'http://techbus.safaribooksonline.com/9780134426365', 'url': 'http://techbus.safaribooksonline.com/9780134426365',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314',
'only_matching': True,
}] }]
@classmethod
def suitable(cls, url):
    """Return whether this course extractor should handle *url*.

    The course URL pattern does not anchor the end of the URL, so it can
    also match URLs that belong to the more specific extractors. Those
    take priority: if SafariIE or SafariApiIE accepts the URL, this
    extractor declines it. Otherwise the decision is delegated to the
    parent class implementation.
    """
    # Let the more specific extractors claim the URL first.
    if SafariIE.suitable(url) or SafariApiIE.suitable(url):
        return False
    return super(SafariCourseIE, cls).suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
course_id = self._match_id(url) course_id = self._match_id(url)