[facebook] Add support for tahoe player videos (closes #15441)

Some videos appear to use a newer/different ("tahoe") player. This requires
a second request for the video data, as the response to the initial request
does not contain it.

Additionally, these videos have different page content for the uploader
value, which is stored in the `<meta property="og:title"...>` element of
the page returned by the initial request.
This commit is contained in:
Nathan Rossi 2018-05-26 02:34:22 +10:00 committed by Sergey M․
parent f20f636596
commit 9d082e7cb8
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@ -56,6 +56,7 @@ class FacebookIE(InfoExtractor):
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36' _CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s' _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true'
_TESTS = [{ _TESTS = [{
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf', 'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
@ -208,6 +209,17 @@ class FacebookIE(InfoExtractor):
# no title # no title
'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/', 'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/',
'info_dict': {
'id': '359649331226507',
'ext': 'mp4',
'title': '#ESLOne VoD - Birmingham Finals Day#1 Fnatic vs. @Evil Geniuses',
'uploader': 'ESL One Dota 2',
},
'params': {
'skip_download': True,
},
}] }]
@staticmethod @staticmethod
@ -323,6 +335,24 @@ class FacebookIE(InfoExtractor):
server_js_data, lambda x: x['jsmods']['instances'], server_js_data, lambda x: x['jsmods']['instances'],
list) or []) list) or [])
if not video_data:
# video info not in first request, do a secondary request using tahoe player specific url
tahoe_data = self._download_webpage(
self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
data=urlencode_postdata({
'__user': 0,
'__a': 1,
'__pc': self._search_regex(r'"pkg_cohort":"(.*?)"', webpage, 'pkg cohort', default='PHASED:DEFAULT'),
'__rev': self._search_regex(r'"client_revision":(\d+),', webpage, 'client revision', default=3944515),
}),
headers={
'Content-Type': 'application/x-www-form-urlencoded',
})
tahoe_js_data = self._parse_json(self._search_regex(
r'for \(;;\);(.+)', tahoe_data,
'tahoe js data', default='{}'), video_id, fatal=False)
video_data = extract_video_data(tahoe_js_data.get('jsmods', {}).get('instances', []))
if not video_data: if not video_data:
if not fatal_if_no_video: if not fatal_if_no_video:
return webpage, False return webpage, False
@ -378,9 +408,11 @@ class FacebookIE(InfoExtractor):
video_title = limit_length(video_title, 80) video_title = limit_length(video_title, 80)
else: else:
video_title = 'Facebook video #%s' % video_id video_title = 'Facebook video #%s' % video_id
uploader = clean_html(get_element_by_id( uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
'fbPhotoPageAuthorName', webpage)) or self._search_regex( if not uploader:
r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', fatal=False) uploader = self._search_regex(
[r'ownerName\s*:\s*"([^"]+)"', r'property="og:title"\s*content="(.*?)"'],
webpage, 'uploader', fatal=False)
timestamp = int_or_none(self._search_regex( timestamp = int_or_none(self._search_regex(
r'<abbr[^>]+data-utime=["\'](\d+)', webpage, r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
'timestamp', default=None)) 'timestamp', default=None))