[vk:uservideos] Improve extraction
This commit is contained in:
parent
74fe23ec35
commit
dc786d3db5
1 changed file with 18 additions and 9 deletions
|
@@ -291,23 +291,32 @@ class VKIE(InfoExtractor):
|
||||||
class VKUserVideosIE(InfoExtractor):
|
class VKUserVideosIE(InfoExtractor):
|
||||||
IE_NAME = 'vk.com:user-videos'
|
IE_NAME = 'vk.com:user-videos'
|
||||||
IE_DESC = 'vk.com:All of a user\'s videos'
|
IE_DESC = 'vk.com:All of a user\'s videos'
|
||||||
_VALID_URL = r'https?://vk\.com/videos(?P<id>[0-9]+)(?:m\?.*)?'
|
_VALID_URL = r'https?://vk\.com/videos(?P<id>-?[0-9]+)$'
|
||||||
_TEMPLATE_URL = 'https://vk.com/videos'
|
_TEMPLATE_URL = 'https://vk.com/videos'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://vk.com/videos205387401',
|
'url': 'http://vk.com/videos205387401',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '205387401',
|
'id': '205387401',
|
||||||
|
'title': "Tom Cruise's Videos",
|
||||||
},
|
},
|
||||||
'playlist_mincount': 4,
|
'playlist_mincount': 4,
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://vk.com/videos-77521',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
page_id = self._match_id(url)
|
page_id = self._match_id(url)
|
||||||
page = self._download_webpage(url, page_id)
|
|
||||||
video_ids = orderedSet(
|
webpage = self._download_webpage(url, page_id)
|
||||||
m.group(1) for m in re.finditer(r'href="/video([0-9_]+)"', page))
|
|
||||||
url_entries = [
|
entries = [
|
||||||
self.url_result(
|
self.url_result(
|
||||||
'http://vk.com/video' + video_id, 'VK', video_id=video_id)
|
'http://vk.com/video' + video_id, 'VK', video_id=video_id)
|
||||||
for video_id in video_ids]
|
for video_id in set(re.findall(r'href="/video(-?[0-9_]+)"', webpage))]
|
||||||
return self.playlist_result(url_entries, page_id)
|
|
||||||
|
title = unescapeHTML(self._search_regex(
|
||||||
|
r'<title>\s*([^<]+?)\s+\|\s+\d+\s+videos',
|
||||||
|
webpage, 'title', default=page_id))
|
||||||
|
|
||||||
|
return self.playlist_result(entries, page_id, title)
|
||||||
|
|
Loading…
Reference in a new issue