Document and test categories (#2923)
This commit is contained in:
parent
5afa7f8bee
commit
ad3bc6acd5
2 changed files with 11 additions and 7 deletions
|
@ -113,6 +113,8 @@ class InfoExtractor(object):
|
||||||
webpage_url: The url to the video webpage, if given to youtube-dl it
|
webpage_url: The url to the video webpage, if given to youtube-dl it
|
||||||
should allow to get the same result again. (It will be set
|
should allow to get the same result again. (It will be set
|
||||||
by YoutubeDL if it's missing)
|
by YoutubeDL if it's missing)
|
||||||
|
categories: A list of categories that the video falls in, for example
|
||||||
|
["Sports", "Berlin"]
|
||||||
|
|
||||||
Unless mentioned otherwise, the fields should be Unicode strings.
|
Unless mentioned otherwise, the fields should be Unicode strings.
|
||||||
|
|
||||||
|
|
|
@ -242,7 +242,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||||
u"uploader": u"Philipp Hagemeister",
|
u"uploader": u"Philipp Hagemeister",
|
||||||
u"uploader_id": u"phihag",
|
u"uploader_id": u"phihag",
|
||||||
u"upload_date": u"20121002",
|
u"upload_date": u"20121002",
|
||||||
u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
|
u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .",
|
||||||
|
u"categories": [u'Science & Technology'],
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -1136,18 +1137,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||||
|
|
||||||
# upload date
|
# upload date
|
||||||
upload_date = None
|
upload_date = None
|
||||||
mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
|
mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
|
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
|
||||||
upload_date = unified_strdate(upload_date)
|
upload_date = unified_strdate(upload_date)
|
||||||
|
|
||||||
|
|
||||||
video_categories = []
|
|
||||||
# categories
|
|
||||||
m_cat_container = get_element_by_id("eow-category", video_webpage)
|
m_cat_container = get_element_by_id("eow-category", video_webpage)
|
||||||
if m_cat_container:
|
if m_cat_container:
|
||||||
video_categories = re.findall(r'<a[^<]+>(.*?)</a>',
|
category = self._html_search_regex(
|
||||||
m_cat_container, re.DOTALL)
|
r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'cateory',
|
||||||
|
default=None)
|
||||||
|
video_categories = None if category is None else [category]
|
||||||
|
else:
|
||||||
|
video_categories = None
|
||||||
|
|
||||||
# description
|
# description
|
||||||
video_description = get_element_by_id("eow-description", video_webpage)
|
video_description = get_element_by_id("eow-description", video_webpage)
|
||||||
|
|
Loading…
Reference in a new issue