[Sohu] Fix title extraction

This commit is contained in:
Yen Chi Hsuan 2015-03-15 01:05:01 +08:00
parent cd65491c30
commit 2cb434e53e
1 changed file with 13 additions and 4 deletions

View File

@ -73,6 +73,17 @@ class SohuIE(InfoExtractor):
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆', 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
} }
}] }]
}, {
'info': 'Video with title containing dash',
'url': 'http://my.tv.sohu.com/us/249884221/78932792.shtml',
'info_dict': {
'id': '78932792',
'ext': 'mp4',
'title': 'youtube-dl testing video',
},
'params': {
'skip_download': True
}
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -97,10 +108,8 @@ class SohuIE(InfoExtractor):
mytv = mobj.group('mytv') is not None mytv = mobj.group('mytv') is not None
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
raw_title = self._html_search_regex(
r'(?s)<title>(.+?)</title>', title = self._og_search_title(webpage)
webpage, 'video title')
title = raw_title.partition('-')[0].strip()
vid = self._html_search_regex( vid = self._html_search_regex(
r'var vid ?= ?["\'](\d+)["\']', r'var vid ?= ?["\'](\d+)["\']',