Correct distinction between unicode and bytes (Closes: #257)
This commit is contained in:
parent
c92e184f75
commit
1413cd87eb
1 changed files with 10 additions and 3 deletions
|
@ -290,6 +290,15 @@ def _orderedSet(iterable):
|
|||
res.append(el)
|
||||
return res
|
||||
|
||||
def _unescapeHTML(s):
|
||||
"""
|
||||
@param s a string (of type unicode)
|
||||
"""
|
||||
assert type(s) == type(u'')
|
||||
|
||||
htmlParser = HTMLParser.HTMLParser()
|
||||
return htmlParser.unescape(s)
|
||||
|
||||
class DownloadError(Exception):
|
||||
"""Download Error exception.
|
||||
|
||||
|
@ -1590,8 +1599,6 @@ class DailymotionIE(InfoExtractor):
|
|||
self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
htmlParser = HTMLParser.HTMLParser()
|
||||
|
||||
# Extract id and simplified title from URL
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
|
@ -1635,7 +1642,7 @@ class DailymotionIE(InfoExtractor):
|
|||
if mobj is None:
|
||||
self._downloader.trouble(u'ERROR: unable to extract title')
|
||||
return
|
||||
video_title = htmlParser.unescape(mobj.group('title')).decode('utf-8')
|
||||
video_title = _unescapeHTML(mobj.group('title').decode('utf-8'))
|
||||
video_title = sanitize_title(video_title)
|
||||
simple_title = _simplify_title(video_title)
|
||||
|
||||
|
|
Loading…
Reference in a new issue