Full YouTube video descriptions, including special characters (Python 2.6+, with fallback for older Pythons)
This commit is contained in:
parent
aded78d9e2
commit
c6b55a8d48
1 changed file with 30 additions and 8 deletions
32
youtube-dl
32
youtube-dl
|
@ -15,7 +15,6 @@ import email.utils
|
||||||
import gzip
|
import gzip
|
||||||
import htmlentitydefs
|
import htmlentitydefs
|
||||||
import httplib
|
import httplib
|
||||||
import json # TODO: json for 2.5
|
|
||||||
import locale
|
import locale
|
||||||
import math
|
import math
|
||||||
import netrc
|
import netrc
|
||||||
|
@ -24,20 +23,35 @@ import os.path
|
||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
import string
|
import string
|
||||||
import StringIO
|
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import urllib
|
import urllib
|
||||||
import urllib2
|
import urllib2
|
||||||
|
import warnings
|
||||||
import zlib
|
import zlib
|
||||||
|
|
||||||
|
try:
|
||||||
|
import json
|
||||||
|
except ImportError:
|
||||||
|
warnings.warn('No JSON support (TODO: insert trivialjson here)')
|
||||||
|
|
||||||
|
try:
|
||||||
|
import cStringIO as StringIO
|
||||||
|
except ImportError:
|
||||||
|
import StringIO
|
||||||
|
|
||||||
# parse_qs was moved from the cgi module to the urlparse module recently.
|
# parse_qs was moved from the cgi module to the urlparse module recently.
|
||||||
try:
|
try:
|
||||||
from urlparse import parse_qs
|
from urlparse import parse_qs
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from cgi import parse_qs
|
from cgi import parse_qs
|
||||||
|
|
||||||
|
try:
|
||||||
|
import lxml.etree
|
||||||
|
except ImportError: # Python < 2.6
|
||||||
|
pass # Handled below
|
||||||
|
|
||||||
std_headers = {
|
std_headers = {
|
||||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b11) Gecko/20100101 Firefox/4.0b11',
|
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b11) Gecko/20100101 Firefox/4.0b11',
|
||||||
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
|
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
|
||||||
|
@ -1068,11 +1082,19 @@ class YoutubeIE(InfoExtractor):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# description
|
# description
|
||||||
video_description = 'No description available.'
|
try:
|
||||||
|
lxml.etree
|
||||||
|
except NameError:
|
||||||
|
video_description = u'No description available.'
|
||||||
if self._downloader.params.get('forcedescription', False):
|
if self._downloader.params.get('forcedescription', False):
|
||||||
|
warnings.warn(u'You are using an old Python version, install Python 2.6+ or lxml. Falling back to old video description extractor.')
|
||||||
mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
|
mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
video_description = mobj.group(1)
|
video_description = mobj.group(1).decode('utf-8')
|
||||||
|
else:
|
||||||
|
html_parser = lxml.etree.HTMLParser(encoding='utf-8')
|
||||||
|
vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser)
|
||||||
|
video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()'))
|
||||||
|
|
||||||
# token
|
# token
|
||||||
video_token = urllib.unquote_plus(video_info['token'][0])
|
video_token = urllib.unquote_plus(video_info['token'][0])
|
||||||
|
@ -1130,7 +1152,7 @@ class YoutubeIE(InfoExtractor):
|
||||||
'ext': video_extension.decode('utf-8'),
|
'ext': video_extension.decode('utf-8'),
|
||||||
'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
|
'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
|
||||||
'thumbnail': video_thumbnail.decode('utf-8'),
|
'thumbnail': video_thumbnail.decode('utf-8'),
|
||||||
'description': video_description.decode('utf-8'),
|
'description': video_description,
|
||||||
'player_url': player_url,
|
'player_url': player_url,
|
||||||
})
|
})
|
||||||
except UnavailableVideoError, err:
|
except UnavailableVideoError, err:
|
||||||
|
|
Loading…
Reference in a new issue