Use a wrapper around xml.etree.ElementTree.fromstring in python 2.x (#7178)
On Python 2.x, the attribute values of parsed XML elements are byte strings (str) rather than unicode objects, so they cannot be used directly in info_dict fields (for example, '--write-description' does not work with bytes).
This commit is contained in:
		
							parent
							
								
									755ff8d22c
								
							
						
					
					
						commit
						36e6f62cd0
					
				
					 11 changed files with 61 additions and 21 deletions
				
			
		| 
						 | 
				
			
			@ -13,8 +13,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 | 
			
		|||
from youtube_dl.utils import get_filesystem_encoding
 | 
			
		||||
from youtube_dl.compat import (
 | 
			
		||||
    compat_getenv,
 | 
			
		||||
    compat_etree_fromstring,
 | 
			
		||||
    compat_expanduser,
 | 
			
		||||
    compat_shlex_split,
 | 
			
		||||
    compat_str,
 | 
			
		||||
    compat_urllib_parse_unquote,
 | 
			
		||||
    compat_urllib_parse_unquote_plus,
 | 
			
		||||
)
 | 
			
		||||
| 
						 | 
				
			
			@ -71,5 +73,10 @@ class TestCompat(unittest.TestCase):
 | 
			
		|||
    def test_compat_shlex_split(self):
 | 
			
		||||
        self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
 | 
			
		||||
 | 
			
		||||
    def test_compat_etree_fromstring(self):
        # Attribute values of a document parsed from UTF-8 encoded bytes must
        # be text (compat_str), never byte strings.
        root = compat_etree_fromstring('<el foo="bar"></el>'.encode('utf-8'))
        attr_value = root.attrib['foo']
        self.assertTrue(isinstance(attr_value, compat_str))
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    unittest.main()
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -68,6 +68,9 @@ from youtube_dl.utils import (
 | 
			
		|||
    cli_valueless_option,
 | 
			
		||||
    cli_bool_option,
 | 
			
		||||
)
 | 
			
		||||
from youtube_dl.compat import (
 | 
			
		||||
    compat_etree_fromstring,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TestUtil(unittest.TestCase):
 | 
			
		||||
| 
						 | 
				
			
			@ -242,7 +245,7 @@ class TestUtil(unittest.TestCase):
 | 
			
		|||
            <node x="b" y="d" />
 | 
			
		||||
            <node x="" />
 | 
			
		||||
        </root>'''
 | 
			
		||||
        doc = xml.etree.ElementTree.fromstring(testxml)
 | 
			
		||||
        doc = compat_etree_fromstring(testxml)
 | 
			
		||||
 | 
			
		||||
        self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n'), None)
 | 
			
		||||
        self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None)
 | 
			
		||||
| 
						 | 
				
			
			@ -263,7 +266,7 @@ class TestUtil(unittest.TestCase):
 | 
			
		|||
                <url>http://server.com/download.mp3</url>
 | 
			
		||||
            </media:song>
 | 
			
		||||
        </root>'''
 | 
			
		||||
        doc = xml.etree.ElementTree.fromstring(testxml)
 | 
			
		||||
        doc = compat_etree_fromstring(testxml)
 | 
			
		||||
        find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'}))
 | 
			
		||||
        self.assertTrue(find('media:song') is not None)
 | 
			
		||||
        self.assertEqual(find('media:song/media:author').text, 'The Author')
 | 
			
		||||
| 
						 | 
				
			
			@ -285,7 +288,7 @@ class TestUtil(unittest.TestCase):
 | 
			
		|||
                <p>Foo</p>
 | 
			
		||||
            </div>
 | 
			
		||||
        </root>'''
 | 
			
		||||
        doc = xml.etree.ElementTree.fromstring(testxml)
 | 
			
		||||
        doc = compat_etree_fromstring(testxml)
 | 
			
		||||
        self.assertEqual(xpath_text(doc, 'div/p'), 'Foo')
 | 
			
		||||
        self.assertEqual(xpath_text(doc, 'div/bar', default='default'), 'default')
 | 
			
		||||
        self.assertTrue(xpath_text(doc, 'div/bar') is None)
 | 
			
		||||
| 
						 | 
				
			
			@ -297,7 +300,7 @@ class TestUtil(unittest.TestCase):
 | 
			
		|||
                <p x="a">Foo</p>
 | 
			
		||||
            </div>
 | 
			
		||||
        </root>'''
 | 
			
		||||
        doc = xml.etree.ElementTree.fromstring(testxml)
 | 
			
		||||
        doc = compat_etree_fromstring(testxml)
 | 
			
		||||
        self.assertEqual(xpath_attr(doc, 'div/p', 'x'), 'a')
 | 
			
		||||
        self.assertEqual(xpath_attr(doc, 'div/bar', 'x'), None)
 | 
			
		||||
        self.assertEqual(xpath_attr(doc, 'div/p', 'y'), None)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -14,6 +14,7 @@ import socket
 | 
			
		|||
import subprocess
 | 
			
		||||
import sys
 | 
			
		||||
import itertools
 | 
			
		||||
import xml.etree.ElementTree
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
try:
 | 
			
		||||
| 
						 | 
				
			
			@ -212,6 +213,29 @@ try:
 | 
			
		|||
except ImportError:  # Python 2.6
 | 
			
		||||
    from xml.parsers.expat import ExpatError as compat_xml_parse_error
 | 
			
		||||
 | 
			
		||||
if sys.version_info[0] >= 3:
    # Python 3: the standard parser is used as-is.
    compat_etree_fromstring = xml.etree.ElementTree.fromstring
else:
    # On Python 2.x the attributes of a node are str (byte string) objects
    # instead of unicode, so they are decoded while the tree is being built.
    etree = xml.etree.ElementTree

    # On 2.6 XML doesn't have a parser argument; function copied from the
    # CPython 2.7 source.
    def _XML(text, parser=None):
        """Feed ``text`` to ``parser`` and return the resulting root element.

        A default XMLParser/TreeBuilder pair is created when no parser is
        supplied.
        """
        if not parser:
            parser = etree.XMLParser(target=etree.TreeBuilder())
        parser.feed(text)
        return parser.close()

    def _element_factory(*args, **kwargs):
        """Create an Element whose byte-string attribute values are decoded.

        Accepts the same arguments as ``etree.Element`` and returns the new
        element with every ``bytes`` attribute value replaced by its UTF-8
        decoded form.
        """
        el = etree.Element(*args, **kwargs)
        for k, v in el.items():
            # Only byte strings need decoding.  Calling .decode() on a value
            # that is already unicode makes Python 2 encode it to ASCII
            # first, which raises UnicodeEncodeError for non-ASCII text.
            if isinstance(v, bytes):
                el.set(k, v.decode('utf-8'))
        return el

    def compat_etree_fromstring(text):
        """Parse XML from ``text`` and return the root element.

        Elements are built through ``_element_factory`` so that attribute
        values are unicode objects rather than byte strings.
        """
        return _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
 | 
			
		||||
 | 
			
		||||
try:
 | 
			
		||||
    from urllib.parse import parse_qs as compat_parse_qs
 | 
			
		||||
| 
						 | 
				
			
			@ -507,6 +531,7 @@ __all__ = [
 | 
			
		|||
    'compat_chr',
 | 
			
		||||
    'compat_cookiejar',
 | 
			
		||||
    'compat_cookies',
 | 
			
		||||
    'compat_etree_fromstring',
 | 
			
		||||
    'compat_expanduser',
 | 
			
		||||
    'compat_get_terminal_size',
 | 
			
		||||
    'compat_getenv',
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -5,10 +5,10 @@ import io
 | 
			
		|||
import itertools
 | 
			
		||||
import os
 | 
			
		||||
import time
 | 
			
		||||
import xml.etree.ElementTree as etree
 | 
			
		||||
 | 
			
		||||
from .fragment import FragmentFD
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_etree_fromstring,
 | 
			
		||||
    compat_urlparse,
 | 
			
		||||
    compat_urllib_error,
 | 
			
		||||
    compat_urllib_parse_urlparse,
 | 
			
		||||
| 
						 | 
				
			
			@ -290,7 +290,7 @@ class F4mFD(FragmentFD):
 | 
			
		|||
        man_url = urlh.geturl()
 | 
			
		||||
        manifest = urlh.read()
 | 
			
		||||
 | 
			
		||||
        doc = etree.fromstring(manifest)
 | 
			
		||||
        doc = compat_etree_fromstring(manifest)
 | 
			
		||||
        formats = [(int(f.attrib.get('bitrate', -1)), f)
 | 
			
		||||
                   for f in self._get_unencrypted_media(doc)]
 | 
			
		||||
        if requested_bitrate is None:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2,7 +2,6 @@
 | 
			
		|||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
import xml.etree.ElementTree
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
| 
						 | 
				
			
			@ -14,7 +13,10 @@ from ..utils import (
 | 
			
		|||
    remove_end,
 | 
			
		||||
    unescapeHTML,
 | 
			
		||||
)
 | 
			
		||||
from ..compat import compat_HTTPError
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_etree_fromstring,
 | 
			
		||||
    compat_HTTPError,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class BBCCoUkIE(InfoExtractor):
 | 
			
		||||
| 
						 | 
				
			
			@ -344,7 +346,7 @@ class BBCCoUkIE(InfoExtractor):
 | 
			
		|||
                url, programme_id, 'Downloading media selection XML')
 | 
			
		||||
        except ExtractorError as ee:
 | 
			
		||||
            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
 | 
			
		||||
                media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().decode('utf-8'))
 | 
			
		||||
                media_selection = compat_etree_fromstring(ee.cause.read().decode('utf-8'))
 | 
			
		||||
            else:
 | 
			
		||||
                raise
 | 
			
		||||
        return self._process_media_selector(media_selection, programme_id)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -4,9 +4,11 @@ from __future__ import unicode_literals
 | 
			
		|||
import re
 | 
			
		||||
import itertools
 | 
			
		||||
import json
 | 
			
		||||
import xml.etree.ElementTree as ET
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_etree_fromstring,
 | 
			
		||||
)
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
| 
						 | 
				
			
			@ -88,7 +90,7 @@ class BiliBiliIE(InfoExtractor):
 | 
			
		|||
        except ValueError:
 | 
			
		||||
            pass
 | 
			
		||||
 | 
			
		||||
        lq_doc = ET.fromstring(lq_page)
 | 
			
		||||
        lq_doc = compat_etree_fromstring(lq_page)
 | 
			
		||||
        lq_durls = lq_doc.findall('./durl')
 | 
			
		||||
 | 
			
		||||
        hq_doc = self._download_xml(
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3,10 +3,10 @@ from __future__ import unicode_literals
 | 
			
		|||
 | 
			
		||||
import re
 | 
			
		||||
import json
 | 
			
		||||
import xml.etree.ElementTree
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_etree_fromstring,
 | 
			
		||||
    compat_parse_qs,
 | 
			
		||||
    compat_str,
 | 
			
		||||
    compat_urllib_parse,
 | 
			
		||||
| 
						 | 
				
			
			@ -119,7 +119,7 @@ class BrightcoveIE(InfoExtractor):
 | 
			
		|||
        object_str = fix_xml_ampersands(object_str)
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
 | 
			
		||||
            object_doc = compat_etree_fromstring(object_str.encode('utf-8'))
 | 
			
		||||
        except compat_xml_parse_error:
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -10,7 +10,6 @@ import re
 | 
			
		|||
import socket
 | 
			
		||||
import sys
 | 
			
		||||
import time
 | 
			
		||||
import xml.etree.ElementTree
 | 
			
		||||
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_cookiejar,
 | 
			
		||||
| 
						 | 
				
			
			@ -23,6 +22,7 @@ from ..compat import (
 | 
			
		|||
    compat_urllib_request,
 | 
			
		||||
    compat_urlparse,
 | 
			
		||||
    compat_str,
 | 
			
		||||
    compat_etree_fromstring,
 | 
			
		||||
)
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    NO_DEFAULT,
 | 
			
		||||
| 
						 | 
				
			
			@ -461,7 +461,7 @@ class InfoExtractor(object):
 | 
			
		|||
            return xml_string
 | 
			
		||||
        if transform_source:
 | 
			
		||||
            xml_string = transform_source(xml_string)
 | 
			
		||||
        return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
 | 
			
		||||
        return compat_etree_fromstring(xml_string.encode('utf-8'))
 | 
			
		||||
 | 
			
		||||
    def _download_json(self, url_or_request, video_id,
 | 
			
		||||
                       note='Downloading JSON metadata',
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -5,12 +5,12 @@ import re
 | 
			
		|||
import json
 | 
			
		||||
import base64
 | 
			
		||||
import zlib
 | 
			
		||||
import xml.etree.ElementTree
 | 
			
		||||
 | 
			
		||||
from hashlib import sha1
 | 
			
		||||
from math import pow, sqrt, floor
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_etree_fromstring,
 | 
			
		||||
    compat_urllib_parse,
 | 
			
		||||
    compat_urllib_parse_unquote,
 | 
			
		||||
    compat_urllib_request,
 | 
			
		||||
| 
						 | 
				
			
			@ -234,7 +234,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 | 
			
		|||
        return output
 | 
			
		||||
 | 
			
		||||
    def _extract_subtitles(self, subtitle):
 | 
			
		||||
        sub_root = xml.etree.ElementTree.fromstring(subtitle)
 | 
			
		||||
        sub_root = compat_etree_fromstring(subtitle)
 | 
			
		||||
        return [{
 | 
			
		||||
            'ext': 'srt',
 | 
			
		||||
            'data': self._convert_subtitles_to_srt(sub_root),
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,10 +1,10 @@
 | 
			
		|||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
import xml.etree.ElementTree
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_etree_fromstring,
 | 
			
		||||
    compat_urllib_request,
 | 
			
		||||
)
 | 
			
		||||
from ..utils import (
 | 
			
		||||
| 
						 | 
				
			
			@ -97,7 +97,7 @@ class VevoIE(InfoExtractor):
 | 
			
		|||
        if last_version['version'] == -1:
 | 
			
		||||
            raise ExtractorError('Unable to extract last version of the video')
 | 
			
		||||
 | 
			
		||||
        renditions = xml.etree.ElementTree.fromstring(last_version['data'])
 | 
			
		||||
        renditions = compat_etree_fromstring(last_version['data'])
 | 
			
		||||
        formats = []
 | 
			
		||||
        # Already sorted from worst to best quality
 | 
			
		||||
        for rend in renditions.findall('rendition'):
 | 
			
		||||
| 
						 | 
				
			
			@ -114,7 +114,7 @@ class VevoIE(InfoExtractor):
 | 
			
		|||
 | 
			
		||||
    def _formats_from_smil(self, smil_xml):
 | 
			
		||||
        formats = []
 | 
			
		||||
        smil_doc = xml.etree.ElementTree.fromstring(smil_xml.encode('utf-8'))
 | 
			
		||||
        smil_doc = compat_etree_fromstring(smil_xml.encode('utf-8'))
 | 
			
		||||
        els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
 | 
			
		||||
        for el in els:
 | 
			
		||||
            src = el.attrib['src']
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -36,6 +36,7 @@ import zlib
 | 
			
		|||
from .compat import (
 | 
			
		||||
    compat_basestring,
 | 
			
		||||
    compat_chr,
 | 
			
		||||
    compat_etree_fromstring,
 | 
			
		||||
    compat_html_entities,
 | 
			
		||||
    compat_http_client,
 | 
			
		||||
    compat_kwargs,
 | 
			
		||||
| 
						 | 
				
			
			@ -1974,7 +1975,7 @@ def dfxp2srt(dfxp_data):
 | 
			
		|||
 | 
			
		||||
        return out
 | 
			
		||||
 | 
			
		||||
    dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
 | 
			
		||||
    dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
 | 
			
		||||
    out = []
 | 
			
		||||
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue