[utils] Handle HTMLParseError in extract_attributes (closes #13349)
This commit is contained in:
parent
72b409559c
commit
b4a3d461e4
2 changed files with 9 additions and 2 deletions
|
@ -916,6 +916,8 @@ class TestUtil(unittest.TestCase):
|
||||||
supports_outside_bmp = False
|
supports_outside_bmp = False
|
||||||
if supports_outside_bmp:
|
if supports_outside_bmp:
|
||||||
self.assertEqual(extract_attributes('<e x="Smile 😀!">'), {'x': 'Smile \U0001f600!'})
|
self.assertEqual(extract_attributes('<e x="Smile 😀!">'), {'x': 'Smile \U0001f600!'})
|
||||||
|
# Malformed HTML should not break attribute extraction on older Python
|
||||||
|
self.assertEqual(extract_attributes('<mal"formed/>'), {})
|
||||||
|
|
||||||
def test_clean_html(self):
|
def test_clean_html(self):
|
||||||
self.assertEqual(clean_html('a:\nb'), 'a: b')
|
self.assertEqual(clean_html('a:\nb'), 'a: b')
|
||||||
|
|
|
@ -36,6 +36,7 @@ import xml.etree.ElementTree
|
||||||
import zlib
|
import zlib
|
||||||
|
|
||||||
from .compat import (
|
from .compat import (
|
||||||
|
compat_HTMLParseError,
|
||||||
compat_HTMLParser,
|
compat_HTMLParser,
|
||||||
compat_basestring,
|
compat_basestring,
|
||||||
compat_chr,
|
compat_chr,
|
||||||
|
@ -409,8 +410,12 @@ def extract_attributes(html_element):
|
||||||
but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
|
but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
|
||||||
"""
|
"""
|
||||||
parser = HTMLAttributeParser()
|
parser = HTMLAttributeParser()
|
||||||
|
try:
|
||||||
parser.feed(html_element)
|
parser.feed(html_element)
|
||||||
parser.close()
|
parser.close()
|
||||||
|
# Older Python may raise HTMLParseError in case of malformed HTML
|
||||||
|
except compat_HTMLParseError:
|
||||||
|
pass
|
||||||
return parser.attrs
|
return parser.attrs
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue