Strip invalid XML characters from response
This commit is contained in:
parent
2469a6ea47
commit
9676714dcf
|
@ -24,6 +24,7 @@ import hashlib
|
||||||
import html.entities
|
import html.entities
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import shelve
|
import shelve
|
||||||
import ssl
|
import ssl
|
||||||
import tempfile
|
import tempfile
|
||||||
|
@ -969,7 +970,7 @@ class _Request:
|
||||||
conn.close()
|
conn.close()
|
||||||
return response_text
|
return response_text
|
||||||
|
|
||||||
def execute(self, cacheable=False):
|
def execute(self, cacheable: bool = False) -> xml.dom.minidom.Document:
|
||||||
"""Returns the XML DOM response of the POST Request from the server"""
|
"""Returns the XML DOM response of the POST Request from the server"""
|
||||||
|
|
||||||
if self.network.is_caching_enabled() and cacheable:
|
if self.network.is_caching_enabled() and cacheable:
|
||||||
|
@ -977,13 +978,12 @@ class _Request:
|
||||||
else:
|
else:
|
||||||
response = self._download_response()
|
response = self._download_response()
|
||||||
|
|
||||||
return minidom.parseString(_string(response).replace("opensearch:", ""))
|
return _parse_response(response)
|
||||||
|
|
||||||
def _check_response_for_errors(self, response):
|
def _check_response_for_errors(self, response):
|
||||||
"""Checks the response for errors and raises one if any exists."""
|
"""Checks the response for errors and raises one if any exists."""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
doc = minidom.parseString(_string(response).replace("opensearch:", ""))
|
doc = _parse_response(response)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise MalformedResponseError(self.network, e) from e
|
raise MalformedResponseError(self.network, e) from e
|
||||||
|
|
||||||
|
@ -2950,4 +2950,20 @@ def _unescape_htmlentity(string):
|
||||||
return string
|
return string
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_response(response: str) -> xml.dom.minidom.Document:
    """Parse a raw server response into an XML DOM document.

    The response is passed through ``_string`` and every occurrence of the
    text ``"opensearch:"`` is removed before parsing with ``minidom``.

    If expat rejects the text, it is parsed a second time after being run
    through ``_remove_invalid_xml_chars``. An error raised by that second
    attempt is not caught here and propagates to the caller.
    """
    cleaned = _string(response).replace("opensearch:", "")
    try:
        return minidom.parseString(cleaned)
    except xml.parsers.expat.ExpatError:
        # Scrubbing every response would cost time on the common, well-formed
        # case, so the invalid characters are only removed once parsing has
        # actually failed.
        return minidom.parseString(_remove_invalid_xml_chars(cleaned))
|
||||||
|
|
||||||
|
|
||||||
|
def _remove_invalid_xml_chars(string: str) -> str:
|
||||||
|
return re.sub(
|
||||||
|
r"[^\u0009\u000A\u000D\u0020-\uD7FF\uE000-\uFFFD\u10000-\u10FFF]+", "", string
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# End of file
|
# End of file
|
||||||
|
|
|
@ -27,3 +27,42 @@ def test_get_cache_key(artist):
|
||||||
def test_cast_and_hash(obj):
|
def test_cast_and_hash(obj):
|
||||||
assert type(str(obj)) is str
|
assert type(str(obj)) is str
|
||||||
assert isinstance(hash(obj), int)
|
assert isinstance(hash(obj), int)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    ("test_input", "expected"),
    [
        (
            # Nothing to strip: ordinary text comes back unchanged
            '<album mbid="">test album name</album>',
            '<album mbid="">test album name</album>',
        ),
        (
            # An embedded U+0005 (ENQ) control character is dropped
            '<album mbid="">test album \u0005name</album>',
            '<album mbid="">test album name</album>',
        ),
    ],
)
def test__remove_invalid_xml_chars(test_input: str, expected: str) -> None:
    """Check the text returned by ``_remove_invalid_xml_chars`` for each case."""
    cleaned = pylast._remove_invalid_xml_chars(test_input)
    assert cleaned == expected
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    ("test_input", "expected"),
    [
        (
            # Well-formed input parses as-is
            '<album mbid="">test album name</album>',
            '<?xml version="1.0" ?><album mbid="">test album name</album>',
        ),
        (
            # Input holding a U+0005 (ENQ) control character still parses,
            # and the character is absent from the serialised document
            '<album mbid="">test album \u0005name</album>',
            '<?xml version="1.0" ?><album mbid="">test album name</album>',
        ),
    ],
)
def test__parse_response(test_input: str, expected: str) -> None:
    """Check the serialised form of the document ``_parse_response`` builds."""
    serialised = pylast._parse_response(test_input).toxml()
    assert serialised == expected
|
||||||
|
|
Loading…
Reference in a new issue