diff --git a/src/pylast/__init__.py b/src/pylast/__init__.py
index 54d4d40..e32e849 100644
--- a/src/pylast/__init__.py
+++ b/src/pylast/__init__.py
@@ -24,6 +24,7 @@ import hashlib
import html.entities
import logging
import os
+import re
import shelve
import ssl
import tempfile
@@ -969,7 +970,7 @@ class _Request:
conn.close()
return response_text
- def execute(self, cacheable=False):
+ def execute(self, cacheable: bool = False) -> xml.dom.minidom.Document:
"""Returns the XML DOM response of the POST Request from the server"""
if self.network.is_caching_enabled() and cacheable:
@@ -977,13 +978,12 @@ class _Request:
else:
response = self._download_response()
- return minidom.parseString(_string(response).replace("opensearch:", ""))
+ return _parse_response(response)
def _check_response_for_errors(self, response):
"""Checks the response for errors and raises one if any exists."""
-
try:
- doc = minidom.parseString(_string(response).replace("opensearch:", ""))
+ doc = _parse_response(response)
except Exception as e:
raise MalformedResponseError(self.network, e) from e
@@ -2950,4 +2950,20 @@ def _unescape_htmlentity(string):
return string
+def _parse_response(response: str) -> xml.dom.minidom.Document:
+    """Parse a raw server response into an XML DOM document.
+
+    The response is passed through ``_string`` and every occurrence of the
+    literal text ``opensearch:`` is dropped before parsing. If expat rejects
+    the text, it is parsed once more after ``_remove_invalid_xml_chars`` has
+    been applied; an error from that second attempt propagates to the caller.
+    """
+    text = _string(response).replace("opensearch:", "")
+    try:
+        return minidom.parseString(text)
+    except xml.parsers.expat.ExpatError:
+        # Retry on a sanitised copy. Sanitising only after a failure keeps the
+        # regex pass off the common path, where the response parses as-is.
+        return minidom.parseString(_remove_invalid_xml_chars(text))
+
+
+def _remove_invalid_xml_chars(string: str) -> str:
+    """Return *string* with every character that XML 1.0 forbids removed.
+
+    Characters are kept only if they match the XML 1.0 ``Char`` production:
+    tab, line feed, carriage return, U+0020-U+D7FF, U+E000-U+FFFD and
+    U+10000-U+10FFFF. Everything else (other C0 controls, lone surrogates,
+    U+FFFE and U+FFFF) is dropped.
+    """
+    # Code points above U+FFFF need the eight-digit ``\U`` escape: ``\u`` reads
+    # exactly four hex digits, so ``\u10000`` would mean U+1000 followed by "0"
+    # and leave the supplementary planes (e.g. emoji) outside the allowed set.
+    return re.sub(
+        r"[^\u0009\u000A\u000D\u0020-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]+",
+        "",
+        string,
+    )
+
+
# End of file
diff --git a/tests/unicode_test.py b/tests/unicode_test.py
index 7b3c271..350256c 100644
--- a/tests/unicode_test.py
+++ b/tests/unicode_test.py
@@ -27,3 +27,42 @@ def test_get_cache_key(artist):
def test_cast_and_hash(obj):
assert type(str(obj)) is str
assert isinstance(hash(obj), int)
+
+
+@pytest.mark.parametrize(
+    "test_input, expected",
+    [
+        # Plain text comes back unchanged.
+        ("test album name", "test album name"),
+        # The Unicode ENQ (Enquiry, U+0005) control character is dropped.
+        ("test album \u0005name", "test album name"),
+    ],
+)
+def test__remove_invalid_xml_chars(test_input: str, expected: str) -> None:
+    """Check the output of ``_remove_invalid_xml_chars`` for each sample."""
+    result = pylast._remove_invalid_xml_chars(test_input)
+    assert result == expected
+
+
+@pytest.mark.parametrize(
+    "test_input, expected",
+    [
+        (
+            # Well-formed XML, parses on the first attempt
+            "<album>test album name</album>",
+            '<?xml version="1.0" ?><album>test album name</album>',
+        ),
+        (
+            # Contains Unicode ENQ Enquiry control character, which expat
+            # rejects, so the sanitising retry has to produce the document
+            "<album>test album \u0005name</album>",
+            '<?xml version="1.0" ?><album>test album name</album>',
+        ),
+    ],
+)
+def test__parse_response(test_input: str, expected: str) -> None:
+    """Check the serialised DOM returned by ``_parse_response``.
+
+    The inputs are complete XML documents because ``minidom.parseString``
+    raises on bare text, and the expected values include the XML declaration
+    that ``Document.toxml()`` writes ahead of the root element.
+    """
+    doc = pylast._parse_response(test_input)
+    assert doc.toxml() == expected