Apply patch to fix illegal XML characters, closes #71

This commit is contained in:
hugovk 2014-03-02 22:54:02 +02:00
parent 6db87f8a27
commit c6c8d45635
2 changed files with 26 additions and 3 deletions

View file

@@ -114,6 +114,16 @@ SCROBBLE_MODE_LOVED = "L"
SCROBBLE_MODE_BANNED = "B"
SCROBBLE_MODE_SKIPPED = "S"

# Pattern for text that XML 1.0 does not allow in a document, adapted from
# http://boodebr.org/main/python/all-about-python-and-unicode#UNI_XML
#
# The surrogate bounds are spelled as \u escapes rather than built with
# unichr(): unichr() only exists as a builtin on Python 2, whereas the
# escapes produce the very same pattern text wherever u'' literals are
# accepted.
RE_XML_ILLEGAL = (
    # Control characters other than tab, LF and CR, plus U+FFFE and U+FFFF.
    u'([\u0000-\u0008\u000b-\u000c\u000e-\u001f\ufffe-\uffff])'
    u'|'
    # High surrogate that is not followed by a low surrogate.
    u'([\ud800-\udbff][^\udc00-\udfff])'
    u'|'
    # Low surrogate that is not preceded by a high surrogate.
    u'([^\ud800-\udbff][\udc00-\udfff])'
    u'|'
    # High surrogate at the very end of the text.
    u'([\ud800-\udbff]$)'
    u'|'
    # Low surrogate at the very start of the text.
    u'(^[\udc00-\udfff])'
)
# Compiled once at import time so callers can reuse it for every response.
XML_ILLEGAL = re.compile(RE_XML_ILLEGAL)
class _Network(object):
"""
A music social network website that is Last.fm or one exposing a Last.fm compatible API
@@ -848,9 +858,7 @@ class _Request(object):
except Exception as e:
raise MalformedResponseError(self.network, e)
# Pretty decent catch for invalid & characters - which Last.fm
# seems to generate for some artist eg. "K'nann"
response_text = re.sub("&(?![^\W]+;)", "&", response_text)
response_text = XML_ILLEGAL.sub("?", response_text)
self._check_response_for_errors(response_text)
return response_text
@@ -4045,3 +4053,5 @@ class Scrobbler(object):
if remainder:
self.scrobble_many(remainder)
# End of file

View file

@@ -471,6 +471,19 @@ class TestPyLast(unittest.TestCase):
self.helper_is_thing_hashable(xspf)
def test_invalid_xml(self):
    # Smoke test: a track search whose response has been seen to contain
    # characters that are illegal in XML should still yield a result count.

    # Arrange
    # This artist/title pair has triggered "PCDATA invalid Char value 25"
    performer = "Blind Willie Johnson"
    song = "It's nobody's fault but mine"

    # Act
    track_search = self.network.search_for_track(performer, song)
    result_count = track_search.get_total_result_count()

    # Assert
    self.assertGreaterEqual(result_count, 0)
if __name__ == '__main__':
# For quick testing of a single case (eg. test = "test_scrobble")