From c6c8d45635a4d62b4f53d23da2b3ba138a6609e7 Mon Sep 17 00:00:00 2001 From: hugovk Date: Sun, 2 Mar 2014 22:54:02 +0200 Subject: [PATCH] Apply patch to fix illegal XML characters, closes #71 --- pylast.py | 16 +++++++++++++--- test_pylast.py | 13 +++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/pylast.py b/pylast.py index 1de47b1..6a5bb4c 100644 --- a/pylast.py +++ b/pylast.py @@ -114,6 +114,16 @@ SCROBBLE_MODE_LOVED = "L" SCROBBLE_MODE_BANNED = "B" SCROBBLE_MODE_SKIPPED = "S" +# From http://boodebr.org/main/python/all-about-python-and-unicode#UNI_XML +RE_XML_ILLEGAL = u'([\u0000-\u0008\u000b-\u000c\u000e-\u001f\ufffe-\uffff])' + \ + u'|' + \ + u'([%s-%s][^%s-%s])|([^%s-%s][%s-%s])|([%s-%s]$)|(^[%s-%s])' % \ + (unichr(0xd800),unichr(0xdbff),unichr(0xdc00),unichr(0xdfff), + unichr(0xd800),unichr(0xdbff),unichr(0xdc00),unichr(0xdfff), + unichr(0xd800),unichr(0xdbff),unichr(0xdc00),unichr(0xdfff)) + +XML_ILLEGAL = re.compile(RE_XML_ILLEGAL) + class _Network(object): """ A music social network website that is Last.fm or one exposing a Last.fm compatible API @@ -848,9 +858,7 @@ class _Request(object): except Exception as e: raise MalformedResponseError(self.network, e) - # Pretty decent catch for invalid & characters - which Last.fm - # seems to generate for some artist eg. 
"K'nann" - response_text = re.sub("&(?![^\W]+;)", "&amp;", response_text) + response_text = XML_ILLEGAL.sub("?", response_text) self._check_response_for_errors(response_text) return response_text @@ -4045,3 +4053,5 @@ class Scrobbler(object): if remainder: self.scrobble_many(remainder) + +# End of file diff --git a/test_pylast.py b/test_pylast.py index d0baf03..8925284 100755 --- a/test_pylast.py +++ b/test_pylast.py @@ -471,6 +471,19 @@ class TestPyLast(unittest.TestCase): self.helper_is_thing_hashable(xspf) + def test_invalid_xml(self): + # Arrange + # Currently causes PCDATA invalid Char value 25 + artist = "Blind Willie Johnson" + title = "It's nobody's fault but mine" + + # Act + search = self.network.search_for_track(artist, title) + total = search.get_total_result_count() + + # Assert + self.assertGreaterEqual(total, 0) + if __name__ == '__main__': # For quick testing of a single case (eg. test = "test_scrobble")