Refactor helper functions into a utils package

2022-07-05 11:07:18 +03:00 · 2022-07-05 11:07:18 +03:00 · 620323eab0
parent ece37c4659
commit 620323eab0
3 changed files with 179 additions and 145 deletions
--- a/src/pylast/__init__.py
+++ b/src/pylast/__init__.py
@ -22,17 +22,13 @@ from __future__ import annotations
 import collections
 import hashlib
 import html.entities
 import logging
 import os
 import re
 import shelve
 import ssl
 import tempfile
 import time
-import xml.dom
+from xml.dom import minidom
 from urllib.parse import quote_plus
 from xml.dom import Node, minidom
 import httpx
@ -43,12 +39,29 @@ except ImportError:
    # Python 3.7 and lower
    import importlib_metadata  # type: ignore
 from .utils import (
    _collect_nodes,
    _number,
    _parse_response,
    _string_output,
    _unescape_htmlentity,
    _unicode,
    _url_safe,
    cleanup_nodes,
    md5,
 )
 __author__ = "Amr Hassan, hugovk, Mice Pápai"
 __copyright__ = "Copyright (C) 2008-2010 Amr Hassan, 2013-2021 hugovk, 2017 Mice Pápai"
 __license__ = "apache2"
 __email__ = "amr.hassan@gmail.com"
 __version__ = importlib_metadata.version(__name__)
 # Names exported by ``from pylast import *``. Entries must be strings naming
 # module attributes; putting the objects themselves here makes the star
 # import raise TypeError.
 __all__ = [
    # Utils
    "cleanup_nodes",
    "md5",
 ]
 # 1 : This error does not exist
 STATUS_INVALID_SERVICE = 2
@ -938,7 +951,7 @@ class _Request:
            client.close()
        return response_text
-    def execute(self, cacheable: bool = False) -> xml.dom.minidom.Document:
+    def execute(self, cacheable: bool = False) -> minidom.Document:
        """Returns the XML DOM response of the POST Request from the server"""
        if self.network.is_caching_enabled() and cacheable:
@ -1089,13 +1102,6 @@ Image = collections.namedtuple(
 )
 def _string_output(func):
    def r(*args):
        return str(func(*args))
    return r
 class _BaseObject:
    """An abstract webservices object."""
@ -2720,89 +2726,6 @@ class TrackSearch(_Search):
        return seq
 def md5(text):
    """Return the hexadecimal MD5 digest of ``text``.

    The value is passed through ``_unicode`` and encoded as UTF-8 before
    hashing.
    """
    encoded = _unicode(text).encode("utf-8")
    return hashlib.md5(encoded).hexdigest()
 def _unicode(text):
    if isinstance(text, bytes):
        return str(text, "utf-8")
    else:
        return str(text)
 def cleanup_nodes(doc):
    """
    Remove text nodes containing only whitespace

    Only direct children of the document element are examined. The same
    document object is returned after being modified in place.
    """
    root = doc.documentElement
    # Iterate over a snapshot: removeChild() mutates the live childNodes list,
    # and looping over that list directly skips the node after each removal.
    for node in list(root.childNodes):
        if node.nodeType == Node.TEXT_NODE and node.nodeValue.isspace():
            root.removeChild(node)
    return doc
 def _collect_nodes(
    limit, sender, method_name, cacheable, params=None, stream: bool = False
 ):
    """
    Returns a sequence of dom.Node objects about as close to limit as possible

    Pages are requested one at a time through ``sender._request`` until
    ``limit`` nodes have been produced (a falsy ``limit`` means no cap) or the
    page count reported by the response has been reached. Each request is
    attempted up to three times, one second apart, before ``PyLastError`` is
    raised. When ``params`` is falsy, ``sender._get_params()`` supplies it;
    the ``"page"`` key of ``params`` is overwritten for every request.

    With ``stream=True`` a generator is returned; otherwise a list.
    """
    if not params:
        params = sender._get_params()

    def _stream_collect_nodes():
        node_count = 0
        page = 1
        end_of_pages = False

        while not end_of_pages and (not limit or node_count < limit):
            params["page"] = str(page)

            tries = 1
            while True:
                try:
                    doc = sender._request(method_name, cacheable, params)
                    break  # success
                except Exception as e:
                    if tries >= 3:
                        raise PyLastError() from e
                    # Wait and try again
                    time.sleep(1)
                    tries += 1

            doc = cleanup_nodes(doc)

            # break if there are no child nodes
            if not doc.documentElement.childNodes:
                break
            main = doc.documentElement.childNodes[0]

            if main.hasAttribute("totalPages") or main.hasAttribute("totalpages"):
                total_pages = _number(
                    main.getAttribute("totalPages") or main.getAttribute("totalpages")
                )
            else:
                raise PyLastError("No total pages attribute")

            for node in main.childNodes:
                # Use the ``Node`` name imported from ``xml.dom`` rather than
                # relying on a separately bound ``xml`` package name.
                if node.nodeType != Node.TEXT_NODE and (
                    not limit or (node_count < limit)
                ):
                    node_count += 1
                    yield node

            end_of_pages = page >= total_pages

            page += 1

    return _stream_collect_nodes() if stream else list(_stream_collect_nodes())
 def _extract(node, name, index: int = 0):
    """Extracts a value from the xml string"""
@ -2878,51 +2801,3 @@ def _extract_tracks(doc, network):
        artist = _extract(node, "name", 1)
        seq.append(Track(artist, name, network))
    return seq
 def _url_safe(text):
    """Does all kinds of tricks on a text to make it safe to use in a URL."""
    return quote_plus(quote_plus(str(text))).lower()
 def _number(string):
    """
    Extracts an int from a string.
    Returns a 0 if None or an empty string was passed.
    """
    if not string:
        return 0
    else:
        try:
            return int(string)
        except ValueError:
            return float(string)
 def _unescape_htmlentity(string):
    mapping = html.entities.name2codepoint
    for key in mapping:
        string = string.replace(f"&{key};", chr(mapping[key]))
    return string
 def _parse_response(response: str) -> xml.dom.minidom.Document:
    response = str(response).replace("opensearch:", "")
    try:
        doc = minidom.parseString(response)
    except xml.parsers.expat.ExpatError:
        # Try again. For performance, we only remove when needed in rare cases.
        doc = minidom.parseString(_remove_invalid_xml_chars(response))
    return doc
 def _remove_invalid_xml_chars(string: str) -> str:
    return re.sub(
        r"[^\u0009\u000A\u000D\u0020-\uD7FF\uE000-\uFFFD\u10000-\u10FFF]+", "", string
    )
 # End of file
--- a/src/pylast/utils.py
+++ b/src/pylast/utils.py
@ -0,0 +1,159 @@
 from __future__ import annotations
 import hashlib
 import html
 import re
 import time
 import warnings
 import xml
 from urllib.parse import quote_plus
 from xml.dom import Node, minidom
 import pylast
 def cleanup_nodes(doc: minidom.Document) -> minidom.Document:
    """
    Deprecated public entry point; will be removed in pylast 6.0.

    Emits a ``DeprecationWarning`` (with ``stacklevel=2``) and returns the
    result of ``_cleanup_nodes(doc)``.
    """
    message = "cleanup_nodes is deprecated and will be removed in pylast 6.0"
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return _cleanup_nodes(doc)
 def md5(text: str) -> str:
    """Return the hexadecimal MD5 digest of ``text``.

    The value is passed through ``_unicode`` and encoded as UTF-8 before
    hashing.
    """
    encoded = _unicode(text).encode("utf-8")
    return hashlib.md5(encoded).hexdigest()
 def _collect_nodes(
    limit, sender, method_name, cacheable, params=None, stream: bool = False
 ):
    """
    Returns a sequence of dom.Node objects about as close to limit as possible

    Pages are requested one at a time through ``sender._request`` until
    ``limit`` nodes have been produced (a falsy ``limit`` means no cap) or the
    page count reported by the response has been reached. When ``params`` is
    falsy, ``sender._get_params()`` supplies it; the ``"page"`` key of
    ``params`` is overwritten for every request.

    With ``stream=True`` a generator is returned; otherwise a list.

    Raises ``pylast.PyLastError`` when a request fails three times in a row
    or when the first child of the document element has no ``totalPages`` /
    ``totalpages`` attribute.
    """
    if not params:
        params = sender._get_params()
    def _stream_collect_nodes():
        # Generator doing the actual paging; the outer function decides
        # whether to hand it out lazily or materialise it into a list.
        node_count = 0
        page = 1
        end_of_pages = False
        while not end_of_pages and (not limit or (limit and node_count < limit)):
            params["page"] = str(page)
            # Up to three attempts per page, sleeping one second in between.
            tries = 1
            while True:
                try:
                    doc = sender._request(method_name, cacheable, params)
                    break  # success
                except Exception as e:
                    if tries >= 3:
                        raise pylast.PyLastError() from e
                    # Wait and try again
                    time.sleep(1)
                    tries += 1
            doc = _cleanup_nodes(doc)
            # break if there are no child nodes
            if not doc.documentElement.childNodes:
                break
            main = doc.documentElement.childNodes[0]
            # The page count is read from either attribute spelling.
            if main.hasAttribute("totalPages") or main.hasAttribute("totalpages"):
                total_pages = _number(
                    main.getAttribute("totalPages") or main.getAttribute("totalpages")
                )
            else:
                raise pylast.PyLastError("No total pages attribute")
            for node in main.childNodes:
                # Skip text nodes; stop yielding once ``limit`` is reached.
                if not node.nodeType == xml.dom.Node.TEXT_NODE and (
                    not limit or (node_count < limit)
                ):
                    node_count += 1
                    yield node
            end_of_pages = page >= total_pages
            page += 1
    return _stream_collect_nodes() if stream else list(_stream_collect_nodes())
 def _cleanup_nodes(doc: minidom.Document) -> minidom.Document:
    """
    Remove text nodes containing only whitespace
    """
    for node in doc.documentElement.childNodes:
        if node.nodeType == Node.TEXT_NODE and node.nodeValue.isspace():
            doc.documentElement.removeChild(node)
    return doc
 def _number(string: str | None) -> float:
    """
    Extracts an int from a string.
    Returns a 0 if None or an empty string was passed.
    """
    if not string:
        return 0
    else:
        try:
            return int(string)
        except ValueError:
            return float(string)
 def _parse_response(response: str) -> xml.dom.minidom.Document:
    response = str(response).replace("opensearch:", "")
    try:
        doc = minidom.parseString(response)
    except xml.parsers.expat.ExpatError:
        # Try again. For performance, we only remove when needed in rare cases.
        doc = minidom.parseString(_remove_invalid_xml_chars(response))
    return doc
 def _remove_invalid_xml_chars(string: str) -> str:
    return re.sub(
        r"[^\u0009\u000A\u000D\u0020-\uD7FF\uE000-\uFFFD\u10000-\u10FFF]+", "", string
    )
 def _string_output(func):
    def r(*args):
        return str(func(*args))
    return r
 def _unescape_htmlentity(string: str) -> str:
    mapping = html.entities.name2codepoint
    for key in mapping:
        string = string.replace(f"&{key};", chr(mapping[key]))
    return string
 def _unicode(text: bytes | str) -> str:
    if isinstance(text, bytes):
        return str(text, "utf-8")
    else:
        return str(text)
 def _url_safe(text: str) -> str:
    """Does all kinds of tricks on a text to make it safe to use in a URL."""
    return quote_plus(quote_plus(str(text))).lower()
--- a/tests/unicode_test.py
+++ b/tests/unicode_test.py
@ -45,7 +45,7 @@ def test_cast_and_hash(obj) -> None:
    ],
 )
 def test__remove_invalid_xml_chars(test_input: str, expected: str) -> None:
-    assert pylast._remove_invalid_xml_chars(test_input) == expected
+    assert pylast.utils._remove_invalid_xml_chars(test_input) == expected
@pytest.mark.parametrize(