Added option to stream from resources to reduce memory usage

This commit is contained in:
kvanzuijlen 2020-07-12 11:54:46 +02:00 committed by Koen van Zuijlen
parent 705052ae66
commit 92004058ba
8 changed files with 174 additions and 192 deletions

View file

@ -103,7 +103,7 @@ export PYLAST_API_SECRET=TODO_ENTER_YOURS_HERE
To run all unit and integration tests:
```sh
pip install pytest flaky
pip install .[tests]
pytest
```

View file

@ -1149,21 +1149,22 @@ class _BaseObject:
return first_child.wholeText.strip()
def _get_things(self, method, thing, thing_type, params=None, cacheable=True):
def _get_things(self, method, thing, thing_type, params=None, cacheable=True, stream=False):
"""Returns a list of the most played thing_types by this thing."""
limit = params.get("limit", 1)
seq = []
for node in _collect_nodes(
limit, self, self.ws_prefix + "." + method, cacheable, params
):
title = _extract(node, "name")
artist = _extract(node, "name", 1)
playcount = _number(_extract(node, "playcount"))
def _stream_get_things():
limit = params.get("limit", 1)
nodes = _collect_nodes(
limit, self, self.ws_prefix + "." + method, cacheable, params, stream=stream,
)
for node in nodes:
title = _extract(node, "name")
artist = _extract(node, "name", 1)
playcount = _number(_extract(node, "playcount"))
seq.append(TopItem(thing_type(artist, title, self.network), playcount))
yield TopItem(thing_type(artist, title, self.network), playcount)
return seq
return _stream_get_things() if stream else list(_stream_get_things())
def get_wiki_published_date(self):
"""
@ -1835,21 +1836,21 @@ class Artist(_BaseObject, _Taggable):
return artists
def get_top_albums(self, limit=None, cacheable=True):
def get_top_albums(self, limit=None, cacheable=True, stream=True):
"""Returns a list of the top albums."""
params = self._get_params()
if limit:
params["limit"] = limit
return self._get_things("getTopAlbums", "album", Album, params, cacheable)
return self._get_things("getTopAlbums", "album", Album, params, cacheable, stream=stream)
def get_top_tracks(self, limit=None, cacheable=True):
def get_top_tracks(self, limit=None, cacheable=True, stream=True):
"""Returns a list of the most played Tracks by this artist."""
params = self._get_params()
if limit:
params["limit"] = limit
return self._get_things("getTopTracks", "track", Track, params, cacheable)
return self._get_things("getTopTracks", "track", Track, params, cacheable, stream=stream)
def get_url(self, domain_name=DOMAIN_ENGLISH):
"""Returns the URL of the artist page on the network.
@ -1917,13 +1918,13 @@ class Country(_BaseObject):
return _extract_top_artists(doc, self)
def get_top_tracks(self, limit=None, cacheable=True):
def get_top_tracks(self, limit=None, cacheable=True, stream=True):
"""Returns a sequence of the most played tracks"""
params = self._get_params()
if limit:
params["limit"] = limit
return self._get_things("getTopTracks", "track", Track, params, cacheable)
return self._get_things("getTopTracks", "track", Track, params, cacheable, stream=stream)
def get_url(self, domain_name=DOMAIN_ENGLISH):
"""Returns the URL of the country page on the network.
@ -1978,24 +1979,24 @@ class Library(_BaseObject):
"""Returns the user who owns this library."""
return self.user
def get_artists(self, limit=50, cacheable=True):
def get_artists(self, limit=50, cacheable=True, stream=True):
"""
Returns a sequence of LibraryItem objects, each wrapping an Artist.
If limit is None, it will return all (may take a while).
"""
seq = []
for node in _collect_nodes(
limit, self, self.ws_prefix + ".getArtists", cacheable
):
name = _extract(node, "name")
def _get_artists():
for node in _collect_nodes(
limit, self, self.ws_prefix + ".getArtists", cacheable, stream=stream
):
name = _extract(node, "name")
playcount = _number(_extract(node, "playcount"))
tagcount = _number(_extract(node, "tagcount"))
playcount = _number(_extract(node, "playcount"))
tagcount = _number(_extract(node, "tagcount"))
seq.append(LibraryItem(Artist(name, self.network), playcount, tagcount))
yield LibraryItem(Artist(name, self.network), playcount, tagcount)
return seq
return _get_artists() if stream else list(_get_artists())
class Tag(_BaseObject, _Chartable):
@ -2047,13 +2048,13 @@ class Tag(_BaseObject, _Chartable):
return _extract_top_albums(doc, self.network)
def get_top_tracks(self, limit=None, cacheable=True):
def get_top_tracks(self, limit=None, cacheable=True, stream=True):
"""Returns a list of the most played Tracks for this tag."""
params = self._get_params()
if limit:
params["limit"] = limit
return self._get_things("getTopTracks", "track", Track, params, cacheable)
return self._get_things("getTopTracks", "track", Track, params, cacheable, stream=stream)
def get_top_artists(self, limit=None, cacheable=True):
"""Returns a sequence of the most played artists."""
@ -2241,6 +2242,14 @@ class User(_BaseObject, _Chartable):
def _get_params(self):
return {self.ws_prefix: self.get_name()}
def _extract_played_track(self, track_node):
title = _extract(track_node, "name")
track_artist = _extract(track_node, "artist")
date = _extract(track_node, "date")
album = _extract(track_node, "album")
timestamp = track_node.getElementsByTagName("date")[0].getAttribute("uts")
return PlayedTrack(Track(track_artist, title, self.network), album, date, timestamp)
def get_name(self, properly_capitalized=False):
"""Returns the user name."""
@ -2251,7 +2260,7 @@ class User(_BaseObject, _Chartable):
return self.name
def get_artist_tracks(self, artist, cacheable=False):
def get_artist_tracks(self, artist, cacheable=False, stream=True):
"""
Deprecated by Last.fm.
Get a list of tracks by a given artist scrobbled by this user,
@ -2269,63 +2278,56 @@ class User(_BaseObject, _Chartable):
params = self._get_params()
params["artist"] = artist
seq = []
for track in _collect_nodes(
None, self, self.ws_prefix + ".getArtistTracks", cacheable, params
):
title = _extract(track, "name")
artist = _extract(track, "artist")
date = _extract(track, "date")
album = _extract(track, "album")
timestamp = track.getElementsByTagName("date")[0].getAttribute("uts")
def _get_artist_tracks():
for track_node in _collect_nodes(
None, self, self.ws_prefix + ".getArtistTracks", cacheable, params, stream=stream,
):
yield self._extract_played_track(track_node=track_node)
seq.append(
PlayedTrack(Track(artist, title, self.network), album, date, timestamp)
)
return _get_artist_tracks() if stream else list(_get_artist_tracks())
return seq
def get_friends(self, limit=50, cacheable=False):
def get_friends(self, limit=50, cacheable=False, stream=True):
"""Returns a list of the user's friends."""
seq = []
for node in _collect_nodes(
limit, self, self.ws_prefix + ".getFriends", cacheable
):
seq.append(User(_extract(node, "name"), self.network))
def _get_friends():
for node in _collect_nodes(
limit, self, self.ws_prefix + ".getFriends", cacheable, stream=stream
):
yield User(_extract(node, "name"), self.network)
return seq
return _get_friends() if stream else list(_get_friends())
def get_loved_tracks(self, limit=50, cacheable=True):
def get_loved_tracks(self, limit=50, cacheable=True, stream=True):
"""
Returns this user's loved tracks as a sequence of LovedTrack objects in
reverse order of their timestamp, all the way back to the first track.
If limit==None, it will try to pull all the available data.
If stream=True, it will yield tracks as soon as a page has been retrieved.
This method uses caching. Enable caching only if you're pulling a
large amount of data.
"""
params = self._get_params()
if limit:
params["limit"] = limit
def _get_loved_tracks():
params = self._get_params()
if limit:
params["limit"] = limit
seq = []
for track in _collect_nodes(
limit, self, self.ws_prefix + ".getLovedTracks", cacheable, params
):
try:
artist = _extract(track, "name", 1)
except IndexError: # pragma: no cover
continue
title = _extract(track, "name")
date = _extract(track, "date")
timestamp = track.getElementsByTagName("date")[0].getAttribute("uts")
for track in _collect_nodes(
limit, self, self.ws_prefix + ".getLovedTracks", cacheable, params, stream=stream
):
try:
artist = _extract(track, "name", 1)
except IndexError: # pragma: no cover
continue
title = _extract(track, "name")
date = _extract(track, "date")
timestamp = track.getElementsByTagName("date")[0].getAttribute("uts")
seq.append(LovedTrack(Track(artist, title, self.network), date, timestamp))
yield LovedTrack(Track(artist, title, self.network), date, timestamp)
return seq
return _get_loved_tracks() if stream else list(_get_loved_tracks())
def get_now_playing(self):
"""
@ -2353,7 +2355,7 @@ class User(_BaseObject, _Chartable):
return Track(artist, title, self.network, self.name, info=info)
def get_recent_tracks(self, limit=10, cacheable=True, time_from=None, time_to=None):
def get_recent_tracks(self, limit=10, cacheable=True, time_from=None, time_to=None, stream=True):
"""
Returns this user's played tracks as a sequence of PlayedTrack objects
in reverse order of playtime, all the way back to the first track.
@ -2368,45 +2370,35 @@ class User(_BaseObject, _Chartable):
before this time, in UNIX timestamp format (integer number of
seconds since 00:00:00, January 1st 1970 UTC). This must be in
the UTC time zone.
stream: If True, it will yield tracks as soon as a page has been retrieved.
This method uses caching. Enable caching only if you're pulling a
large amount of data.
"""
params = self._get_params()
if limit:
params["limit"] = limit + 1 # in case we remove the now playing track
if time_from:
params["from"] = time_from
if time_to:
params["to"] = time_to
def _get_recent_tracks():
params = self._get_params()
if limit:
params["limit"] = limit + 1 # in case we remove the now playing track
if time_from:
params["from"] = time_from
if time_to:
params["to"] = time_to
seq = []
for track in _collect_nodes(
limit + 1 if limit else None,
self,
self.ws_prefix + ".getRecentTracks",
cacheable,
params,
):
for track_node in _collect_nodes(
limit + 1 if limit else None,
self,
self.ws_prefix + ".getRecentTracks",
cacheable,
params,
stream=stream
):
if track_node.hasAttribute("nowplaying"):
continue # to prevent the now playing track from sneaking in
if track.hasAttribute("nowplaying"):
continue # to prevent the now playing track from sneaking in
yield self._extract_played_track(track_node=track_node)
title = _extract(track, "name")
artist = _extract(track, "artist")
date = _extract(track, "date")
album = _extract(track, "album")
timestamp = track.getElementsByTagName("date")[0].getAttribute("uts")
seq.append(
PlayedTrack(Track(artist, title, self.network), album, date, timestamp)
)
if limit:
# Slice, in case we didn't remove a now playing track
seq = seq[:limit]
return seq
return _get_recent_tracks() if stream else list(_get_recent_tracks())
def get_country(self):
"""Returns the name of the country of the user."""
@ -2545,7 +2537,7 @@ class User(_BaseObject, _Chartable):
return seq
def get_top_tracks(self, period=PERIOD_OVERALL, limit=None, cacheable=True):
def get_top_tracks(self, period=PERIOD_OVERALL, limit=None, cacheable=True, stream=True):
"""Returns the top tracks played by a user.
* period: The period of time. Possible values:
o PERIOD_OVERALL
@ -2561,33 +2553,24 @@ class User(_BaseObject, _Chartable):
if limit:
params["limit"] = limit
return self._get_things("getTopTracks", "track", Track, params, cacheable)
return self._get_things("getTopTracks", "track", Track, params, cacheable, stream=stream)
def get_track_scrobbles(self, artist, track, cacheable=False):
def get_track_scrobbles(self, artist, track, cacheable=False, stream=True):
"""
Get a list of this user's scrobbles of this artist's track,
including scrobble time.
"""
params = self._get_params()
params["artist"] = artist
params["track"] = track
seq = []
for track in _collect_nodes(
None, self, self.ws_prefix + ".getTrackScrobbles", cacheable, params
):
title = _extract(track, "name")
artist = _extract(track, "artist")
date = _extract(track, "date")
album = _extract(track, "album")
timestamp = track.getElementsByTagName("date")[0].getAttribute("uts")
def _get_track_scrobbles():
for track_node in _collect_nodes(
None, self, self.ws_prefix + ".getTrackScrobbles", cacheable, params, stream=stream
):
yield self._extract_played_track(track_node)
seq.append(
PlayedTrack(Track(artist, title, self.network), album, date, timestamp)
)
return seq
return _get_track_scrobbles() if stream else list(_get_track_scrobbles())
def get_image(self, size=SIZE_EXTRA_LARGE):
"""
@ -2797,59 +2780,57 @@ def cleanup_nodes(doc):
return doc
def _collect_nodes(limit, sender, method_name, cacheable, params=None):
def _collect_nodes(limit, sender, method_name, cacheable, params=None, stream=False):
"""
Returns a sequence of dom.Node objects about as close to limit as possible
"""
if not params:
params = sender._get_params()
nodes = []
page = 1
end_of_pages = False
def _stream_collect_nodes():
node_count = 0
page = 1
end_of_pages = False
while not end_of_pages and (not limit or (limit and len(nodes) < limit)):
params["page"] = str(page)
while not end_of_pages and (not limit or (limit and node_count < limit)):
params["page"] = str(page)
tries = 1
while True:
try:
doc = sender._request(method_name, cacheable, params)
break # success
except Exception as e:
if tries >= 3:
raise e
# Wait and try again
time.sleep(1)
tries += 1
tries = 1
while True:
try:
doc = sender._request(method_name, cacheable, params)
break # success
except Exception as e:
if tries >= 3:
raise e
# Wait and try again
time.sleep(1)
tries += 1
doc = cleanup_nodes(doc)
doc = cleanup_nodes(doc)
# break if there are no child nodes
if not doc.documentElement.childNodes:
break
main = doc.documentElement.childNodes[0]
# break if there are no child nodes
if not doc.documentElement.childNodes:
break
main = doc.documentElement.childNodes[0]
if main.hasAttribute("totalPages"):
total_pages = _number(main.getAttribute("totalPages"))
elif main.hasAttribute("totalpages"):
total_pages = _number(main.getAttribute("totalpages"))
else:
raise Exception("No total pages attribute")
if main.hasAttribute("totalPages") or main.hasAttribute("totalpages"):
total_pages = _number(main.getAttribute("totalPages") or main.getAttribute("totalpages"))
else:
raise Exception("No total pages attribute")
for node in main.childNodes:
if not node.nodeType == xml.dom.Node.TEXT_NODE and (
not limit or (len(nodes) < limit)
):
nodes.append(node)
for node in main.childNodes:
if not node.nodeType == xml.dom.Node.TEXT_NODE and (
not limit or (node_count < limit)
):
node_count += 1
yield node
if page >= total_pages:
end_of_pages = True
end_of_pages = page >= total_pages
page += 1
page += 1
return nodes
return _stream_collect_nodes() if stream else list(_stream_collect_nodes())
def _extract(node, name, index=0):

View file

@ -32,7 +32,7 @@ class TestPyLastAlbum(TestPyLastWithLastFm):
# Act
# limit=2 to ignore now-playing:
track = lastfm_user.get_recent_tracks(limit=2)[0]
track = list(lastfm_user.get_recent_tracks(limit=2))[0]
# Assert
assert hasattr(track, "album")

View file

@ -2,9 +2,9 @@
"""
Integration (not unit) tests for pylast.py
"""
import pylast
import pytest
import pylast
from .test_pylast import WRITE_TEST, TestPyLastWithLastFm
@ -78,7 +78,7 @@ class TestPyLastArtist(TestPyLastWithLastFm):
artist = self.network.get_top_artists(limit=1)[0].item
# Act
things = artist.get_top_tracks(limit=2)
things = artist.get_top_tracks(limit=2, stream=False)
# Assert
self.helper_two_different_things_in_top_list(things, pylast.Track)
@ -89,7 +89,7 @@ class TestPyLastArtist(TestPyLastWithLastFm):
artist = self.network.get_top_artists(limit=1)[0].item
# Act
things = artist.get_top_albums(limit=2)
things = list(artist.get_top_albums(limit=2))
# Assert
self.helper_two_different_things_in_top_list(things, pylast.Album)
@ -101,7 +101,7 @@ class TestPyLastArtist(TestPyLastWithLastFm):
artist = self.network.get_top_artists(limit=1)[0].item
# Act
things = artist.get_top_albums(limit=limit)
things = artist.get_top_albums(limit=limit, stream=False)
# Assert
assert len(things) == 1
@ -113,7 +113,7 @@ class TestPyLastArtist(TestPyLastWithLastFm):
artist = self.network.get_top_artists(limit=1)[0].item
# Act
things = artist.get_top_albums(limit=limit)
things = artist.get_top_albums(limit=limit, stream=False)
# Assert
assert len(things) == 50
@ -125,7 +125,7 @@ class TestPyLastArtist(TestPyLastWithLastFm):
artist = self.network.get_top_artists(limit=1)[0].item
# Act
things = artist.get_top_albums(limit=limit)
things = list(artist.get_top_albums(limit=limit))
# Assert
assert len(things) == 100

View file

@ -5,9 +5,9 @@ Integration (not unit) tests for pylast.py
import re
import time
import pylast
import pytest
import pylast
from .test_pylast import WRITE_TEST, TestPyLastWithLastFm
@ -26,7 +26,7 @@ class TestPyLastNetwork(TestPyLastWithLastFm):
# Assert
# limit=2 to ignore now-playing:
last_scrobble = lastfm_user.get_recent_tracks(limit=2)[0]
last_scrobble = list(lastfm_user.get_recent_tracks(limit=2))[0]
assert str(last_scrobble.track.artist).lower() == artist
assert str(last_scrobble.track.title).lower() == title
@ -153,7 +153,7 @@ class TestPyLastNetwork(TestPyLastWithLastFm):
country = self.network.get_country("Croatia")
# Act
things = country.get_top_tracks(limit=2)
things = country.get_top_tracks(limit=2, stream=False)
# Assert
self.helper_two_different_things_in_top_list(things, pylast.Track)
@ -171,7 +171,7 @@ class TestPyLastNetwork(TestPyLastWithLastFm):
tag = self.network.get_tag("blues")
# Act
things = tag.get_top_tracks(limit=2)
things = tag.get_top_tracks(limit=2, stream=False)
# Assert
self.helper_two_different_things_in_top_list(things, pylast.Track)

View file

@ -6,10 +6,11 @@ import os
import sys
import time
import pylast
import pytest
from flaky import flaky
import pylast
WRITE_TEST = sys.version_info[:2] == (3, 8)
@ -82,9 +83,9 @@ class TestPyLastWithLastFm(PyLastTestCase):
assert a is not None
assert b is not None
assert c is not None
assert len(a) >= 0
assert len(b) >= 0
assert len(c) >= 0
assert isinstance(len(a), int)
assert isinstance(len(b), int)
assert isinstance(len(c), int)
assert a == b
assert b == c
@ -94,9 +95,9 @@ class TestPyLastWithLastFm(PyLastTestCase):
func = getattr(thing, function_name, None)
# Act
result1 = func(limit=1, cacheable=False)
result2 = func(limit=1, cacheable=True)
result3 = func(limit=1)
result1 = func(limit=1, cacheable=False, stream=False)
result2 = func(limit=1, cacheable=True, stream=False)
result3 = list(func(limit=1))
# Assert
self.helper_validate_results(result1, result2, result3)

View file

@ -4,9 +4,9 @@ Integration (not unit) tests for pylast.py
"""
import time
import pylast
import pytest
import pylast
from .test_pylast import WRITE_TEST, TestPyLastWithLastFm
@ -23,7 +23,7 @@ class TestPyLastTrack(TestPyLastWithLastFm):
track.love()
# Assert
loved = lastfm_user.get_loved_tracks(limit=1)
loved = list(lastfm_user.get_loved_tracks(limit=1))
assert str(loved[0].track.artist).lower() == "test artist"
assert str(loved[0].track.title).lower() == "test title"
@ -41,7 +41,7 @@ class TestPyLastTrack(TestPyLastWithLastFm):
time.sleep(1) # Delay, for Last.fm latency. TODO Can this be removed later?
# Assert
loved = lastfm_user.get_loved_tracks(limit=1)
loved = list(lastfm_user.get_loved_tracks(limit=1))
if len(loved): # OK to be empty but if not:
assert str(loved[0].track.artist) != "Test Artist"
assert str(loved[0].track.title) != "test title"
@ -79,7 +79,7 @@ class TestPyLastTrack(TestPyLastWithLastFm):
def test_track_is_hashable(self):
# Arrange
artist = self.network.get_artist("Test Artist")
track = artist.get_top_tracks()[0].item
track = artist.get_top_tracks(stream=False)[0].item
assert isinstance(track, pylast.Track)
# Act/Assert

View file

@ -8,9 +8,9 @@ import os
import re
import warnings
import pylast
import pytest
import pylast
from .test_pylast import TestPyLastWithLastFm
@ -142,10 +142,10 @@ class TestPyLastUser(TestPyLastWithLastFm):
user = self.network.get_user("test-user")
# Act/Assert
assert len(user.get_loved_tracks(limit=20)) == 20
assert len(user.get_loved_tracks(limit=100)) <= 100
assert len(user.get_loved_tracks(limit=None)) >= 23
assert len(user.get_loved_tracks(limit=0)) >= 23
assert len(user.get_loved_tracks(limit=20, stream=False)) == 20
assert len(user.get_loved_tracks(limit=100, stream=False)) <= 100
assert len(user.get_loved_tracks(limit=None, stream=False)) >= 23
assert len(user.get_loved_tracks(limit=0, stream=False)) >= 23
def test_user_is_hashable(self):
# Arrange
@ -210,7 +210,7 @@ class TestPyLastUser(TestPyLastWithLastFm):
lastfm_user = self.network.get_user("RJ")
# Act
things = lastfm_user.get_top_tracks(limit=2)
things = lastfm_user.get_top_tracks(limit=2, stream=False)
# Assert
self.helper_two_different_things_in_top_list(things, pylast.Track)
@ -361,7 +361,7 @@ class TestPyLastUser(TestPyLastWithLastFm):
utc_end = calendar.timegm(end.utctimetuple())
# Act
tracks = lastfm_user.get_recent_tracks(time_from=utc_start, time_to=utc_end)
tracks = lastfm_user.get_recent_tracks(time_from=utc_start, time_to=utc_end, stream=False)
# Assert
assert len(tracks) == 1
@ -379,7 +379,7 @@ class TestPyLastUser(TestPyLastWithLastFm):
# Act
tracks = lastfm_user.get_recent_tracks(
time_from=utc_start, time_to=utc_end, limit=None
time_from=utc_start, time_to=utc_end, limit=None, stream=False
)
# Assert
@ -449,7 +449,7 @@ class TestPyLastUser(TestPyLastWithLastFm):
user = self.network.get_user("bbc6music")
# Act
scrobbles = user.get_track_scrobbles(artist, title)
scrobbles = user.get_track_scrobbles(artist, title, stream=False)
# Assert
assert len(scrobbles) > 0
@ -463,9 +463,9 @@ class TestPyLastUser(TestPyLastWithLastFm):
user = self.network.get_user("bbc6music")
# Act
result1 = user.get_track_scrobbles(artist, title, cacheable=False)
result2 = user.get_track_scrobbles(artist, title, cacheable=True)
result3 = user.get_track_scrobbles(artist, title)
result1 = user.get_track_scrobbles(artist, title, cacheable=False, stream=False)
result2 = list(user.get_track_scrobbles(artist, title, cacheable=True))
result3 = list(user.get_track_scrobbles(artist, title))
# Assert
self.helper_validate_results(result1, result2, result3)
@ -480,4 +480,4 @@ class TestPyLastUser(TestPyLastWithLastFm):
match="Deprecated - This type of request is no longer supported",
):
warnings.filterwarnings("ignore", category=DeprecationWarning)
lastfm_user.get_artist_tracks(artist="Test Artist")
lastfm_user.get_artist_tracks(artist="Test Artist", stream=False)