From a75a3b8891630adac022e12d4a182c57da25a79b Mon Sep 17 00:00:00 2001 From: Dustin Ingram Date: Tue, 6 Mar 2018 17:38:11 -0600 Subject: [PATCH 1/5] Generate cache keys via a key_factory --- tests/unit/cache/origin/test_init.py | 20 ++++++++++++++++---- tests/unit/packaging/test_init.py | 22 +++++++++++++++++----- warehouse/cache/origin/__init__.py | 13 +++++++++++-- warehouse/packaging/__init__.py | 21 +++++++++++++++++---- 4 files changed, 61 insertions(+), 15 deletions(-) diff --git a/tests/unit/cache/origin/test_init.py b/tests/unit/cache/origin/test_init.py index 97c306612ab3..833c98920111 100644 --- a/tests/unit/cache/origin/test_init.py +++ b/tests/unit/cache/origin/test_init.py @@ -203,8 +203,14 @@ class TestKeyMaker: def test_both_cache_and_purge(self): key_maker = origin.key_maker_factory( - cache_keys=["foo", "foo/{obj.attr}"], - purge_keys=["bar", "bar/{obj.attr}"], + cache_keys=[ + origin.key_factory("foo"), + origin.key_factory("foo/{obj.attr}"), + ], + purge_keys=[ + origin.key_factory("bar"), + origin.key_factory("bar/{obj.attr}"), + ], ) assert key_maker(pretend.stub(attr="bar")) == origin.CacheKeys( cache=["foo", "foo/bar"], @@ -213,7 +219,10 @@ def test_both_cache_and_purge(self): def test_only_cache(self): key_maker = origin.key_maker_factory( - cache_keys=["foo", "foo/{obj.attr}"], + cache_keys=[ + origin.key_factory("foo"), + origin.key_factory("foo/{obj.attr}"), + ], purge_keys=None, ) assert key_maker(pretend.stub(attr="bar")) == origin.CacheKeys( @@ -224,7 +233,10 @@ def test_only_cache(self): def test_only_purge(self): key_maker = origin.key_maker_factory( cache_keys=None, - purge_keys=["bar", "bar/{obj.attr}"], + purge_keys=[ + origin.key_factory("bar"), + origin.key_factory("bar/{obj.attr}"), + ], ) assert key_maker(pretend.stub(attr="bar")) == origin.CacheKeys( cache=[], diff --git a/tests/unit/packaging/test_init.py b/tests/unit/packaging/test_init.py index dd7eca2c5e6f..41233b4de72b 100644 --- a/tests/unit/packaging/test_init.py +++ b/tests/unit/packaging/test_init.py @@ -33,6 +33,11 @@ def test_includme(monkeypatch, with_trending): packaging, "RedisDownloadStatService", download_stat_service_cls, ) + def key_factory(keystring): + return pretend.call(keystring) + + monkeypatch.setattr(packaging, 'key_factory', key_factory) + config = pretend.stub( maybe_dotted=lambda dotted: storage_class, register_service=pretend.call_recorder( @@ -71,15 +76,22 @@ def test_includme(monkeypatch, with_trending): assert config.register_origin_cache_keys.calls == [ pretend.call( Project, - cache_keys=["project/{obj.normalized_name}"], - purge_keys=["project/{obj.normalized_name}", "all-projects"], + cache_keys=[ + key_factory("project/{obj.normalized_name}"), + ], + purge_keys=[ + key_factory("project/{obj.normalized_name}"), + key_factory("all-projects"), + ], ), pretend.call( Release, - cache_keys=["project/{obj.project.normalized_name}"], + cache_keys=[ + key_factory("project/{obj.project.normalized_name}"), + ], purge_keys=[ - "project/{obj.project.normalized_name}", - "all-projects", + key_factory("project/{obj.project.normalized_name}"), + key_factory("all-projects"), ], ), ] diff --git a/warehouse/cache/origin/__init__.py b/warehouse/cache/origin/__init__.py index 97005c3b4e13..fa084732f093 100644 --- a/warehouse/cache/origin/__init__.py +++ b/warehouse/cache/origin/__init__.py @@ -12,6 +12,7 @@ import collections import functools +from itertools import chain from warehouse import db from warehouse.cache.origin.interfaces import IOriginCache @@ -87,6 +88,14 @@ def 
wrapped(context, request): CacheKeys = collections.namedtuple("CacheKeys", ["cache", "purge"]) +def key_factory(keystring): + + def generate_key(obj): + yield keystring.format(obj=obj) + + return generate_key + + def key_maker_factory(cache_keys, purge_keys): if cache_keys is None: cache_keys = [] @@ -96,8 +105,8 @@ def key_maker_factory(cache_keys, purge_keys): def key_maker(obj): return CacheKeys( - cache=[k.format(obj=obj) for k in cache_keys], - purge=[k.format(obj=obj) for k in purge_keys], + cache=list(chain.from_iterable(key(obj) for key in cache_keys)), + purge=list(chain.from_iterable(key(obj) for key in purge_keys)), ) return key_maker diff --git a/warehouse/packaging/__init__.py b/warehouse/packaging/__init__.py index ef5bb611d9a8..ba945d5a01e4 100644 --- a/warehouse/packaging/__init__.py +++ b/warehouse/packaging/__init__.py @@ -12,6 +12,7 @@ from celery.schedules import crontab +from warehouse.cache.origin import key_factory from warehouse.packaging.interfaces import IDownloadStatService, IFileStorage from warehouse.packaging.services import RedisDownloadStatService from warehouse.packaging.models import Project, Release @@ -38,13 +39,25 @@ def includeme(config): # Register our origin cache keys config.register_origin_cache_keys( Project, - cache_keys=["project/{obj.normalized_name}"], - purge_keys=["project/{obj.normalized_name}", "all-projects"], + cache_keys=[ + key_factory("project/{obj.normalized_name}"), + key_factory("user/{itr.username}", iterate_on='users'), + ], + purge_keys=[ + key_factory("project/{obj.normalized_name}"), + key_factory("all-projects"), + ], ) config.register_origin_cache_keys( Release, - cache_keys=["project/{obj.project.normalized_name}"], - purge_keys=["project/{obj.project.normalized_name}", "all-projects"], + cache_keys=[ + key_factory("project/{obj.project.normalized_name}"), + key_factory("user/{itr.username}", iterate_on='project.users'), + ], + purge_keys=[ + key_factory("project/{obj.project.normalized_name}"), + key_factory("all-projects"), + ], ) # Add a periodic task to compute trending once a day, assuming we have From 16eecab5820caa29db1580a9b15c9d6cf20f3e06 Mon Sep 17 00:00:00 2001 From: Dustin Ingram Date: Tue, 6 Mar 2018 17:51:05 -0600 Subject: [PATCH 2/5] Add cache key generation from iterable attribute --- tests/unit/cache/origin/test_init.py | 17 +++++++++++++++++ tests/unit/packaging/test_init.py | 4 ++-- warehouse/cache/origin/__init__.py | 9 +++++++-- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/tests/unit/cache/origin/test_init.py b/tests/unit/cache/origin/test_init.py index 833c98920111..ed9f0f175af3 100644 --- a/tests/unit/cache/origin/test_init.py +++ b/tests/unit/cache/origin/test_init.py @@ -243,6 +243,23 @@ def test_only_purge(self): purge=["bar", "bar/bar"], ) + def test_iterate_on(self): + key_maker = origin.key_maker_factory( + cache_keys=[ + origin.key_factory('foo'), + origin.key_factory('foo/{itr}', iterate_on='iterate_me'), + ], + purge_keys=[ + origin.key_factory('bar'), + origin.key_factory('bar/{itr}', iterate_on='iterate_me'), + ], + ) + cache_keys = key_maker(pretend.stub(iterate_me=['biz', 'baz'])) + assert cache_keys == origin.CacheKeys( + cache=['foo', 'foo/biz', 'foo/baz'], + purge=['bar', 'bar/biz', 'bar/baz'], + ) + def test_register_origin_keys(monkeypatch): class Fake1: diff --git a/tests/unit/packaging/test_init.py b/tests/unit/packaging/test_init.py index 41233b4de72b..b6db9eb720c4 100644 --- a/tests/unit/packaging/test_init.py +++ b/tests/unit/packaging/test_init.py @@ -33,8 
+33,8 @@ def test_includme(monkeypatch, with_trending): packaging, "RedisDownloadStatService", download_stat_service_cls, ) - def key_factory(keystring): - return pretend.call(keystring) + def key_factory(keystring, iterate_on=None): + return pretend.call(keystring, iterate_on=None) monkeypatch.setattr(packaging, 'key_factory', key_factory) diff --git a/warehouse/cache/origin/__init__.py b/warehouse/cache/origin/__init__.py index fa084732f093..d61b551b4fdd 100644 --- a/warehouse/cache/origin/__init__.py +++ b/warehouse/cache/origin/__init__.py @@ -12,6 +12,7 @@ import collections import functools +import operator from itertools import chain from warehouse import db @@ -88,10 +89,14 @@ def wrapped(context, request): CacheKeys = collections.namedtuple("CacheKeys", ["cache", "purge"]) -def key_factory(keystring): +def key_factory(keystring, iterate_on=None): def generate_key(obj): - yield keystring.format(obj=obj) + if iterate_on: + for itr in operator.attrgetter(iterate_on)(obj): + yield keystring.format(itr=itr, obj=obj) + else: + yield keystring.format(obj=obj) return generate_key From ba9b48096281827da9b13d6a122bd9e44620fa5f Mon Sep 17 00:00:00 2001 From: Dustin Ingram Date: Tue, 6 Mar 2018 19:11:28 -0600 Subject: [PATCH 3/5] Purge user when projects or releases change --- tests/unit/packaging/test_init.py | 17 +++++++++++++++-- warehouse/packaging/__init__.py | 12 ++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/tests/unit/packaging/test_init.py b/tests/unit/packaging/test_init.py index b6db9eb720c4..16cd788b88ff 100644 --- a/tests/unit/packaging/test_init.py +++ b/tests/unit/packaging/test_init.py @@ -17,7 +17,7 @@ from warehouse import packaging from warehouse.packaging.interfaces import IDownloadStatService, IFileStorage -from warehouse.packaging.models import Project, Release +from warehouse.packaging.models import Project, Release, User from warehouse.packaging.tasks import compute_trending @@ -34,7 +34,7 @@ def test_includme(monkeypatch, with_trending): ) def key_factory(keystring, iterate_on=None): - return pretend.call(keystring, iterate_on=None) + return pretend.call(keystring, iterate_on=iterate_on) monkeypatch.setattr(packaging, 'key_factory', key_factory) @@ -78,9 +78,11 @@ def key_factory(keystring, iterate_on=None): Project, cache_keys=[ key_factory("project/{obj.normalized_name}"), + key_factory("user/{itr.username}", iterate_on='users'), ], purge_keys=[ key_factory("project/{obj.normalized_name}"), + key_factory("user/{itr.username}", iterate_on='users'), key_factory("all-projects"), ], ), @@ -88,12 +90,23 @@ def key_factory(keystring, iterate_on=None): Release, cache_keys=[ key_factory("project/{obj.project.normalized_name}"), + key_factory("user/{itr.username}", iterate_on='project.users'), ], purge_keys=[ key_factory("project/{obj.project.normalized_name}"), + key_factory("user/{itr.username}", iterate_on='project.users'), key_factory("all-projects"), ], ), + pretend.call( + User, + cache_keys=[ + key_factory("user/{obj.username}"), + ], + purge_keys=[ + key_factory("user/{obj.username}"), + ], + ), ] if with_trending: diff --git a/warehouse/packaging/__init__.py b/warehouse/packaging/__init__.py index ba945d5a01e4..aa2d48e68151 100644 --- a/warehouse/packaging/__init__.py +++ b/warehouse/packaging/__init__.py @@ -12,6 +12,7 @@ from celery.schedules import crontab +from warehouse.accounts.models import User from warehouse.cache.origin import key_factory from warehouse.packaging.interfaces import IDownloadStatService, IFileStorage from 
warehouse.packaging.services import RedisDownloadStatService @@ -45,6 +46,7 @@ def includeme(config): ], purge_keys=[ key_factory("project/{obj.normalized_name}"), + key_factory("user/{itr.username}", iterate_on='users'), key_factory("all-projects"), ], ) @@ -56,9 +58,19 @@ def includeme(config): ], purge_keys=[ key_factory("project/{obj.project.normalized_name}"), + key_factory("user/{itr.username}", iterate_on='project.users'), key_factory("all-projects"), ], ) + config.register_origin_cache_keys( + User, + cache_keys=[ + key_factory("user/{obj.username}"), + ], + purge_keys=[ + key_factory("user/{obj.username}"), + ], + ) # Add a periodic task to compute trending once a day, assuming we have # been configured to be able to access BigQuery. From fdb12f13765da828b8ba8d96b8f3d21591f2abaf Mon Sep 17 00:00:00 2001 From: Dustin Ingram Date: Fri, 9 Mar 2018 13:59:16 -0600 Subject: [PATCH 4/5] Purge user's projects when purging user This also makes it impossible to use the `key_factory` and `iterate_on` with cache keys, to avoid the problem of unbounded surrogate keys. --- tests/unit/cache/origin/test_init.py | 50 ++++++++++++---------------- tests/unit/packaging/test_init.py | 18 ++++------ warehouse/cache/origin/__init__.py | 10 ++++-- warehouse/packaging/__init__.py | 15 +++------ 4 files changed, 41 insertions(+), 52 deletions(-) diff --git a/tests/unit/cache/origin/test_init.py b/tests/unit/cache/origin/test_init.py index ed9f0f175af3..bf4b68171631 100644 --- a/tests/unit/cache/origin/test_init.py +++ b/tests/unit/cache/origin/test_init.py @@ -203,32 +203,28 @@ class TestKeyMaker: def test_both_cache_and_purge(self): key_maker = origin.key_maker_factory( - cache_keys=[ - origin.key_factory("foo"), - origin.key_factory("foo/{obj.attr}"), - ], + cache_keys=["foo", "foo/{obj.attr}"], purge_keys=[ origin.key_factory("bar"), origin.key_factory("bar/{obj.attr}"), ], ) - assert key_maker(pretend.stub(attr="bar")) == origin.CacheKeys( - cache=["foo", "foo/bar"], - purge=["bar", "bar/bar"], - ) + cache_keys = key_maker(pretend.stub(attr="bar")) + + assert isinstance(cache_keys, origin.CacheKeys) + assert cache_keys.cache == ["foo", "foo/bar"] + assert list(cache_keys.purge) == ["bar", "bar/bar"] def test_only_cache(self): key_maker = origin.key_maker_factory( - cache_keys=[ - origin.key_factory("foo"), - origin.key_factory("foo/{obj.attr}"), - ], + cache_keys=["foo", "foo/{obj.attr}"], purge_keys=None, ) - assert key_maker(pretend.stub(attr="bar")) == origin.CacheKeys( - cache=["foo", "foo/bar"], - purge=[], - ) + cache_keys = key_maker(pretend.stub(attr="bar")) + + assert isinstance(cache_keys, origin.CacheKeys) + assert cache_keys.cache == ["foo", "foo/bar"] + assert list(cache_keys.purge) == [] def test_only_purge(self): key_maker = origin.key_maker_factory( @@ -238,27 +234,25 @@ def test_only_purge(self): origin.key_factory("bar/{obj.attr}"), ], ) - assert key_maker(pretend.stub(attr="bar")) == origin.CacheKeys( - cache=[], - purge=["bar", "bar/bar"], - ) + cache_keys = key_maker(pretend.stub(attr="bar")) + + assert isinstance(cache_keys, origin.CacheKeys) + assert cache_keys.cache == [] + assert list(cache_keys.purge) == ["bar", "bar/bar"] def test_iterate_on(self): key_maker = origin.key_maker_factory( - cache_keys=[ - origin.key_factory('foo'), - origin.key_factory('foo/{itr}', iterate_on='iterate_me'), - ], + cache_keys=['foo'], # Intentionally does not support `iterate_on` purge_keys=[ origin.key_factory('bar'), origin.key_factory('bar/{itr}', iterate_on='iterate_me'), ], ) cache_keys = 
key_maker(pretend.stub(iterate_me=['biz', 'baz'])) - assert cache_keys == origin.CacheKeys( - cache=['foo', 'foo/biz', 'foo/baz'], - purge=['bar', 'bar/biz', 'bar/baz'], - ) + + assert isinstance(cache_keys, origin.CacheKeys) + assert cache_keys.cache == ['foo'] + assert list(cache_keys.purge) == ['bar', 'bar/biz', 'bar/baz'] def test_register_origin_keys(monkeypatch): diff --git a/tests/unit/packaging/test_init.py b/tests/unit/packaging/test_init.py index 16cd788b88ff..9a897c014a90 100644 --- a/tests/unit/packaging/test_init.py +++ b/tests/unit/packaging/test_init.py @@ -76,10 +76,7 @@ def key_factory(keystring, iterate_on=None): assert config.register_origin_cache_keys.calls == [ pretend.call( Project, - cache_keys=[ - key_factory("project/{obj.normalized_name}"), - key_factory("user/{itr.username}", iterate_on='users'), - ], + cache_keys=["project/{obj.normalized_name}"], purge_keys=[ key_factory("project/{obj.normalized_name}"), key_factory("user/{itr.username}", iterate_on='users'), @@ -88,10 +85,7 @@ def key_factory(keystring, iterate_on=None): ), pretend.call( Release, - cache_keys=[ - key_factory("project/{obj.project.normalized_name}"), - key_factory("user/{itr.username}", iterate_on='project.users'), - ], + cache_keys=["project/{obj.project.normalized_name}"], purge_keys=[ key_factory("project/{obj.project.normalized_name}"), key_factory("user/{itr.username}", iterate_on='project.users'), @@ -100,11 +94,13 @@ def key_factory(keystring, iterate_on=None): ), pretend.call( User, - cache_keys=[ - key_factory("user/{obj.username}"), - ], + cache_keys=["user/{obj.username}"], purge_keys=[ key_factory("user/{obj.username}"), + key_factory( + "project/{itr.normalized_name}", + iterate_on='projects', + ), ], ), ] diff --git a/warehouse/cache/origin/__init__.py b/warehouse/cache/origin/__init__.py index d61b551b4fdd..5a71e51f32d2 100644 --- a/warehouse/cache/origin/__init__.py +++ b/warehouse/cache/origin/__init__.py @@ -110,8 +110,14 @@ def key_maker_factory(cache_keys, purge_keys): def key_maker(obj): return CacheKeys( - cache=list(chain.from_iterable(key(obj) for key in cache_keys)), - purge=list(chain.from_iterable(key(obj) for key in purge_keys)), + # Note: this does not support setting the `cache` argument via + # multiple `key_factories` as we do with `purge` because there is + # a limit to how many surrogate keys we can attach to a single HTTP + # response, and being able to use use `iterate_on` would allow this + # size to be unbounded. 
+ # ref: https://github.com/pypa/warehouse/pull/3189 + cache=[k.format(obj=obj) for k in cache_keys], + purge=chain.from_iterable(key(obj) for key in purge_keys), ) return key_maker diff --git a/warehouse/packaging/__init__.py b/warehouse/packaging/__init__.py index aa2d48e68151..3db703299059 100644 --- a/warehouse/packaging/__init__.py +++ b/warehouse/packaging/__init__.py @@ -40,10 +40,7 @@ def includeme(config): # Register our origin cache keys config.register_origin_cache_keys( Project, - cache_keys=[ - key_factory("project/{obj.normalized_name}"), - key_factory("user/{itr.username}", iterate_on='users'), - ], + cache_keys=["project/{obj.normalized_name}"], purge_keys=[ key_factory("project/{obj.normalized_name}"), key_factory("user/{itr.username}", iterate_on='users'), @@ -52,10 +49,7 @@ def includeme(config): ) config.register_origin_cache_keys( Release, - cache_keys=[ - key_factory("project/{obj.project.normalized_name}"), - key_factory("user/{itr.username}", iterate_on='project.users'), - ], + cache_keys=["project/{obj.project.normalized_name}"], purge_keys=[ key_factory("project/{obj.project.normalized_name}"), key_factory("user/{itr.username}", iterate_on='project.users'), @@ -64,11 +58,10 @@ def includeme(config): ) config.register_origin_cache_keys( User, - cache_keys=[ - key_factory("user/{obj.username}"), - ], + cache_keys=["user/{obj.username}"], purge_keys=[ key_factory("user/{obj.username}"), + key_factory("project/{itr.normalized_name}", iterate_on='projects') ], ) From 2bcc0a45cb04aff655b2ea49fdd494ca4fb64c51 Mon Sep 17 00:00:00 2001 From: Dustin Ingram Date: Fri, 9 Mar 2018 16:11:27 -0600 Subject: [PATCH 5/5] Delete projects when nuking user with query We don't need to do this per-project to purge the cache anymore. 
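
For illustration only (not part of the patch): a minimal sketch of why the per-project loop is redundant, assuming the key_factory/key_maker_factory behaviour added earlier in this series. FakeUser and FakeProject are made-up stand-ins for the real models; the registration shape matches the User entry added to warehouse/packaging in the previous patch.

    from warehouse.cache.origin import key_factory, key_maker_factory

    class FakeProject:
        def __init__(self, normalized_name):
            self.normalized_name = normalized_name

    class FakeUser:
        username = "example-user"
        projects = [FakeProject("warehouse"), FakeProject("pip")]

    # Same cache/purge registration shape as the User entry in
    # warehouse/packaging/__init__.py:
    key_maker = key_maker_factory(
        cache_keys=["user/{obj.username}"],
        purge_keys=[
            key_factory("user/{obj.username}"),
            key_factory("project/{itr.normalized_name}", iterate_on='projects'),
        ],
    )

    keys = key_maker(FakeUser())
    assert keys.cache == ["user/example-user"]
    # Purging the user already purges every project page the user owns,
    # so the admin view can bulk-delete the projects in a single query
    # instead of calling remove_project() per project.
    assert list(keys.purge) == [
        "user/example-user", "project/warehouse", "project/pip",
    ]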
--- tests/unit/admin/views/test_users.py | 14 ++++---------- warehouse/admin/views/users.py | 21 ++++++++++++++------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/tests/unit/admin/views/test_users.py b/tests/unit/admin/views/test_users.py index d19499edf1e7..1ba37b2aa872 100644 --- a/tests/unit/admin/views/test_users.py +++ b/tests/unit/admin/views/test_users.py @@ -18,6 +18,7 @@ from webob.multidict import MultiDict from warehouse.admin.views import users as views +from warehouse.packaging.models import Project from ....common.db.accounts import User, UserFactory, EmailFactory from ....common.db.packaging import ( @@ -178,6 +179,7 @@ class TestUserDelete: def test_deletes_user(self, db_request, monkeypatch): user = UserFactory.create() project = ProjectFactory.create() + another_project = ProjectFactory.create() journal = JournalEntryFactory(submitted_by=user) RoleFactory(project=project, user=user, role_name='Owner') deleted_user = UserFactory.create(username="deleted-user") @@ -188,17 +190,12 @@ def test_deletes_user(self, db_request, monkeypatch): db_request.user = UserFactory.create() db_request.remote_addr = '10.10.10.10' - remove_project = pretend.call_recorder(lambda *a, **kw: None) - monkeypatch.setattr(views, 'remove_project', remove_project) - result = views.user_delete(db_request) db_request.db.flush() assert not db_request.db.query(User).get(user.id) - assert remove_project.calls == [ - pretend.call(project, db_request, flash=False), - ] + assert db_request.db.query(Project).all() == [another_project] assert db_request.route_path.calls == [pretend.call('admin.user.list')] assert result.status_code == 303 assert result.location == '/foobar' @@ -213,15 +210,12 @@ def test_deletes_user_bad_confirm(self, db_request, monkeypatch): db_request.params = {'username': 'wrong'} db_request.route_path = pretend.call_recorder(lambda a, **k: '/foobar') - remove_project = pretend.call_recorder(lambda *a, **kw: None) - monkeypatch.setattr(views, 'remove_project', remove_project) - result = views.user_delete(db_request) db_request.db.flush() assert db_request.db.query(User).get(user.id) - assert remove_project.calls == [] + assert db_request.db.query(Project).all() == [project] assert db_request.route_path.calls == [ pretend.call('admin.user.detail', user_id=user.id), ] diff --git a/warehouse/admin/views/users.py b/warehouse/admin/views/users.py index 7c68fed1c336..265ce1a491d5 100644 --- a/warehouse/admin/views/users.py +++ b/warehouse/admin/views/users.py @@ -23,9 +23,8 @@ from warehouse import forms from warehouse.accounts.models import User, Email -from warehouse.packaging.models import JournalEntry, Role +from warehouse.packaging.models import JournalEntry, Project, Role from warehouse.utils.paginate import paginate_url_factory -from warehouse.utils.project import remove_project @view_config( @@ -139,16 +138,24 @@ def user_delete(request): user = request.db.query(User).get(request.matchdict['user_id']) if user.username != request.params.get('username'): - print(user.username) - print(request.params.get('username')) request.session.flash(f'Wrong confirmation input.', queue='error') return HTTPSeeOther( request.route_path('admin.user.detail', user_id=user.id) ) - # Delete projects one by one so they are purged from the cache - for project in user.projects: - remove_project(project, request, flash=False) + # Delete all the user's projects + projects = ( + request.db.query(Project) + .filter( + Project.name.in_( + request.db.query(Project.name) + .join(Role.project) + 
.filter(Role.user == user) + .subquery() + ) + ) + ) + projects.delete(synchronize_session=False) # Update all journals to point to `deleted-user` instead deleted_user = (