Skip to content

Commit 200673b

Browse files
xmunozewdurbin
andcommitted
Add backfill functionality to check admin #7094 (#7232)
* Add backfill functionality to check admin #7094 - Add backfill task - Change lookup of checks to check_name instead of id - Load checks that are also in "evaluation" state * Add unit tests for backfill. - Log number of runs executed by backfill - Perform basic validation on sample_rate input - Clean up other testing logic. * Remove superfluous 'all()' * Code review changes. - Set backfill size to a fix number, not configurable via web ui. - Backfill task enqueues run_check tasks - Only retry if `check.run` fails, not if loading the check fails. - Use exponential backoff for retries. * Update warehouse/admin/templates/admin/malware/checks/detail.html Co-Authored-By: Ernest W. Durbin III <[email protected]> Co-authored-by: Ernest W. Durbin III <[email protected]>
1 parent 813d398 commit 200673b

File tree

8 files changed

+247
-71
lines changed

8 files changed

+247
-71
lines changed

tests/unit/admin/test_routes.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,11 @@ def test_includeme():
132132
"/admin/checks/{check_name}/change_state",
133133
domain=warehouse,
134134
),
135+
pretend.call(
136+
"admin.checks.run_backfill",
137+
"/admin/checks/{check_name}/run_backfill",
138+
domain=warehouse,
139+
),
135140
pretend.call("admin.verdicts.list", "/admin/verdicts/", domain=warehouse),
136141
pretend.call(
137142
"admin.verdicts.detail", "/admin/verdicts/{verdict_id}", domain=warehouse

tests/unit/admin/views/test_checks.py

Lines changed: 63 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@
1010
# See the License for the specific language governing permissions and
1111
# limitations under the License.
1212

13-
import uuid
14-
1513
import pretend
1614
import pytest
1715

@@ -67,6 +65,12 @@ def test_get_check_not_found(self, db_request):
6765

6866

6967
class TestChangeCheckState:
68+
def test_no_check_state(self, db_request):
69+
check = MalwareCheckFactory.create()
70+
db_request.matchdict["check_name"] = check.name
71+
with pytest.raises(HTTPNotFound):
72+
views.change_check_state(db_request)
73+
7074
@pytest.mark.parametrize(
7175
("final_state"), [MalwareCheckState.disabled, MalwareCheckState.wiped_out]
7276
)
@@ -75,7 +79,7 @@ def test_change_to_valid_state(self, db_request, final_state):
7579
name="MyCheck", state=MalwareCheckState.disabled
7680
)
7781

78-
db_request.POST = {"id": check.id, "check_state": final_state.value}
82+
db_request.POST = {"check_state": final_state.value}
7983
db_request.matchdict["check_name"] = check.name
8084

8185
db_request.session = pretend.stub(
@@ -107,7 +111,7 @@ def test_change_to_invalid_state(self, db_request):
107111
check = MalwareCheckFactory.create(name="MyCheck")
108112
initial_state = check.state
109113
invalid_check_state = "cancelled"
110-
db_request.POST = {"id": check.id, "check_state": invalid_check_state}
114+
db_request.POST = {"check_state": invalid_check_state}
111115
db_request.matchdict["check_name"] = check.name
112116

113117
db_request.session = pretend.stub(
@@ -124,13 +128,62 @@ def test_change_to_invalid_state(self, db_request):
124128
]
125129
assert check.state == initial_state
126130

127-
def test_check_not_found(self, db_request):
128-
db_request.POST = {"id": uuid.uuid4(), "check_state": "enabled"}
129-
db_request.matchdict["check_name"] = "DoesNotExist"
131+
132+
class TestRunBackfill:
133+
@pytest.mark.parametrize(
134+
("check_state", "message"),
135+
[
136+
(
137+
MalwareCheckState.disabled,
138+
"Check must be in 'enabled' or 'evaluation' state to run a backfill.",
139+
),
140+
(
141+
MalwareCheckState.wiped_out,
142+
"Check must be in 'enabled' or 'evaluation' state to run a backfill.",
143+
),
144+
],
145+
)
146+
def test_invalid_backfill_parameters(self, db_request, check_state, message):
147+
check = MalwareCheckFactory.create(state=check_state)
148+
db_request.matchdict["check_name"] = check.name
149+
150+
db_request.session = pretend.stub(
151+
flash=pretend.call_recorder(lambda *a, **kw: None)
152+
)
130153

131154
db_request.route_path = pretend.call_recorder(
132-
lambda *a, **kw: "/admin/checks/DoesNotExist/change_state"
155+
lambda *a, **kw: "/admin/checks/%s/run_backfill" % check.name
133156
)
134157

135-
with pytest.raises(HTTPNotFound):
136-
views.change_check_state(db_request)
158+
views.run_backfill(db_request)
159+
160+
assert db_request.session.flash.calls == [pretend.call(message, queue="error")]
161+
162+
def test_sucess(self, db_request):
163+
check = MalwareCheckFactory.create(state=MalwareCheckState.enabled)
164+
db_request.matchdict["check_name"] = check.name
165+
166+
db_request.session = pretend.stub(
167+
flash=pretend.call_recorder(lambda *a, **kw: None)
168+
)
169+
170+
db_request.route_path = pretend.call_recorder(
171+
lambda *a, **kw: "/admin/checks/%s/run_backfill" % check.name
172+
)
173+
174+
backfill_recorder = pretend.stub(
175+
delay=pretend.call_recorder(lambda *a, **kw: None)
176+
)
177+
178+
db_request.task = pretend.call_recorder(lambda *a, **kw: backfill_recorder)
179+
180+
views.run_backfill(db_request)
181+
182+
assert db_request.session.flash.calls == [
183+
pretend.call(
184+
"Running %s on 10000 %ss!" % (check.name, check.hooked_object.value),
185+
queue="success",
186+
)
187+
]
188+
189+
assert backfill_recorder.delay.calls == [pretend.call(check.name, 10000)]

tests/unit/malware/test_tasks.py

Lines changed: 74 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -19,76 +19,112 @@
1919
import warehouse.malware.checks as checks
2020

2121
from warehouse.malware.models import MalwareCheck, MalwareCheckState, MalwareVerdict
22-
from warehouse.malware.tasks import remove_verdicts, run_check, sync_checks
22+
from warehouse.malware.tasks import backfill, remove_verdicts, run_check, sync_checks
2323

2424
from ...common.db.malware import MalwareCheckFactory, MalwareVerdictFactory
2525
from ...common.db.packaging import FileFactory, ProjectFactory, ReleaseFactory
2626

2727

2828
class TestRunCheck:
29-
def test_success(self, monkeypatch, db_request):
30-
project = ProjectFactory.create(name="foo")
31-
release = ReleaseFactory.create(project=project)
32-
file0 = FileFactory.create(release=release, filename="foo.bar")
29+
def test_success(self, db_request):
30+
file0 = FileFactory.create()
3331
MalwareCheckFactory.create(name="ExampleCheck", state=MalwareCheckState.enabled)
34-
3532
task = pretend.stub()
3633
run_check(task, db_request, "ExampleCheck", file0.id)
34+
3735
assert db_request.db.query(MalwareVerdict).one()
3836

39-
def test_missing_check_id(self, monkeypatch, db_session):
40-
exc = NoResultFound("No row was found for one()")
37+
def test_disabled_check(self, db_request):
38+
MalwareCheckFactory.create(
39+
name="ExampleCheck", state=MalwareCheckState.disabled
40+
)
41+
task = pretend.stub()
4142

42-
class FakeMalwareCheck:
43-
def __init__(self, db):
44-
raise exc
43+
with pytest.raises(NoResultFound):
44+
run_check(
45+
task,
46+
db_request,
47+
"ExampleCheck",
48+
"d03d75d1-2511-4a8b-9759-62294a6fe3a7",
49+
)
4550

46-
checks.FakeMalwareCheck = FakeMalwareCheck
51+
def test_missing_check(self, db_request):
52+
task = pretend.stub()
53+
with pytest.raises(AttributeError):
54+
run_check(
55+
task,
56+
db_request,
57+
"DoesNotExistCheck",
58+
"d03d75d1-2511-4a8b-9759-62294a6fe3a7",
59+
)
4760

48-
class Task:
49-
@staticmethod
50-
@pretend.call_recorder
51-
def retry(exc):
52-
raise celery.exceptions.Retry
61+
def test_retry(self, db_session, monkeypatch):
62+
MalwareCheckFactory.create(
63+
name="ExampleCheck", state=MalwareCheckState.evaluation
64+
)
5365

54-
task = Task()
66+
exc = Exception("Scan failed")
5567

68+
def scan(self, file_id):
69+
raise exc
70+
71+
monkeypatch.setattr(checks.ExampleCheck, "scan", scan)
72+
73+
task = pretend.stub(
74+
retry=pretend.call_recorder(pretend.raiser(celery.exceptions.Retry)),
75+
)
5676
request = pretend.stub(
5777
db=db_session,
58-
log=pretend.stub(
59-
error=pretend.call_recorder(lambda *args, **kwargs: None),
60-
),
78+
log=pretend.stub(error=pretend.call_recorder(lambda *args, **kwargs: None)),
6179
)
6280

6381
with pytest.raises(celery.exceptions.Retry):
6482
run_check(
65-
task,
66-
request,
67-
"FakeMalwareCheck",
68-
"d03d75d1-2511-4a8b-9759-62294a6fe3a7",
83+
task, request, "ExampleCheck", "d03d75d1-2511-4a8b-9759-62294a6fe3a7"
6984
)
7085

7186
assert request.log.error.calls == [
72-
pretend.call(
73-
"Error executing check %s: %s",
74-
"FakeMalwareCheck",
75-
"No row was found for one()",
76-
)
87+
pretend.call("Error executing check ExampleCheck: Scan failed")
7788
]
7889

7990
assert task.retry.calls == [pretend.call(exc=exc)]
8091

81-
del checks.FakeMalwareCheck
8292

83-
def test_missing_check(self, db_request):
93+
class TestBackfill:
94+
def test_invalid_check_name(self, db_request):
8495
task = pretend.stub()
8596
with pytest.raises(AttributeError):
86-
run_check(
87-
task,
88-
db_request,
89-
"DoesNotExistCheck",
90-
"d03d75d1-2511-4a8b-9759-62294a6fe3a7",
91-
)
97+
backfill(task, db_request, "DoesNotExist", 1)
98+
99+
@pytest.mark.parametrize(
100+
("num_objects", "num_runs"), [(11, 1), (11, 11), (101, 90)],
101+
)
102+
def test_run(self, db_session, num_objects, num_runs):
103+
files = []
104+
for i in range(num_objects):
105+
files.append(FileFactory.create())
106+
107+
MalwareCheckFactory.create(name="ExampleCheck", state=MalwareCheckState.enabled)
108+
enqueue_recorder = pretend.stub(
109+
delay=pretend.call_recorder(lambda *a, **kw: None)
110+
)
111+
task = pretend.call_recorder(lambda *args, **kwargs: enqueue_recorder)
112+
113+
request = pretend.stub(
114+
db=db_session,
115+
log=pretend.stub(info=pretend.call_recorder(lambda *args, **kwargs: None)),
116+
task=task,
117+
)
118+
119+
backfill(task, request, "ExampleCheck", num_runs)
120+
121+
assert request.log.info.calls == [
122+
pretend.call("Running backfill on %d Files." % num_runs),
123+
]
124+
125+
assert enqueue_recorder.delay.calls == [
126+
pretend.call("ExampleCheck", files[i].id) for i in range(num_runs)
127+
]
92128

93129

94130
class TestSyncChecks:

warehouse/admin/routes.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,11 @@ def includeme(config):
139139
"/admin/checks/{check_name}/change_state",
140140
domain=warehouse,
141141
)
142+
config.add_route(
143+
"admin.checks.run_backfill",
144+
"/admin/checks/{check_name}/run_backfill",
145+
domain=warehouse,
146+
)
142147
config.add_route("admin.verdicts.list", "/admin/verdicts/", domain=warehouse)
143148
config.add_route(
144149
"admin.verdicts.detail", "/admin/verdicts/{verdict_id}", domain=warehouse

warehouse/admin/templates/admin/malware/checks/detail.html

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,10 @@ <h4>Revision History</h4>
4848
<h3 class="box-title">Change State</h3>
4949
</div>
5050
<form method="POST" action="{{ request.route_path('admin.checks.change_state', check_name=check.name) }}">
51-
<input type="hidden" name="id" value="{{ check.id }}">
5251
<input name="csrf_token" type="hidden" value="{{ request.session.get_csrf_token() }}">
5352
<div class="box-body">
5453
<div class="form-group col-sm-4">
55-
<select name="check_state" id="check_state">
54+
<select class="form-control" name="check_state">
5655
{% for state in states %}
5756
<option value="{{ state.value }}" {{'disabled selected' if check.state == state else ''}}>
5857
{{ state.value }}
@@ -66,5 +65,19 @@ <h3 class="box-title">Change State</h3>
6665
</div>
6766
</form>
6867
</div>
68+
<div class="box box-primary">
69+
<div class="box-header with-border">
70+
<h3 class="box-title">Run Evaluation</h3>
71+
</div>
72+
<form method="POST" action="{{ request.route_path('admin.checks.run_backfill', check_name=check.name) }}">
73+
<input name="csrf_token" type="hidden" value="{{ request.session.get_csrf_token() }}">
74+
<div class="box-body">
75+
<p>Run this check against 10,000 {{ check.hooked_object.value }}s, selected at random. This is used to evaluate the efficacy of a check.</p>
76+
<div class="pull-right col-sm-4">
77+
<button type="submit" class="btn btn-primary pull-right">Run</button>
78+
</div>
79+
</div>
80+
</form>
81+
</div>
6982
</div>
7083
{% endblock %}

0 commit comments

Comments
 (0)