Skip to content

Commit af2f66e

Browse files
woodruffw authored and ewdurbin committed
Event-based Malware check (#7249)
* requirements: Introduce yara
* [WIP] malware/check: SetupPatternCheck
  In progress. Introduces SetupPatternCheck, an implementation of an event-based check that scans the `setup.py`s of release files for suspicious patterns.
* malware/checks: Give MalwareCheckBase.run/scan args, kwargs
* malware: Add check preparation
  Fiddle with the check/run signature a bit more.
* malware/checks: Unpack file path correctly
* docker-compose: Override FILES_BACKEND for worker
  The worker needs to be able to see the "files" virtual host during development so that malware checks can fetch their underlying release files.
* [WIP] malware/checks: setup.py extraction
* malware/checks: setup_patterns: Fix enum, seek
* malware/checks: setup_patterns: Apply YARA rules
  Each rule match becomes a verdict.
* malware/checks: setup_patterns: Prefer get over filter
* warehouse/{admin,malware}: Consistent enum names
  Also enforce uniqueness for enum values.
* warehouse/{admin,malware}: More enum changes
* tests: Update admin, malware tests
* tests: Fix enum, more test fixes
* tests: Add prepare tests
* malware/changes: base: Unpack id correctly
* tests: Begin adding SetupPatternCheck tests
* malware/checks: setup_patterns: Fix enum
* tests: More SetupPatternCheck tests
* warehouse/malware: setup_patterns: Fix enums
* tests: More SetupPatternCheck tests
* tests: Add license header
* malware/checks: setup_patterns: Add TODO
* tests: More SetupPatternCheck tests
* tests: More SetupPatternCheck tests
* tests: Complete extraction tests for SetupPatternCheck
* tests: Fix test
* malware/checks: Add docstring for prepare
* malware/checks: blacken
* malware/checks: Document, expand YARA rules
* tests, warehouse: Restructure utilities
* malware: Order some enums, reduce SetupPatternCheck verdicts
* malware/models: Add missing __lt__
* malware/checks: Always embed the model object in the prepared arguments
  Use it instead of performing a DB request in the check itself.
* malware/checks: Avoid raw bytes
* malware/changes: Remove unused import
* tests: Fixup malware tests
* warehouse/malware: blacken
* tests: Fill in malware coverage
* tests, warehouse: Add a benign verdict for SetupPatternCheck
* tests: blacken
1 parent 734a3fa commit af2f66e

24 files changed

+863
-92
lines changed

docker-compose.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -93,6 +93,7 @@ services:
9393
env_file: dev/environment
9494
environment:
9595
C_FORCE_ROOT: "1"
96+
FILES_BACKEND: "warehouse.packaging.services.LocalFileStorage path=/var/opt/warehouse/packages/ url=http://files:9001/packages/{path}"
9697
links:
9798
- db
9899
- redis

requirements/main.in

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -55,5 +55,6 @@ typeguard
5555
webauthn
5656
whitenoise
5757
WTForms>=2.0.0
58+
yara-python
5859
zope.sqlalchemy
5960
zxcvbn

requirements/main.txt

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -593,6 +593,20 @@ wired==0.2.1 \
593593
wtforms==2.2.1 \
594594
--hash=sha256:0cdbac3e7f6878086c334aa25dc5a33869a3954e9d1e015130d65a69309b3b61 \
595595
--hash=sha256:e3ee092c827582c50877cdbd49e9ce6d2c5c1f6561f849b3b068c1b8029626f1
596+
yara-python==3.11.0 \
597+
--hash=sha256:105d851e050b32951ee577148c7f1b18c0a7c64432fef8159069191d522fba86 \
598+
--hash=sha256:1d35c7f606465015de02143dfa4e1ad2f4ee85fdb5d5af756b51b2bac62ac7bc \
599+
--hash=sha256:24cd492d6bf8ecedb128f5b02886770be9df03bd1b84ab06a978d45bb1a8ff92 \
600+
--hash=sha256:58cfc837e7769811afbfb19b1db952ec01e50cdbf9df576fb587e1e343694526 \
601+
--hash=sha256:5b8d708751a66d1507d819218d06baccdf5527c147c2bd3062f087e2f367a17d \
602+
--hash=sha256:6f90bb264470235549e1bb4e355fa82895409cd46f27aceecaddfbf55e66ed71 \
603+
--hash=sha256:70d39c2238c5854e7cd8f11595317dc4d89417e88035d8acca24bcc58a93150f \
604+
--hash=sha256:8d255349d69d833bca604b4215bdf499c87357172512273feb934f6442b8e6b2 \
605+
--hash=sha256:8e44f9600607cb1d74a0f26df5d0a1c06ea54f4601206124f47f1bbb58e6a374 \
606+
--hash=sha256:9e4fafc327e3a343c545dcf5f173fa8bc712aebffe5f034d205c0bac1f1c5df6 \
607+
--hash=sha256:c919ee656139ed46a0056e8a3de179bbc98d42a2be6fb85c95b1e2ec65396b34 \
608+
--hash=sha256:e4124414d3cff9a10669569a89f585f81c8114b283ab48b2e756e0347a89de0a \
609+
--hash=sha256:f104f0bb21a0867f22e750bb4e05de629ec9f37facc84daf963385a86371b0d9
596610
zipp==1.0.0 \
597611
--hash=sha256:8dda78f06bd1674bd8720df8a50bb47b6e1233c503a4eed8e7810686bde37656 \
598612
--hash=sha256:d38fbe01bbf7a3593a32bc35a9c4453c32bc42b98c377f9bff7e9f8da157786c

tests/common/checks/hooked.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26,10 +26,14 @@ class ExampleHookedCheck(MalwareCheckBase):
2626
def __init__(self, db):
2727
super().__init__(db)
2828

29-
def scan(self, file_id=None):
29+
def scan(self, **kwargs):
30+
file_id = kwargs.get("obj_id")
31+
if file_id is None:
32+
return
33+
3034
self.add_verdict(
3135
file_id=file_id,
32-
classification=VerdictClassification.benign,
36+
classification=VerdictClassification.Benign,
3337
confidence=VerdictConfidence.High,
3438
message="Nothing to see here!",
3539
)

tests/unit/admin/views/test_checks.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -72,11 +72,11 @@ def test_no_check_state(self, db_request):
7272
views.change_check_state(db_request)
7373

7474
@pytest.mark.parametrize(
75-
("final_state"), [MalwareCheckState.disabled, MalwareCheckState.wiped_out]
75+
("final_state"), [MalwareCheckState.Disabled, MalwareCheckState.WipedOut]
7676
)
7777
def test_change_to_valid_state(self, db_request, final_state):
7878
check = MalwareCheckFactory.create(
79-
name="MyCheck", state=MalwareCheckState.disabled
79+
name="MyCheck", state=MalwareCheckState.Disabled
8080
)
8181

8282
db_request.POST = {"check_state": final_state.value}
@@ -104,7 +104,7 @@ def test_change_to_valid_state(self, db_request, final_state):
104104

105105
assert check.state == final_state
106106

107-
if final_state == MalwareCheckState.wiped_out:
107+
if final_state == MalwareCheckState.WipedOut:
108108
assert wipe_out_recorder.delay.calls == [pretend.call("MyCheck")]
109109

110110
def test_change_to_invalid_state(self, db_request):
@@ -134,11 +134,11 @@ class TestRunBackfill:
134134
("check_state", "message"),
135135
[
136136
(
137-
MalwareCheckState.disabled,
137+
MalwareCheckState.Disabled,
138138
"Check must be in 'enabled' or 'evaluation' state to run a backfill.",
139139
),
140140
(
141-
MalwareCheckState.wiped_out,
141+
MalwareCheckState.WipedOut,
142142
"Check must be in 'enabled' or 'evaluation' state to run a backfill.",
143143
),
144144
],
@@ -160,7 +160,7 @@ def test_invalid_backfill_parameters(self, db_request, check_state, message):
160160
assert db_request.session.flash.calls == [pretend.call(message, queue="error")]
161161

162162
def test_sucess(self, db_request):
163-
check = MalwareCheckFactory.create(state=MalwareCheckState.enabled)
163+
check = MalwareCheckFactory.create(state=MalwareCheckState.Enabled)
164164
db_request.matchdict["check_name"] = check.name
165165

166166
db_request.session = pretend.stub(

tests/unit/malware/checks/__init__.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,11 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License");
2+
# you may not use this file except in compliance with the License.
3+
# You may obtain a copy of the License at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# Unless required by applicable law or agreed to in writing, software
8+
# distributed under the License is distributed on an "AS IS" BASIS,
9+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
# See the License for the specific language governing permissions and
11+
# limitations under the License.
Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,11 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License");
2+
# you may not use this file except in compliance with the License.
3+
# You may obtain a copy of the License at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# Unless required by applicable law or agreed to in writing, software
8+
# distributed under the License is distributed on an "AS IS" BASIS,
9+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
# See the License for the specific language governing permissions and
11+
# limitations under the License.
Lines changed: 145 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,145 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License");
2+
# you may not use this file except in compliance with the License.
3+
# You may obtain a copy of the License at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# Unless required by applicable law or agreed to in writing, software
8+
# distributed under the License is distributed on an "AS IS" BASIS,
9+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
# See the License for the specific language governing permissions and
11+
# limitations under the License.
12+
13+
import pretend
14+
import pytest
15+
import yara
16+
17+
from warehouse.malware.checks.setup_patterns import check as c
18+
from warehouse.malware.models import (
19+
MalwareCheckState,
20+
VerdictClassification,
21+
VerdictConfidence,
22+
)
23+
24+
from .....common.db.malware import MalwareCheckFactory
25+
from .....common.db.packaging import FileFactory
26+
27+
28+
def test_initializes(db_session):
29+
check_model = MalwareCheckFactory.create(
30+
name="SetupPatternCheck", state=MalwareCheckState.Enabled
31+
)
32+
check = c.SetupPatternCheck(db_session)
33+
34+
assert check.id == check_model.id
35+
assert isinstance(check._yara_rules, yara.Rules)
36+
37+
38+
@pytest.mark.parametrize(
39+
("obj", "file_url"), [(None, pretend.stub()), (pretend.stub(), None)]
40+
)
41+
def test_scan_missing_kwargs(db_session, obj, file_url):
42+
MalwareCheckFactory.create(
43+
name="SetupPatternCheck", state=MalwareCheckState.Enabled
44+
)
45+
check = c.SetupPatternCheck(db_session)
46+
check.scan(obj=obj, file_url=file_url)
47+
48+
assert check._verdicts == []
49+
50+
51+
def test_scan_non_sdist(db_session):
52+
MalwareCheckFactory.create(
53+
name="SetupPatternCheck", state=MalwareCheckState.Enabled
54+
)
55+
check = c.SetupPatternCheck(db_session)
56+
57+
file = FileFactory.create(packagetype="bdist_wheel")
58+
59+
check.scan(obj=file, file_url=pretend.stub())
60+
61+
assert check._verdicts == []
62+
63+
64+
def test_scan_no_setup_contents(db_session, monkeypatch):
65+
monkeypatch.setattr(
66+
c, "fetch_url_content", pretend.call_recorder(lambda *a: pretend.stub())
67+
)
68+
monkeypatch.setattr(
69+
c, "extract_file_content", pretend.call_recorder(lambda *a: None)
70+
)
71+
72+
MalwareCheckFactory.create(
73+
name="SetupPatternCheck", state=MalwareCheckState.Enabled
74+
)
75+
check = c.SetupPatternCheck(db_session)
76+
77+
file = FileFactory.create(packagetype="sdist")
78+
79+
check.scan(obj=file, file_url=pretend.stub())
80+
81+
assert len(check._verdicts) == 1
82+
assert check._verdicts[0].check_id == check.id
83+
assert check._verdicts[0].file_id == file.id
84+
assert check._verdicts[0].classification == VerdictClassification.Indeterminate
85+
assert check._verdicts[0].confidence == VerdictConfidence.High
86+
assert (
87+
check._verdicts[0].message
88+
== "sdist does not contain a suitable setup.py for analysis"
89+
)
90+
91+
92+
def test_scan_benign_contents(db_session, monkeypatch):
93+
monkeypatch.setattr(
94+
c, "fetch_url_content", pretend.call_recorder(lambda *a: pretend.stub())
95+
)
96+
monkeypatch.setattr(
97+
c,
98+
"extract_file_content",
99+
pretend.call_recorder(lambda *a: b"this is a benign string"),
100+
)
101+
102+
MalwareCheckFactory.create(
103+
name="SetupPatternCheck", state=MalwareCheckState.Enabled
104+
)
105+
check = c.SetupPatternCheck(db_session)
106+
107+
file = FileFactory.create(packagetype="sdist")
108+
109+
check.scan(obj=file, file_url=pretend.stub())
110+
111+
assert len(check._verdicts) == 1
112+
assert check._verdicts[0].check_id == check.id
113+
assert check._verdicts[0].file_id == file.id
114+
assert check._verdicts[0].classification == VerdictClassification.Benign
115+
assert check._verdicts[0].confidence == VerdictConfidence.Low
116+
assert check._verdicts[0].message == "No malicious patterns found in setup.py"
117+
118+
119+
def test_scan_matched_content(db_session, monkeypatch):
120+
monkeypatch.setattr(
121+
c, "fetch_url_content", pretend.call_recorder(lambda *a: pretend.stub())
122+
)
123+
monkeypatch.setattr(
124+
c,
125+
"extract_file_content",
126+
pretend.call_recorder(
127+
lambda *a: b"this looks suspicious: os.system('cat /etc/passwd')"
128+
),
129+
)
130+
131+
MalwareCheckFactory.create(
132+
name="SetupPatternCheck", state=MalwareCheckState.Enabled
133+
)
134+
check = c.SetupPatternCheck(db_session)
135+
136+
file = FileFactory.create(packagetype="sdist")
137+
138+
check.scan(obj=file, file_url=pretend.stub())
139+
140+
assert len(check._verdicts) == 1
141+
assert check._verdicts[0].check_id == check.id
142+
assert check._verdicts[0].file_id == file.id
143+
assert check._verdicts[0].classification == VerdictClassification.Threat
144+
assert check._verdicts[0].confidence == VerdictConfidence.High
145+
assert check._verdicts[0].message == "process_spawn_in_setup"
Lines changed: 93 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,93 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License");
2+
# you may not use this file except in compliance with the License.
3+
# You may obtain a copy of the License at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# Unless required by applicable law or agreed to in writing, software
8+
# distributed under the License is distributed on an "AS IS" BASIS,
9+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
# See the License for the specific language governing permissions and
11+
# limitations under the License.
12+
13+
import io
14+
import tarfile
15+
import zipfile
16+
17+
import pretend
18+
19+
from warehouse.malware.checks import utils
20+
21+
22+
def test_fetch_url_content(monkeypatch):
23+
response = pretend.stub(
24+
raise_for_status=pretend.call_recorder(lambda: None), content=b"fake content"
25+
)
26+
requests = pretend.stub(get=pretend.call_recorder(lambda url: response))
27+
28+
monkeypatch.setattr(utils, "requests", requests)
29+
30+
io = utils.fetch_url_content("hxxp://fake_url.com")
31+
32+
assert requests.get.calls == [pretend.call("hxxp://fake_url.com")]
33+
assert response.raise_for_status.calls == [pretend.call()]
34+
assert io.getvalue() == b"fake content"
35+
36+
37+
def test_extract_file_contents_zip():
38+
zipbuf = io.BytesIO()
39+
with zipfile.ZipFile(zipbuf, mode="w") as zipobj:
40+
zipobj.writestr("toplevelgetsskipped", b"nothing to see here")
41+
zipobj.writestr("foo/setup.py", b"these are some contents")
42+
zipbuf.seek(0)
43+
44+
assert utils.extract_file_content(zipbuf, "setup.py") == b"these are some contents"
45+
46+
47+
def test_extract_file_contents_zip_no_file():
48+
zipbuf = io.BytesIO()
49+
with zipfile.ZipFile(zipbuf, mode="w") as zipobj:
50+
zipobj.writestr("foo/notsetup.py", b"these are some contents")
51+
zipbuf.seek(0)
52+
53+
assert utils.extract_file_content(zipbuf, "setup.py") is None
54+
55+
56+
def test_extract_file_contents_tar():
57+
tarbuf = io.BytesIO()
58+
with tarfile.open(fileobj=tarbuf, mode="w:gz") as tarobj:
59+
contents = io.BytesIO(b"these are some contents")
60+
member = tarfile.TarInfo(name="foo/setup.py")
61+
member.size = len(contents.getbuffer())
62+
tarobj.addfile(member, fileobj=contents)
63+
64+
contents = io.BytesIO(b"nothing to see here")
65+
member = tarfile.TarInfo(name="toplevelgetsskipped")
66+
member.size = len(contents.getbuffer())
67+
tarobj.addfile(member, fileobj=contents)
68+
tarbuf.seek(0)
69+
70+
assert utils.extract_file_content(tarbuf, "setup.py") == b"these are some contents"
71+
72+
73+
def test_extract_file_contents_tar_empty():
74+
tarbuf = io.BytesIO(b"invalid tar contents")
75+
76+
assert utils.extract_file_content(tarbuf, "setup.py") is None
77+
78+
79+
def test_extract_file_contents_tar_no_file():
80+
tarbuf = io.BytesIO()
81+
with tarfile.open(fileobj=tarbuf, mode="w:gz") as tarobj:
82+
contents = io.BytesIO(b"these are some contents")
83+
member = tarfile.TarInfo(name="foo/notsetup.py")
84+
member.size = len(contents.getbuffer())
85+
tarobj.addfile(member, fileobj=contents)
86+
87+
contents = io.BytesIO(b"nothing to see here")
88+
member = tarfile.TarInfo(name="toplevelgetsskipped")
89+
member.size = len(contents.getbuffer())
90+
tarobj.addfile(member, fileobj=contents)
91+
tarbuf.seek(0)
92+
93+
assert utils.extract_file_content(tarbuf, "setup.py") is None

0 commit comments

Comments
 (0)