Skip to content

Commit 0419374

Browse files
authored
fix(suspect-resolutions): Tweak commit correlation logic and track total events (#37891)
1 parent 554bcff commit 0419374

File tree

8 files changed

+87
-33
lines changed

8 files changed

+87
-33
lines changed

src/sentry/utils/suspect_resolutions/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22

33
# make sure to increment this when making changes to anything within the 'suspect_resolutions' directory
44
# keeps track of changes to how we process suspect commits, so we can filter out analytics events by the algo version
5-
ALGO_VERSION = "0.0.3"
5+
ALGO_VERSION = "0.0.4"

src/sentry/utils/suspect_resolutions/analytics.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ class SuspectResolutionEvaluation(analytics.Event):
1616
analytics.Attribute("is_commit_correlated"),
1717
analytics.Attribute("resolved_issue_release_ids"),
1818
analytics.Attribute("candidate_issue_release_ids"),
19+
analytics.Attribute("resolved_issue_total_events"),
20+
analytics.Attribute("candidate_issue_total_events"),
1921
)
2022

2123

src/sentry/utils/suspect_resolutions/commit_correlation.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from datetime import datetime, timedelta
55
from typing import Sequence, Set
66

7-
from sentry.models import CommitFileChange, Group, GroupRelease, ReleaseCommit
7+
from sentry.models import CommitFileChange, Group, GroupRelease, Release, ReleaseCommit
88

99

1010
@dataclass
@@ -40,19 +40,20 @@ def is_issue_commit_correlated(
4040
return CommitCorrelatedResult(
4141
not resolved_filechanges.files_changed.isdisjoint(candidate_filechanges.files_changed),
4242
resolved_filechanges.release_ids,
43-
resolved_filechanges.release_ids,
43+
candidate_filechanges.release_ids,
4444
)
4545

4646

4747
def get_files_changed_in_releases(
4848
resolved_issue_time: datetime, issue_id: int, project_id: int
4949
) -> ReleaseCommitFileChanges:
50-
releases = GroupRelease.objects.filter(
51-
group_id=issue_id,
52-
project_id=project_id,
53-
last_seen__gte=(resolved_issue_time - timedelta(hours=5)),
54-
).values_list("release_id", flat=True)
55-
50+
releases = Release.objects.filter(
51+
id__in=GroupRelease.objects.filter(
52+
group_id=issue_id,
53+
project_id=project_id,
54+
).values_list("release_id", flat=True),
55+
date_added__gte=(resolved_issue_time - timedelta(hours=5)),
56+
)
5657
if not releases:
5758
return ReleaseCommitFileChanges([], set())
5859

src/sentry/utils/suspect_resolutions/get_suspect_resolutions.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from datetime import timedelta
22
from typing import Sequence
33

4+
from django.utils import timezone
5+
46
from sentry import features
57
from sentry.models import Activity, Group, GroupStatus
68
from sentry.signals import issue_resolved
@@ -15,7 +17,19 @@ def record_suspect_resolutions(
1517
organization_id, project, group, user, resolution_type, **kwargs
1618
) -> None:
1719
if features.has("projects:suspect-resolutions", project):
18-
get_suspect_resolutions.delay(group.id)
20+
if (
21+
resolution_type == "in_next_release"
22+
or resolution_type == "in_release"
23+
or resolution_type == "with_commit"
24+
or resolution_type == "in_commit"
25+
):
26+
get_suspect_resolutions.delay(
27+
group.id,
28+
eta=timezone.now() + timedelta(hours=1),
29+
expires=timezone.now() + timedelta(hours=1, minutes=30),
30+
)
31+
else:
32+
get_suspect_resolutions.delay(group.id)
1933

2034

2135
@instrumented_task(name="sentry.tasks.get_suspect_resolutions", queue="get_suspect_resolutions")
@@ -66,6 +80,8 @@ def get_suspect_resolutions(resolved_issue_id: int) -> Sequence[int]:
6680
is_commit_correlated=commit_correlation.is_correlated,
6781
resolved_issue_release_ids=commit_correlation.resolved_issue_release_ids,
6882
candidate_issue_release_ids=commit_correlation.candidate_issue_release_ids,
83+
resolved_issue_total_events=metric_correlation_result.resolved_issue_total_events,
84+
candidate_issue_total_events=metric_correlation_result.candidate_issue_total_events,
6985
)
7086

7187
return correlated_issue_ids

src/sentry/utils/suspect_resolutions/metric_correlation.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ class CandidateMetricCorrResult:
1111
candidate_suspect_resolution_id: int
1212
is_correlated: bool
1313
coefficient: float
14+
candidate_issue_total_events: int
15+
resolved_issue_total_events: int
1416

1517

1618
@dataclass
@@ -47,13 +49,18 @@ def is_issue_error_rate_correlated(
4749
x = [events for _, events in data[resolved_issue.id]]
4850
y = {csr.id: [events for _, events in data[csr.id]] for csr in candidate_suspect_resolutions}
4951

52+
resolved_issue_total_events = sum(x)
53+
candidate_issue_total_events = {csr: sum(events) for csr, events in y.items()}
54+
5055
coefficients = {csr_id: calculate_pearson_correlation_coefficient(x, y[csr_id]) for csr_id in y}
5156

5257
results = [
5358
CandidateMetricCorrResult(
5459
candidate_suspect_resolution_id=csr_id,
5560
is_correlated=coefficient > 0.4,
5661
coefficient=coefficient,
62+
candidate_issue_total_events=candidate_issue_total_events[csr_id],
63+
resolved_issue_total_events=resolved_issue_total_events,
5764
)
5865
for (csr_id, coefficient) in coefficients.items()
5966
]

tests/sentry/utils/suspect_resolutions/test_commit_correlation.py

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@ class CommitCorrelationTest(TestCase):
2222
def setup(self, status=GroupStatus.RESOLVED):
2323
project = self.create_project()
2424
issue = self.create_group(project=project, status=status, resolved_at=timezone.now())
25-
release = self.create_release(project=project, version="1")
25+
release = self.create_release(
26+
project=project, version="1", date_added=timezone.now() - timedelta(hours=3)
27+
)
2628
repo = self.create_repo(project=project, name=project.name)
2729
commit = Commit.objects.create(
2830
organization_id=project.organization_id, repository_id=repo.id, key="1"
@@ -40,7 +42,6 @@ def setup(self, status=GroupStatus.RESOLVED):
4042
project_id=project.id,
4143
group_id=issue.id,
4244
release_id=release.id,
43-
last_seen=(timezone.now() - timedelta(hours=3)),
4445
)
4546

4647
return project, issue, release, repo
@@ -53,7 +54,7 @@ def test_get_files_changed_resolved_in_release(self):
5354
files_changed = get_files_changed_in_releases(timezone.now(), issue.id, project.id)
5455

5556
assert files_changed.files_changed == {".random", ".random2"}
56-
assert files_changed.release_ids.first() == release.id
57+
assert files_changed.release_ids.first().id == release.id
5758

5859
def test_get_files_changed_resolved_in_commit(self):
5960
(project, issue, release, repo) = self.setup()
@@ -64,7 +65,7 @@ def test_get_files_changed_resolved_in_commit(self):
6465
release_ids, files_changed = (res.release_ids, res.files_changed)
6566

6667
assert files_changed == {".random", ".random2"}
67-
assert release_ids.first() == release.id
68+
assert release_ids.first().id == release.id
6869

6970
def test_get_files_changed_resolved_in_pull_request(self):
7071
(project, issue, release, repo) = self.setup()
@@ -75,23 +76,23 @@ def test_get_files_changed_resolved_in_pull_request(self):
7576
release_ids, files_changed = (res.release_ids, res.files_changed)
7677

7778
assert files_changed == {".random", ".random2"}
78-
assert release_ids.first() == release.id
79+
assert release_ids.first().id == release.id
7980

8081
def test_get_files_changed_unresolved_issue(self):
8182
(project, issue, release, repo) = self.setup(status=GroupStatus.UNRESOLVED)
8283
res = get_files_changed_in_releases(timezone.now(), issue.id, project.id)
8384
release_ids, files_changed = (res.release_ids, res.files_changed)
8485

8586
assert files_changed == {".random", ".random2"}
86-
assert release_ids.first() == release.id
87+
assert release_ids.first().id == release.id
8788

8889
def test_get_files_changed_manually_resolved(self):
8990
(project, issue, release, repo) = self.setup()
9091
res = get_files_changed_in_releases(timezone.now(), issue.id, project.id)
9192
release_ids, files_changed = (res.release_ids, res.files_changed)
9293

9394
assert files_changed == {".random", ".random2"}
94-
assert release_ids.first() == release.id
95+
assert release_ids.first().id == release.id
9596

9697
def test_no_files_changed(self):
9798
project = self.create_project()
@@ -127,8 +128,8 @@ def test_no_files_changed(self):
127128

128129
assert res1.files_changed == set()
129130
assert res2.files_changed == set()
130-
assert res1.release_ids.first() == release.id
131-
assert res2.release_ids.first() == release2.id
131+
assert res1.release_ids.first().id == release.id
132+
assert res2.release_ids.first().id == release2.id
132133
assert not is_issue_commit_correlated(group1.id, group2.id, project.id).is_correlated
133134

134135
def test_files_changed_unreleased_commits(self):
@@ -156,7 +157,7 @@ def test_files_changed_unreleased_commits(self):
156157
release_ids, files_changed = (res.release_ids, res.files_changed)
157158

158159
assert files_changed == set()
159-
assert release_ids.first() == release.id
160+
assert release_ids.first().id == release.id
160161

161162
def test_get_files_changed_shared_files(self):
162163
(project, issue, release, repo) = self.setup()
@@ -213,14 +214,16 @@ def test_get_files_changed_no_shared_files(self):
213214

214215
assert res1.files_changed == {".random", ".random2"}
215216
assert res2.files_changed == {".gitignore"}
216-
assert res1.release_ids.first() == release.id
217-
assert res2.release_ids.first() == release2.id
217+
assert res1.release_ids.first().id == release.id
218+
assert res2.release_ids.first().id == release2.id
218219
assert not is_issue_commit_correlated(issue.id, issue2.id, project.id).is_correlated
219220

220221
def get_files_changed_outside_of_time_window(self):
221222
project = self.create_project()
222223
group = self.create_group(project=project, resolved_at=timezone.now())
223-
release = self.create_release(project=project, version="1")
224+
release = self.create_release(
225+
project=project, version="1", date_added=timezone.now() - timedelta(hours=8)
226+
)
224227
repo = self.create_repo(project=project, name=project.name)
225228
commit = Commit.objects.create(
226229
organization_id=project.organization_id, repository_id=repo.id, key="1"
@@ -235,7 +238,6 @@ def get_files_changed_outside_of_time_window(self):
235238
project_id=project.id,
236239
group_id=group.id,
237240
release_id=release.id,
238-
last_seen=(timezone.now() - timedelta(hours=8)),
239241
)
240242

241243
res = get_files_changed_in_releases(group.resolved_at, group.id, project.id)

tests/sentry/utils/suspect_resolutions/test_get_suspect_resolutions.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ class GetSuspectResolutionsTest(TestCase):
2727
"sentry.utils.suspect_resolutions.get_suspect_resolutions.is_issue_error_rate_correlated",
2828
mock.Mock(
2929
return_value=IssueReleaseMetricCorrResult(
30-
[(CandidateMetricCorrResult(0, True, 0.5))],
30+
[(CandidateMetricCorrResult(0, True, 0.5, 100, 100))],
3131
timezone.now(),
3232
timezone.now() - timedelta(days=1),
3333
timezone.now() - timedelta(hours=2),
@@ -56,7 +56,7 @@ def test_get_suspect_resolutions(self):
5656
"sentry.utils.suspect_resolutions.get_suspect_resolutions.is_issue_error_rate_correlated",
5757
mock.Mock(
5858
return_value=IssueReleaseMetricCorrResult(
59-
[(CandidateMetricCorrResult(0, True, 0.5))],
59+
[(CandidateMetricCorrResult(0, True, 0.5, 100, 100))],
6060
timezone.now(),
6161
timezone.now() - timedelta(days=1),
6262
timezone.now() - timedelta(hours=2),
@@ -80,7 +80,7 @@ def test_get_suspect_resolutions_uncorrelated_commit_data(self):
8080
"sentry.utils.suspect_resolutions.get_suspect_resolutions.is_issue_error_rate_correlated",
8181
mock.Mock(
8282
return_value=IssueReleaseMetricCorrResult(
83-
[(CandidateMetricCorrResult(0, False, 0.2))],
83+
[(CandidateMetricCorrResult(0, False, 0.2, 100, 100))],
8484
timezone.now(),
8585
timezone.now() - timedelta(days=1),
8686
timezone.now() - timedelta(hours=2),
@@ -103,7 +103,7 @@ def test_get_suspect_resolutions_uncorrelated_metric_data(self):
103103
"sentry.utils.suspect_resolutions.get_suspect_resolutions.is_issue_error_rate_correlated",
104104
mock.Mock(
105105
return_value=IssueReleaseMetricCorrResult(
106-
[(CandidateMetricCorrResult(0, False, 0.2))],
106+
[(CandidateMetricCorrResult(0, False, 0.2, 100, 100))],
107107
timezone.now(),
108108
timezone.now() - timedelta(days=1),
109109
timezone.now() - timedelta(hours=2),
@@ -158,7 +158,7 @@ def test_get_suspect_resolutions_no_other_issues_in_project(self):
158158
"sentry.utils.suspect_resolutions.get_suspect_resolutions.is_issue_error_rate_correlated",
159159
mock.Mock(
160160
return_value=IssueReleaseMetricCorrResult(
161-
[(CandidateMetricCorrResult(0, True, 0.5))],
161+
[(CandidateMetricCorrResult(0, True, 0.5, 100, 100))],
162162
datetime(2022, 1, 3),
163163
datetime(2022, 1, 2),
164164
datetime(2022, 1, 1),
@@ -193,6 +193,8 @@ def test_suspect_resolutions_evaluation_analytics_event(self, record):
193193
is_commit_correlated=True,
194194
resolved_issue_release_ids=[1, 2],
195195
candidate_issue_release_ids=[3, 4],
196+
resolved_issue_total_events=100,
197+
candidate_issue_total_events=100,
196198
)
197199
]
198200

@@ -202,17 +204,15 @@ def test_record_suspect_resolutions(self, mock_record_suspect_resolutions):
202204
project = self.create_project(organization=organization)
203205
user = self.create_user()
204206
resolved_issue = self.create_group(project=project)
205-
resolution_type = Activity.objects.create(
206-
project=project, group=resolved_issue, type=ActivityType.SET_RESOLVED_IN_RELEASE.value
207-
)
207+
resolution_type = "in_release"
208208

209209
with self.feature("projects:suspect-resolutions"):
210210
issue_resolved.send(
211211
organization_id=organization.id,
212212
project=project,
213213
group=resolved_issue,
214214
user=user,
215-
resolution_type=resolution_type.type,
215+
resolution_type=resolution_type,
216216
sender=type(self.project),
217217
)
218218

tests/sentry/utils/suspect_resolutions/test_metric_correlation.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ def test_correlated_issues(self, mock_get_range):
5656
group1_pearson_values = [events for _, events in group1_events]
5757
group2_pearson_values = [events for _, events in group2_events]
5858

59+
group1_total_events = sum(group1_pearson_values)
60+
group2_total_events = sum(group2_pearson_values)
61+
5962
coefficient = calculate_pearson_correlation_coefficient(
6063
group1_pearson_values, group2_pearson_values
6164
)
@@ -67,6 +70,8 @@ def test_correlated_issues(self, mock_get_range):
6770
candidate_suspect_resolution_id=group2.id,
6871
is_correlated=True,
6972
coefficient=coefficient,
73+
candidate_issue_total_events=group2_total_events,
74+
resolved_issue_total_events=group1_total_events,
7075
)
7176
]
7277
assert result.issue_resolved_time == group1.resolved_at
@@ -85,6 +90,9 @@ def test_uncorrelated_issues(self, mock_get_range):
8590
group1_pearson_values = [events for _, events in group1_events]
8691
group2_pearson_values = [events for _, events in group2_events]
8792

93+
group1_total_events = sum(group1_pearson_values)
94+
group2_total_events = sum(group2_pearson_values)
95+
8896
coefficient = calculate_pearson_correlation_coefficient(
8997
group1_pearson_values, group2_pearson_values
9098
)
@@ -96,6 +104,8 @@ def test_uncorrelated_issues(self, mock_get_range):
96104
candidate_suspect_resolution_id=group2.id,
97105
is_correlated=False,
98106
coefficient=coefficient,
107+
candidate_issue_total_events=group2_total_events,
108+
resolved_issue_total_events=group1_total_events,
99109
)
100110
]
101111
assert result.issue_resolved_time == group1.resolved_at
@@ -113,6 +123,9 @@ def test_perfect_correlation(self, mock_get_range):
113123
group1_pearson_values = [events for _, events in group1_events]
114124
group2_pearson_values = [events for _, events in group1_events]
115125

126+
group1_total_events = sum(group1_pearson_values)
127+
group2_total_events = sum(group2_pearson_values)
128+
116129
coefficient = calculate_pearson_correlation_coefficient(
117130
group1_pearson_values, group2_pearson_values
118131
)
@@ -124,6 +137,8 @@ def test_perfect_correlation(self, mock_get_range):
124137
candidate_suspect_resolution_id=group2.id,
125138
is_correlated=True,
126139
coefficient=coefficient,
140+
candidate_issue_total_events=group2_total_events,
141+
resolved_issue_total_events=group1_total_events,
127142
)
128143
]
129144
assert result.issue_resolved_time == group1.resolved_at
@@ -158,6 +173,11 @@ def test_multiple_groups(self, mock_get_range):
158173
group3_pearson_values = [events for _, events in group3_events]
159174
group4_pearson_values = [events for _, events in group4_events]
160175

176+
group1_total_events = sum(group1_pearson_values)
177+
group2_total_events = sum(group2_pearson_values)
178+
group3_total_events = sum(group3_pearson_values)
179+
group4_total_events = sum(group4_pearson_values)
180+
161181
coefficient_group2 = calculate_pearson_correlation_coefficient(
162182
group1_pearson_values, group2_pearson_values
163183
)
@@ -175,16 +195,22 @@ def test_multiple_groups(self, mock_get_range):
175195
candidate_suspect_resolution_id=group2.id,
176196
is_correlated=True,
177197
coefficient=coefficient_group2,
198+
candidate_issue_total_events=group2_total_events,
199+
resolved_issue_total_events=group1_total_events,
178200
),
179201
CandidateMetricCorrResult(
180202
candidate_suspect_resolution_id=group3.id,
181203
is_correlated=True,
182204
coefficient=coefficient_group3,
205+
candidate_issue_total_events=group3_total_events,
206+
resolved_issue_total_events=group1_total_events,
183207
),
184208
CandidateMetricCorrResult(
185209
candidate_suspect_resolution_id=group4.id,
186210
is_correlated=True,
187211
coefficient=coefficient_group4,
212+
candidate_issue_total_events=group4_total_events,
213+
resolved_issue_total_events=group1_total_events,
188214
),
189215
]
190216
assert result.issue_resolved_time == group1.resolved_at

0 commit comments

Comments
 (0)