Skip to content

Commit 2887fdb

Browse files
authored
Support marking unstable jobs from other repos (#5122)
Fixes pytorch/executorch#1759. Before this, a job could only be seen as unstable when it had the unstable keyword in its name. The trick of renaming the job can be done with PyTorch test jobs by manipulating the test matrix. However, that doesn't work for PyTorch build jobs or Nova workflows without moving the filter script to a separate job that runs before them, and that approach doesn't scale. So, this PR implements a different approach: HUD queries the list of open unstable issues from Rockset, then groups all jobs that have an open unstable issue into the unstable group. The PR is big because the following top-level pages need to query the list and pass it around to different React components. The main update is in the function `isUnstableJob`, which calls `hasOpenUnstableIssue`. The important call paths are as follows: * HUD main page → `getGroupingData` → `classifyGroup` → `hasOpenUnstableIssue` * HUD commit/PR pages → `CommitStatus` → `FilteredJobList` → `FailedJobInfo` → `JobSummary` → `isUnstableJob` * HUD commit/PR pages → `CommitStatus` → `WorkflowsContainer` → `WorkflowBox` → `WorkflowJobSummary` → `JobSummary` → `isUnstableJob` There are some miscellaneous pages, like the HUD flaky test pages, that also need to be updated, but they don't need the list of unstable issues, so I will just pass an empty array there. The second part of this change is on Dr.CI. ### Testing `trunk / test-coreml-delegate / macos-job` from ET is marked as unstable by pytorch/executorch#3264, and it seems to show up correctly at https://torchci-git-support-unstable-job-other-repos-fbopensource.vercel.app/hud/pytorch/executorch/main. A UX caveat is that all the invocations of the job will appear in the unstable group because there is no `unstable` keyword to distinguish them.
1 parent 2c27f44 commit 2887fdb

File tree

15 files changed

+223
-54
lines changed

15 files changed

+223
-54
lines changed

torchci/components/CommitStatus.tsx

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import FilteredJobList from "./FilteredJobList";
22
import VersionControlLinks from "./VersionControlLinks";
3-
import { CommitData, JobData } from "lib/types";
3+
import { CommitData, JobData, IssueData } from "lib/types";
44
import WorkflowBox from "./WorkflowBox";
55
import styles from "components/commit.module.css";
66
import _ from "lodash";
@@ -15,7 +15,13 @@ import useScrollTo from "lib/useScrollTo";
1515
import WorkflowDispatcher from "./WorkflowDispatcher";
1616
import { useSession } from "next-auth/react";
1717

18-
function WorkflowsContainer({ jobs }: { jobs: JobData[] }) {
18+
function WorkflowsContainer({
19+
jobs,
20+
unstableIssues,
21+
}: {
22+
jobs: JobData[];
23+
unstableIssues: IssueData[];
24+
}) {
1925
useScrollTo();
2026

2127
if (jobs.length === 0) {
@@ -44,6 +50,7 @@ function WorkflowsContainer({ jobs }: { jobs: JobData[] }) {
4450
key={workflowName}
4551
workflowName={workflowName}
4652
jobs={jobs}
53+
unstableIssues={unstableIssues}
4754
/>
4855
);
4956
})}
@@ -58,12 +65,14 @@ export default function CommitStatus({
5865
commit,
5966
jobs,
6067
isCommitPage,
68+
unstableIssues,
6169
}: {
6270
repoOwner: string;
6371
repoName: string;
6472
commit: CommitData;
6573
jobs: JobData[];
6674
isCommitPage: boolean;
75+
unstableIssues: IssueData[];
6776
}) {
6877
const session = useSession();
6978
const isAuthenticated = session.status === "authenticated";
@@ -90,26 +99,30 @@ export default function CommitStatus({
9099
pred={(job) =>
91100
isFailedJob(job) &&
92101
!isRerunDisabledTestsJob(job) &&
93-
!isUnstableJob(job)
102+
!isUnstableJob(job, unstableIssues)
94103
}
95104
showClassification
105+
unstableIssues={unstableIssues}
96106
/>
97107
<FilteredJobList
98108
filterName="Failed unstable jobs"
99109
jobs={jobs}
100-
pred={(job) => isFailedJob(job) && isUnstableJob(job)}
110+
pred={(job) => isFailedJob(job) && isUnstableJob(job, unstableIssues)}
111+
unstableIssues={unstableIssues}
101112
/>
102113
<FilteredJobList
103114
filterName="Daily rerunning disabled jobs"
104115
jobs={jobs}
105116
pred={(job) => isFailedJob(job) && isRerunDisabledTestsJob(job)}
117+
unstableIssues={unstableIssues}
106118
/>
107119
<FilteredJobList
108120
filterName="Pending jobs"
109121
jobs={jobs}
110122
pred={(job) => job.conclusion === "pending"}
123+
unstableIssues={unstableIssues}
111124
/>
112-
<WorkflowsContainer jobs={jobs} />
125+
<WorkflowsContainer jobs={jobs} unstableIssues={unstableIssues} />
113126
{isAuthenticated && isCommitPage && (
114127
<WorkflowDispatcher
115128
repoOwner={repoOwner}

torchci/components/FilteredJobList.tsx

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,24 +6,26 @@ import JobAnnotationToggle from "./JobAnnotationToggle";
66
import JobLinks from "./JobLinks";
77
import JobSummary from "./JobSummary";
88
import LogViewer from "./LogViewer";
9-
import { JobAnnotation } from "lib/types";
9+
import { JobAnnotation, IssueData } from "lib/types";
1010
import useScrollTo from "lib/useScrollTo";
1111

1212
function FailedJobInfo({
1313
job,
1414
showClassification,
1515
annotation,
16+
unstableIssues,
1617
}: {
1718
job: JobData;
1819
showClassification: boolean;
1920
annotation: JobAnnotation;
21+
unstableIssues: IssueData[];
2022
}) {
2123
const router = useRouter();
2224
useScrollTo();
2325
const { repoOwner, repoName } = router.query;
2426
return (
2527
<li key={job.id} id={job.id}>
26-
<JobSummary job={job} />
28+
<JobSummary job={job} unstableIssues={unstableIssues} />
2729
<div>
2830
<JobLinks job={job} />
2931
</div>
@@ -44,11 +46,13 @@ export default function FilteredJobList({
4446
jobs,
4547
pred,
4648
showClassification = false,
49+
unstableIssues,
4750
}: {
4851
filterName: string;
4952
jobs: JobData[];
5053
pred: (job: JobData) => boolean;
5154
showClassification?: boolean;
55+
unstableIssues: IssueData[];
5256
}) {
5357
const router = useRouter();
5458
const { repoOwner, repoName } = router.query;
@@ -84,6 +88,7 @@ export default function FilteredJobList({
8488
data[job?.id ?? ""]["annotation"]) ??
8589
JobAnnotation.NULL
8690
}
91+
unstableIssues={unstableIssues}
8792
/>
8893
))}
8994
</ul>

torchci/components/GroupJobConclusion.tsx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { getGroupConclusionChar } from "lib/JobClassifierUtil";
2-
import { GroupData, JobData } from "lib/types";
2+
import { GroupData, JobData, IssueData } from "lib/types";
33
import styles from "./JobConclusion.module.css";
44
import hudStyles from "./hud.module.css";
55
import TooltipTarget from "components/TooltipTarget";
@@ -39,11 +39,13 @@ export default function HudGroupedCell({
3939
groupData,
4040
isExpanded,
4141
isClassified,
42+
unstableIssues,
4243
}: {
4344
sha: string;
4445
groupData: GroupData;
4546
isExpanded: boolean;
4647
isClassified: boolean;
48+
unstableIssues: IssueData[];
4749
}) {
4850
const [pinnedId, setPinnedId] = useContext(PinnedTooltipContext);
4951
const style = pinnedId.name == groupData.groupName ? hudStyles.highlight : "";
@@ -55,7 +57,7 @@ export default function HudGroupedCell({
5557
const failedPreviousRunJobs = [];
5658
for (const job of groupData.jobs) {
5759
if (isFailedJob(job)) {
58-
if (isRerunDisabledTestsJob(job) || isUnstableJob(job)) {
60+
if (isRerunDisabledTestsJob(job) || isUnstableJob(job, unstableIssues)) {
5961
warningOnlyJobs.push(job);
6062
} else {
6163
erroredJobs.push(job);

torchci/components/JobLinks.tsx

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import TestInsightsLink from "./TestInsights";
77
import ReproductionCommand from "./ReproductionCommand";
88
import { useSession } from "next-auth/react";
99
import { isFailure } from "../lib/JobClassifierUtil";
10+
import { transformJobName } from "../lib/jobUtils";
1011

1112
export default function JobLinks({
1213
job,
@@ -163,9 +164,11 @@ function DisableTest({ job, label }: { job: JobData; label: string }) {
163164

164165
const issues: IssueData[] = data.issues;
165166
const matchingIssues = issues.filter((issue) => issue.title === issueTitle);
167+
const repo = job.repo ?? "pytorch/pytorch";
166168

167169
return (
168170
<DisableIssue
171+
repo={repo}
169172
matchingIssues={matchingIssues}
170173
issueTitle={issueTitle}
171174
issueBody={issueBody}
@@ -174,21 +177,6 @@ function DisableTest({ job, label }: { job: JobData; label: string }) {
174177
);
175178
}
176179

177-
const jobNameRe = /^(.*) \(([^,]*),.*\)/;
178-
function transformJobName(jobName?: string) {
179-
if (jobName == undefined) {
180-
return null;
181-
}
182-
183-
// We want to have the job name in the following format WORKFLOW / JOB (CONFIG)
184-
const jobNameMatch = jobName.match(jobNameRe);
185-
if (jobNameMatch !== null) {
186-
return `${jobNameMatch[1]} (${jobNameMatch[2]})`;
187-
}
188-
189-
return jobName;
190-
}
191-
192180
function formatUnstableJobBody() {
193181
return encodeURIComponent(
194182
"> Please provide a brief reason on why you need to mark this job as unstable."
@@ -229,9 +217,11 @@ function UnstableJob({ job, label }: { job: JobData; label: string }) {
229217
const matchingIssues = issues.filter((issue) =>
230218
issueTitle.includes(issue.title)
231219
);
220+
const repo = job.repo ?? "pytorch/pytorch";
232221

233222
return (
234223
<DisableIssue
224+
repo={repo}
235225
matchingIssues={matchingIssues}
236226
issueTitle={issueTitle}
237227
issueBody={issueBody}
@@ -241,17 +231,19 @@ function UnstableJob({ job, label }: { job: JobData; label: string }) {
241231
}
242232

243233
function DisableIssue({
234+
repo,
244235
matchingIssues,
245236
issueTitle,
246237
issueBody,
247238
isDisabledTest,
248239
}: {
240+
repo: string;
249241
matchingIssues: IssueData[];
250242
issueTitle: string;
251243
issueBody: string;
252244
isDisabledTest: boolean;
253245
}) {
254-
let issueLink = `https://github.com/pytorch/pytorch/issues/new?title=${issueTitle}&body=${issueBody}`;
246+
let issueLink = `https://github.com/${repo}/issues/new?title=${issueTitle}&body=${issueBody}`;
255247
let linkText = isDisabledTest
256248
? "Disable test"
257249
: issueTitle.includes("UNSTABLE")

torchci/components/JobSummary.tsx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { JobData } from "lib/types";
1+
import { JobData, IssueData } from "lib/types";
22
import JobConclusion from "./JobConclusion";
33
import {
44
isFailedJob,
@@ -26,17 +26,19 @@ function BranchName({
2626
export default function JobSummary({
2727
job,
2828
highlight,
29+
unstableIssues,
2930
}: {
3031
job: JobData;
3132
highlight: boolean;
33+
unstableIssues: IssueData[];
3234
}) {
3335
return (
3436
<>
3537
<JobConclusion
3638
conclusion={job.conclusion}
3739
warningOnly={
3840
isFailedJob(job) &&
39-
(isRerunDisabledTestsJob(job) || isUnstableJob(job))
41+
(isRerunDisabledTestsJob(job) || isUnstableJob(job, unstableIssues))
4042
}
4143
/>
4244
<a href={job.htmlUrl}> {job.jobName} </a>

torchci/components/WorkflowBox.tsx

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import styles from "components/commit.module.css";
22
import { fetcher } from "lib/GeneralUtils";
33
import { isFailedJob } from "lib/jobUtils";
4-
import { Artifact, JobData } from "lib/types";
4+
import { Artifact, JobData, IssueData } from "lib/types";
55
import useSWR from "swr";
66
import JobArtifact from "./JobArtifact";
77
import JobSummary from "./JobSummary";
@@ -29,11 +29,13 @@ function WorkflowJobSummary({
2929
artifacts,
3030
artifactsToShow,
3131
setArtifactsToShow,
32+
unstableIssues,
3233
}: {
3334
job: JobData;
3435
artifacts?: Artifact[];
3536
artifactsToShow: Set<string>;
3637
setArtifactsToShow: any;
38+
unstableIssues: IssueData[];
3739
}) {
3840
var queueTimeInfo = null;
3941
if (job.queueTimeS != null) {
@@ -72,7 +74,7 @@ function WorkflowJobSummary({
7274

7375
return (
7476
<>
75-
<JobSummary job={job} />
77+
<JobSummary job={job} unstableIssues={unstableIssues} />
7678
<br />
7779
<small>
7880
&nbsp;&nbsp;&nbsp;&nbsp;
@@ -99,9 +101,11 @@ function WorkflowJobSummary({
99101
export default function WorkflowBox({
100102
workflowName,
101103
jobs,
104+
unstableIssues,
102105
}: {
103106
workflowName: string;
104107
jobs: JobData[];
108+
unstableIssues: IssueData[];
105109
}) {
106110
const isFailed = jobs.some(isFailedJob) !== false;
107111
const workflowClass = isFailed
@@ -171,6 +175,7 @@ export default function WorkflowBox({
171175
artifacts={groupedArtifacts?.get(job.id)}
172176
artifactsToShow={artifactsToShow}
173177
setArtifactsToShow={setArtifactsToShow}
178+
unstableIssues={unstableIssues}
174179
/>
175180
{(searchString && (
176181
<SearchLogViewer

torchci/lib/JobClassifierUtil.ts

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { GroupedJobStatus, JobStatus } from "components/GroupJobConclusion";
2-
import { GroupData, RowData } from "./types";
2+
import { GroupData, RowData, IssueData } from "./types";
3+
import { hasOpenUnstableIssue } from "lib/jobUtils";
34

45
const GROUP_MEMORY_LEAK_CHECK = "Memory Leak Check";
56
const GROUP_RERUN_DISABLED_TESTS = "Rerun Disabled Tests";
@@ -179,7 +180,16 @@ export function sortGroupNamesForHUD(groupNames: string[]): string[] {
179180
return result;
180181
}
181182

182-
export function classifyGroup(jobName: string): string {
183+
export function classifyGroup(
184+
jobName: string,
185+
unstableIssues?: IssueData[]
186+
): string {
187+
// Double check first if the job has been marked as unstable but doesn't include
188+
// the unstable keyword
189+
if (hasOpenUnstableIssue(jobName, unstableIssues)) {
190+
return GROUP_UNSTABLE;
191+
}
192+
183193
for (const group of groups) {
184194
if (jobName.match(group.regex)) {
185195
return group.name;
@@ -276,12 +286,16 @@ export function getConclusionSeverityForSorting(conclusion?: string): number {
276286
}
277287
}
278288

279-
export function getGroupingData(shaGrid: RowData[], jobNames: string[]) {
289+
export function getGroupingData(
290+
shaGrid: RowData[],
291+
jobNames: string[],
292+
unstableIssues?: IssueData[]
293+
) {
280294
// Construct Job Groupping Mapping
281295
const groupNameMapping = new Map<string, Array<string>>(); // group -> [jobs]
282296
const jobToGroupName = new Map<string, string>(); // job -> group
283297
for (const name of jobNames) {
284-
const groupName = classifyGroup(name);
298+
const groupName = classifyGroup(name, unstableIssues);
285299
const jobsInGroup = groupNameMapping.get(groupName) ?? [];
286300
jobsInGroup.push(name);
287301
groupNameMapping.set(groupName, jobsInGroup);
@@ -311,6 +325,9 @@ export function isPersistentGroup(name: string) {
311325
);
312326
}
313327

314-
export function isUnstableGroup(name: string) {
315-
return name.toLocaleLowerCase().includes("unstable");
328+
export function isUnstableGroup(name: string, unstableIssues?: IssueData[]) {
329+
return (
330+
name.toLocaleLowerCase().includes("unstable") ||
331+
hasOpenUnstableIssue(name, unstableIssues)
332+
);
316333
}

torchci/lib/fetchHud.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import { HudParams, JobData, RowData } from "./types";
55
import rocksetVersions from "rockset/prodVersions.json";
66
import { isFailure } from "./JobClassifierUtil";
77
import { isRerunDisabledTestsJob, isUnstableJob } from "./jobUtils";
8+
import fetchIssuesByLabel from "lib/fetchIssuesByLabel";
89

910
export default async function fetchHud(params: HudParams): Promise<{
1011
shaGrid: RowData[];
@@ -93,7 +94,10 @@ export default async function fetchHud(params: HudParams): Promise<{
9394
results = results?.filter((job: JobData) => !isRerunDisabledTestsJob(job));
9495
}
9596
if (params.filter_unstable) {
96-
results = results?.filter((job: JobData) => !isUnstableJob(job));
97+
const unstableIssues = await fetchIssuesByLabel("unstable");
98+
results = results?.filter(
99+
(job: JobData) => !isUnstableJob(job, unstableIssues ?? [])
100+
);
97101
}
98102

99103
const namesSet: Set<string> = new Set();

0 commit comments

Comments
 (0)