From 1a4863421bcae3ca1d50490256f4cd9236dc49cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= <berykubik@gmail.com>
Date: Mon, 7 Apr 2025 12:52:09 +0200
Subject: [PATCH 1/2] Sort job duration changes by absolute duration

It was supposed to be like this from the start, but I forgot to apply the `abs` operation, as I got sidetracked with how to actually compare floats...
---
 src/ci/citool/src/analysis.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ci/citool/src/analysis.rs b/src/ci/citool/src/analysis.rs
index 7fbfad467c641..6ac485b12ba80 100644
--- a/src/ci/citool/src/analysis.rs
+++ b/src/ci/citool/src/analysis.rs
@@ -225,7 +225,7 @@ pub fn output_largest_duration_changes(job_metrics: &HashMap<JobName, JobMetrics
             });
         }
     }
-    changes.sort_by(|e1, e2| e1.change.partial_cmp(&e2.change).unwrap().reverse());
+    changes.sort_by(|e1, e2| e1.change.abs().partial_cmp(&e2.change.abs()).unwrap().reverse());
 
     println!("# Job duration changes");
     for (index, entry) in changes.into_iter().take(10).enumerate() {

From 6ece1de0cd85156e211f58979081163405c8c5c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= <berykubik@gmail.com>
Date: Mon, 7 Apr 2025 14:08:31 +0200
Subject: [PATCH 2/2] Add job summary links to post-merge report

This should make it much easier to investigate the individual job changes.
---
 src/ci/citool/src/analysis.rs |  42 ++++++++++---
 src/ci/citool/src/github.rs   | 109 ++++++++++++++++++++++++++++++++++
 src/ci/citool/src/main.rs     |  11 +++-
 3 files changed, 152 insertions(+), 10 deletions(-)
 create mode 100644 src/ci/citool/src/github.rs

diff --git a/src/ci/citool/src/analysis.rs b/src/ci/citool/src/analysis.rs
index 6ac485b12ba80..208a494183c02 100644
--- a/src/ci/citool/src/analysis.rs
+++ b/src/ci/citool/src/analysis.rs
@@ -7,6 +7,7 @@ use build_helper::metrics::{
     format_build_steps,
 };
 
+use crate::github::JobInfoResolver;
 use crate::metrics;
 use crate::metrics::{JobMetrics, JobName, get_test_suites};
 use crate::utils::{output_details, pluralize};
@@ -185,13 +186,19 @@ fn render_table(suites: BTreeMap<String, TestSuiteRecord>) -> String {
 }
 
 /// Outputs a report of test differences between the `parent` and `current` commits.
-pub fn output_test_diffs(job_metrics: &HashMap<JobName, JobMetrics>) {
+pub fn output_test_diffs(
+    job_metrics: &HashMap<JobName, JobMetrics>,
+    job_info_resolver: &mut JobInfoResolver,
+) {
     let aggregated_test_diffs = aggregate_test_diffs(&job_metrics);
-    report_test_diffs(aggregated_test_diffs);
+    report_test_diffs(aggregated_test_diffs, job_metrics, job_info_resolver);
 }
 
 /// Prints the ten largest differences in bootstrap durations.
-pub fn output_largest_duration_changes(job_metrics: &HashMap<JobName, JobMetrics>) {
+pub fn output_largest_duration_changes(
+    job_metrics: &HashMap<JobName, JobMetrics>,
+    job_info_resolver: &mut JobInfoResolver,
+) {
     struct Entry<'a> {
         job: &'a JobName,
         before: Duration,
@@ -230,9 +237,9 @@ pub fn output_largest_duration_changes(job_metrics: &HashMap<JobName, JobMetrics
     println!("# Job duration changes");
     for (index, entry) in changes.into_iter().take(10).enumerate() {
         println!(
-            "{}. `{}`: {:.1}s -> {:.1}s ({:.1}%)",
+            "{}. {}: {:.1}s -> {:.1}s ({:.1}%)",
             index + 1,
-            entry.job,
+            format_job_link(job_info_resolver, job_metrics, entry.job),
             entry.before.as_secs_f64(),
             entry.after.as_secs_f64(),
             entry.change
@@ -400,7 +407,11 @@ fn generate_test_name(name: &str) -> String {
 }
 
 /// Prints test changes in Markdown format to stdout.
-fn report_test_diffs(diff: AggregatedTestDiffs) {
+fn report_test_diffs(
+    diff: AggregatedTestDiffs,
+    job_metrics: &HashMap<JobName, JobMetrics>,
+    job_info_resolver: &mut JobInfoResolver,
+) {
     println!("# Test differences");
     if diff.diffs.is_empty() {
         println!("No test diffs found");
@@ -521,9 +532,26 @@ fn report_test_diffs(diff: AggregatedTestDiffs) {
                 println!(
                     "- {}: {}",
                     format_job_group(group as u64),
-                    jobs.iter().map(|j| format!("`{j}`")).collect::<Vec<_>>().join(", ")
+                    jobs.iter()
+                        .map(|j| format_job_link(job_info_resolver, job_metrics, j))
+                        .collect::<Vec<_>>()
+                        .join(", ")
                 );
             }
         },
     );
 }
+
+/// Tries to get a GitHub Actions job summary URL from the resolver.
+/// If it is not available, just wraps the job name in backticks.
+fn format_job_link(
+    job_info_resolver: &mut JobInfoResolver,
+    job_metrics: &HashMap<JobName, JobMetrics>,
+    job_name: &str,
+) -> String {
+    job_metrics
+        .get(job_name)
+        .and_then(|metrics| job_info_resolver.get_job_summary_link(job_name, &metrics.current))
+        .map(|summary_url| format!("[{job_name}]({summary_url})"))
+        .unwrap_or_else(|| format!("`{job_name}`"))
+}
diff --git a/src/ci/citool/src/github.rs b/src/ci/citool/src/github.rs
new file mode 100644
index 0000000000000..35e4c3f9599d6
--- /dev/null
+++ b/src/ci/citool/src/github.rs
@@ -0,0 +1,109 @@
+use std::collections::HashMap;
+
+use anyhow::Context;
+use build_helper::metrics::{CiMetadata, JsonRoot};
+
+pub struct GitHubClient;
+
+impl GitHubClient {
+    fn get_workflow_run_jobs(
+        &self,
+        repo: &str,
+        workflow_run_id: u64,
+    ) -> anyhow::Result<Vec<GitHubJob>> {
+        let req = ureq::get(format!(
+            "https://api.github.com/repos/{repo}/actions/runs/{workflow_run_id}/jobs?per_page=100"
+        ))
+        .header("User-Agent", "rust-lang/rust/citool")
+        .header("Accept", "application/vnd.github+json")
+        .header("X-GitHub-Api-Version", "2022-11-28")
+        .call()
+        .context("cannot get workflow job list")?;
+
+        let status = req.status();
+        let mut body = req.into_body();
+        if status.is_success() {
+            // This API response is actually paged, but we assume for now that there are at
+            // most 100 jobs per workflow.
+            let response = body
+                .read_json::<WorkflowRunJobsResponse>()
+                .context("cannot deserialize workflow run jobs response")?;
+            // The CI job names have a prefix, e.g. `auto - foo`. We remove the prefix here to
+            // normalize the job name.
+            Ok(response
+                .jobs
+                .into_iter()
+                .map(|mut job| {
+                    job.name = job
+                        .name
+                        .split_once(" - ")
+                        .map(|res| res.1.to_string())
+                        .unwrap_or_else(|| job.name);
+                    job
+                })
+                .collect())
+        } else {
+            Err(anyhow::anyhow!(
+                "Cannot get jobs of workflow run {workflow_run_id}: {status}\n{}",
+                body.read_to_string()?
+            ))
+        }
+    }
+}
+
+#[derive(serde::Deserialize)]
+struct WorkflowRunJobsResponse {
+    jobs: Vec<GitHubJob>,
+}
+
+#[derive(serde::Deserialize)]
+struct GitHubJob {
+    name: String,
+    id: u64,
+}
+
+/// Can be used to resolve information about GitHub Actions jobs.
+/// Caches results internally to avoid too unnecessary GitHub API calls.
+pub struct JobInfoResolver {
+    client: GitHubClient,
+    // Workflow run ID -> jobs
+    workflow_job_cache: HashMap<u64, Vec<GitHubJob>>,
+}
+
+impl JobInfoResolver {
+    pub fn new() -> Self {
+        Self { client: GitHubClient, workflow_job_cache: Default::default() }
+    }
+
+    /// Get a link to a job summary for the given job name and bootstrap execution.
+    pub fn get_job_summary_link(&mut self, job_name: &str, metrics: &JsonRoot) -> Option<String> {
+        metrics.ci_metadata.as_ref().and_then(|metadata| {
+            self.get_job_id(metadata, job_name).map(|job_id| {
+                format!(
+                    "https://github.com/{}/actions/runs/{}#summary-{job_id}",
+                    metadata.repository, metadata.workflow_run_id
+                )
+            })
+        })
+    }
+
+    fn get_job_id(&mut self, ci_metadata: &CiMetadata, job_name: &str) -> Option<u64> {
+        if let Some(job) = self
+            .workflow_job_cache
+            .get(&ci_metadata.workflow_run_id)
+            .and_then(|jobs| jobs.iter().find(|j| j.name == job_name))
+        {
+            return Some(job.id);
+        }
+
+        let jobs = self
+            .client
+            .get_workflow_run_jobs(&ci_metadata.repository, ci_metadata.workflow_run_id)
+            .inspect_err(|e| eprintln!("Cannot download workflow jobs: {e:?}"))
+            .ok()?;
+        let job_id = jobs.iter().find(|j| j.name == job_name).map(|j| j.id);
+        // Save the cache even if the job name was not found, it could be useful for further lookups
+        self.workflow_job_cache.insert(ci_metadata.workflow_run_id, jobs);
+        job_id
+    }
+}
diff --git a/src/ci/citool/src/main.rs b/src/ci/citool/src/main.rs
index 6db5eab458cca..a1956da352f5c 100644
--- a/src/ci/citool/src/main.rs
+++ b/src/ci/citool/src/main.rs
@@ -1,6 +1,7 @@
 mod analysis;
 mod cpu_usage;
 mod datadog;
+mod github;
 mod jobs;
 mod metrics;
 mod utils;
@@ -18,6 +19,7 @@ use serde_yaml::Value;
 use crate::analysis::{output_largest_duration_changes, output_test_diffs};
 use crate::cpu_usage::load_cpu_usage;
 use crate::datadog::upload_datadog_metric;
+use crate::github::JobInfoResolver;
 use crate::jobs::RunType;
 use crate::metrics::{JobMetrics, download_auto_job_metrics, download_job_metrics, load_metrics};
 use crate::utils::load_env_var;
@@ -145,6 +147,7 @@ fn postprocess_metrics(
 ) -> anyhow::Result<()> {
     let metrics = load_metrics(&metrics_path)?;
 
+    let mut job_info_resolver = JobInfoResolver::new();
     if let (Some(parent), Some(job_name)) = (parent, job_name) {
         // This command is executed also on PR builds, which might not have parent metrics
         // available, because some PR jobs don't run on auto builds, and PR jobs do not upload metrics
@@ -160,7 +163,7 @@ fn postprocess_metrics(
                     job_name,
                     JobMetrics { parent: Some(parent_metrics), current: metrics },
                 )]);
-                output_test_diffs(&job_metrics);
+                output_test_diffs(&job_metrics, &mut job_info_resolver);
                 return Ok(());
             }
             Err(error) => {
@@ -180,8 +183,10 @@ fn post_merge_report(db: JobDatabase, current: String, parent: String) -> anyhow
     let metrics = download_auto_job_metrics(&db, &parent, &current)?;
 
     println!("\nComparing {parent} (parent) -> {current} (this PR)\n");
-    output_test_diffs(&metrics);
-    output_largest_duration_changes(&metrics);
+
+    let mut job_info_resolver = JobInfoResolver::new();
+    output_test_diffs(&metrics, &mut job_info_resolver);
+    output_largest_duration_changes(&metrics, &mut job_info_resolver);
 
     Ok(())
 }