From e6ca99296021989c7e729b4749e1d830244c1ea1 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 27 Feb 2025 14:01:17 -0800 Subject: [PATCH 001/114] Move UR devops scripts to devops folder --- .github/workflows/ur-benchmarks-reusable.yml | 6 +++--- .github/workflows/ur-build-hw.yml | 2 +- .../scripts/benchmarks/README.md | 0 .../scripts/benchmarks/benches/base.py | 0 .../scripts/benchmarks/benches/compute.py | 0 .../scripts/benchmarks/benches/llamacpp.py | 0 .../scripts/benchmarks/benches/oneapi.py | 0 .../scripts/benchmarks/benches/result.py | 0 .../scripts/benchmarks/benches/syclbench.py | 0 .../scripts/benchmarks/benches/test.py | 0 .../scripts/benchmarks/benches/umf.py | 0 .../scripts/benchmarks/benches/velocity.py | 0 .../benchmarks/benchmark_results.html.template | 0 .../scripts/benchmarks/history.py | 0 .../scripts/benchmarks/main.py | 0 .../scripts/benchmarks/options.py | 0 .../scripts/benchmarks/output_html.py | 0 .../scripts/benchmarks/output_markdown.py | 0 .../scripts/benchmarks/requirements.txt | 0 .../scripts/benchmarks/utils/compute_runtime.py | 0 .../scripts/benchmarks/utils/utils.py | 0 .../scripts/benchmarks/workflow.png | Bin .../.github => devops}/scripts/get_system_info.sh | 0 23 files changed, 4 insertions(+), 4 deletions(-) rename {unified-runtime => devops}/scripts/benchmarks/README.md (100%) rename {unified-runtime => devops}/scripts/benchmarks/benches/base.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/benches/compute.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/benches/llamacpp.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/benches/oneapi.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/benches/result.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/benches/syclbench.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/benches/test.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/benches/umf.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/benches/velocity.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/benchmark_results.html.template (100%) rename {unified-runtime => devops}/scripts/benchmarks/history.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/main.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/options.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/output_html.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/output_markdown.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/requirements.txt (100%) rename {unified-runtime => devops}/scripts/benchmarks/utils/compute_runtime.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/utils/utils.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/workflow.png (100%) rename {unified-runtime/.github => devops}/scripts/get_system_info.sh (100%) diff --git a/.github/workflows/ur-benchmarks-reusable.yml b/.github/workflows/ur-benchmarks-reusable.yml index 3b5a0480421d4..6e8a4ea535d15 100644 --- a/.github/workflows/ur-benchmarks-reusable.yml +++ b/.github/workflows/ur-benchmarks-reusable.yml @@ -82,7 +82,7 @@ jobs: - name: Install pip packages run: | - pip install --force-reinstall -r ${{github.workspace}}/sycl-repo/unified-runtime/scripts/benchmarks/requirements.txt + pip install --force-reinstall -r ${{github.workspace}}/sycl-repo/devops/scripts/benchmarks/requirements.txt - name: Configure SYCL run: > @@ -139,7 +139,7 @@ jobs: working-directory: ${{ github.workspace }} id: 
benchmarks run: > - taskset -c "${{ env.CORES }}" ${{ github.workspace }}/sycl-repo/unified-runtime/scripts/benchmarks/main.py + taskset -c "${{ env.CORES }}" ${{ github.workspace }}/sycl-repo/devops/scripts/benchmarks/main.py ~/llvm_bench_workdir --sycl ${{ github.workspace }}/sycl_build --ur ${{ github.workspace }}/ur_install @@ -195,4 +195,4 @@ jobs: - name: Get information about platform if: ${{ always() }} - run: ${{github.workspace}}/sycl-repo/unified-runtime/.github/scripts/get_system_info.sh + run: ${{github.workspace}}/sycl-repo/devops/scripts/get_system_info.sh diff --git a/.github/workflows/ur-build-hw.yml b/.github/workflows/ur-build-hw.yml index 8ad0f45bb35bb..9cf4d262d580d 100644 --- a/.github/workflows/ur-build-hw.yml +++ b/.github/workflows/ur-build-hw.yml @@ -145,4 +145,4 @@ jobs: - name: Get information about platform if: ${{ always() }} - run: ${{github.workspace}}/unified-runtime/.github/scripts/get_system_info.sh + run: ${{github.workspace}}/devops/scripts/get_system_info.sh diff --git a/unified-runtime/scripts/benchmarks/README.md b/devops/scripts/benchmarks/README.md similarity index 100% rename from unified-runtime/scripts/benchmarks/README.md rename to devops/scripts/benchmarks/README.md diff --git a/unified-runtime/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py similarity index 100% rename from unified-runtime/scripts/benchmarks/benches/base.py rename to devops/scripts/benchmarks/benches/base.py diff --git a/unified-runtime/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py similarity index 100% rename from unified-runtime/scripts/benchmarks/benches/compute.py rename to devops/scripts/benchmarks/benches/compute.py diff --git a/unified-runtime/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py similarity index 100% rename from unified-runtime/scripts/benchmarks/benches/llamacpp.py rename to devops/scripts/benchmarks/benches/llamacpp.py diff --git a/unified-runtime/scripts/benchmarks/benches/oneapi.py b/devops/scripts/benchmarks/benches/oneapi.py similarity index 100% rename from unified-runtime/scripts/benchmarks/benches/oneapi.py rename to devops/scripts/benchmarks/benches/oneapi.py diff --git a/unified-runtime/scripts/benchmarks/benches/result.py b/devops/scripts/benchmarks/benches/result.py similarity index 100% rename from unified-runtime/scripts/benchmarks/benches/result.py rename to devops/scripts/benchmarks/benches/result.py diff --git a/unified-runtime/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py similarity index 100% rename from unified-runtime/scripts/benchmarks/benches/syclbench.py rename to devops/scripts/benchmarks/benches/syclbench.py diff --git a/unified-runtime/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py similarity index 100% rename from unified-runtime/scripts/benchmarks/benches/test.py rename to devops/scripts/benchmarks/benches/test.py diff --git a/unified-runtime/scripts/benchmarks/benches/umf.py b/devops/scripts/benchmarks/benches/umf.py similarity index 100% rename from unified-runtime/scripts/benchmarks/benches/umf.py rename to devops/scripts/benchmarks/benches/umf.py diff --git a/unified-runtime/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py similarity index 100% rename from unified-runtime/scripts/benchmarks/benches/velocity.py rename to devops/scripts/benchmarks/benches/velocity.py diff --git 
a/unified-runtime/scripts/benchmarks/benchmark_results.html.template b/devops/scripts/benchmarks/benchmark_results.html.template similarity index 100% rename from unified-runtime/scripts/benchmarks/benchmark_results.html.template rename to devops/scripts/benchmarks/benchmark_results.html.template diff --git a/unified-runtime/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py similarity index 100% rename from unified-runtime/scripts/benchmarks/history.py rename to devops/scripts/benchmarks/history.py diff --git a/unified-runtime/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py similarity index 100% rename from unified-runtime/scripts/benchmarks/main.py rename to devops/scripts/benchmarks/main.py diff --git a/unified-runtime/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py similarity index 100% rename from unified-runtime/scripts/benchmarks/options.py rename to devops/scripts/benchmarks/options.py diff --git a/unified-runtime/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py similarity index 100% rename from unified-runtime/scripts/benchmarks/output_html.py rename to devops/scripts/benchmarks/output_html.py diff --git a/unified-runtime/scripts/benchmarks/output_markdown.py b/devops/scripts/benchmarks/output_markdown.py similarity index 100% rename from unified-runtime/scripts/benchmarks/output_markdown.py rename to devops/scripts/benchmarks/output_markdown.py diff --git a/unified-runtime/scripts/benchmarks/requirements.txt b/devops/scripts/benchmarks/requirements.txt similarity index 100% rename from unified-runtime/scripts/benchmarks/requirements.txt rename to devops/scripts/benchmarks/requirements.txt diff --git a/unified-runtime/scripts/benchmarks/utils/compute_runtime.py b/devops/scripts/benchmarks/utils/compute_runtime.py similarity index 100% rename from unified-runtime/scripts/benchmarks/utils/compute_runtime.py rename to devops/scripts/benchmarks/utils/compute_runtime.py diff --git a/unified-runtime/scripts/benchmarks/utils/utils.py b/devops/scripts/benchmarks/utils/utils.py similarity index 100% rename from unified-runtime/scripts/benchmarks/utils/utils.py rename to devops/scripts/benchmarks/utils/utils.py diff --git a/unified-runtime/scripts/benchmarks/workflow.png b/devops/scripts/benchmarks/workflow.png similarity index 100% rename from unified-runtime/scripts/benchmarks/workflow.png rename to devops/scripts/benchmarks/workflow.png diff --git a/unified-runtime/.github/scripts/get_system_info.sh b/devops/scripts/get_system_info.sh similarity index 100% rename from unified-runtime/.github/scripts/get_system_info.sh rename to devops/scripts/get_system_info.sh From 3d42db259ac9e04b59a9fe4f660024ac9073736d Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 28 Feb 2025 08:38:44 -0800 Subject: [PATCH 002/114] Restrict number of cores used --- devops/actions/run-tests/benchmark/action.yml | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 7f69fdf832982..69631d044891c 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -46,6 +46,26 @@ runs: echo "# This workflow is not guaranteed to work with other backends." echo "#" ;; esac + - name: Compute CPU core range to run benchmarks on + run: | + # Taken from ur-benchmark-reusable.yml: + + # Compute the core range for the first NUMA node; second node is used by + # UMF. 
Skip the first 4 cores as the kernel is likely to schedule more + # work on these. + CORES="$(lscpu | awk ' + /NUMA node0 CPU|On-line CPU/ {line=$0} + END { + split(line, a, " ") + split(a[4], b, ",") + sub(/^0/, "4", b[1]) + print b[1] + }')" + echo "CPU core range to use: $CORES" + echo "CORES=$CORES" >> $GITHUB_ENV + + ZE_AFFINITY_MASK=0 + echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV - name: Run compute-benchmarks shell: bash run: | @@ -69,7 +89,7 @@ runs: echo "-----" sycl-ls echo "-----" - ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s || exit 1 + taskset -c "$CORES" ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s || exit 1 - name: Push compute-benchmarks results if: always() shell: bash From 4f08dd6fbf51002f45b0c9a44fa0310e94de5001 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Tue, 4 Mar 2025 13:20:29 -0800 Subject: [PATCH 003/114] Restore ur-benchmark*.yml --- .github/workflows/ur-benchmarks-reusable.yml | 198 ++++++++++++++++++- .github/workflows/ur-benchmarks.yml | 55 +++++- 2 files changed, 240 insertions(+), 13 deletions(-) diff --git a/.github/workflows/ur-benchmarks-reusable.yml b/.github/workflows/ur-benchmarks-reusable.yml index 66ffcecd70314..6e8a4ea535d15 100644 --- a/.github/workflows/ur-benchmarks-reusable.yml +++ b/.github/workflows/ur-benchmarks-reusable.yml @@ -1,12 +1,198 @@ name: Benchmarks Reusable -# This workflow is a WIP: This workflow file acts as a placeholder. +on: + workflow_call: + inputs: + str_name: + required: true + type: string + pr_no: + required: true + # even though this is a number, this is a workaround for issues with + # reusable workflow calls that result in "Unexpected value '0'" error. + type: string + bench_script_params: + required: false + type: string + default: '' + sycl_config_params: + required: false + type: string + default: '' + upload_report: + required: false + type: boolean + default: false + compute_runtime_commit: + required: false + type: string + default: '' -on: [ workflow_call ] +permissions: + contents: read + pull-requests: write jobs: - do-nothing: - runs-on: ubuntu-latest + bench-run: + name: Build SYCL, Run Benchmarks + strategy: + matrix: + adapter: [ + {str_name: "${{ inputs.str_name }}", + sycl_config: "${{ inputs.sycl_config_params }}" + } + ] + build_type: [Release] + compiler: [{c: clang, cxx: clang++}] + + runs-on: "PVC_PERF" + steps: - - run: echo 'This workflow is a WIP.' - + - name: Add comment to PR + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + if: ${{ always() && inputs.pr_no != 0 }} + with: + script: | + const pr_no = '${{ inputs.pr_no }}'; + const adapter = '${{ matrix.adapter.str_name }}'; + const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}'; + const params = '${{ inputs.bench_script_params }}'; + const body = `Compute Benchmarks ${adapter} run (with params: ${params}):\n${url}`; + + github.rest.issues.createComment({ + issue_number: pr_no, + owner: context.repo.owner, + repo: context.repo.repo, + body: body + }) + + - name: Checkout SYCL + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + path: sycl-repo + + # We need to fetch special ref for proper PR's merge commit. Note, this ref may be absent if the PR is already merged. 
+ - name: Fetch PR's merge commit + if: ${{ inputs.pr_no != 0 }} + working-directory: ${{github.workspace}}/sycl-repo + run: | + git fetch -- https://github.com/${{github.repository}} +refs/pull/${{ inputs.pr_no }}/*:refs/remotes/origin/pr/${{ inputs.pr_no }}/* + git checkout origin/pr/${{ inputs.pr_no }}/merge + git rev-parse origin/pr/${{ inputs.pr_no }}/merge + + - name: Install pip packages + run: | + pip install --force-reinstall -r ${{github.workspace}}/sycl-repo/devops/scripts/benchmarks/requirements.txt + + - name: Configure SYCL + run: > + python3 sycl-repo/buildbot/configure.py + -t ${{matrix.build_type}} + -o ${{github.workspace}}/sycl_build + --cmake-gen "Ninja" + --cmake-opt="-DLLVM_INSTALL_UTILS=ON" + --cmake-opt="-DSYCL_PI_TESTS=OFF" + --cmake-opt=-DCMAKE_C_COMPILER_LAUNCHER=ccache + --cmake-opt=-DCMAKE_CXX_COMPILER_LAUNCHER=ccache + ${{matrix.adapter.sycl_config}} + + - name: Build SYCL + run: cmake --build ${{github.workspace}}/sycl_build -j $(nproc) + + # We need a complete installed UR for compute-benchmarks. + - name: Configure UR + run: > + cmake -DCMAKE_BUILD_TYPE=${{matrix.build_type}} + -S${{github.workspace}}/sycl-repo/unified-runtime + -B${{github.workspace}}/ur_build + -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/ur_install + -DUR_BUILD_TESTS=OFF + -DUR_BUILD_ADAPTER_L0=ON + -DUR_BUILD_ADAPTER_L0_V2=ON + -DUMF_DISABLE_HWLOC=ON + + - name: Build UR + run: cmake --build ${{github.workspace}}/ur_build -j $(nproc) + + - name: Install UR + run: cmake --install ${{github.workspace}}/ur_build + + - name: Compute core range + run: | + # Compute the core range for the first NUMA node; second node is for UMF jobs. + # Skip the first 4 cores - the kernel is likely to schedule more work on these. + CORES="$(lscpu | awk ' + /NUMA node0 CPU|On-line CPU/ {line=$0} + END { + split(line, a, " ") + split(a[4], b, ",") + sub(/^0/, "4", b[1]) + print b[1] + }')" + echo "Selected core: $CORES" + echo "CORES=$CORES" >> $GITHUB_ENV + + ZE_AFFINITY_MASK=0 + echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV + + - name: Run benchmarks + working-directory: ${{ github.workspace }} + id: benchmarks + run: > + taskset -c "${{ env.CORES }}" ${{ github.workspace }}/sycl-repo/devops/scripts/benchmarks/main.py + ~/llvm_bench_workdir + --sycl ${{ github.workspace }}/sycl_build + --ur ${{ github.workspace }}/ur_install + --adapter ${{ matrix.adapter.str_name }} + --compare baseline + --compute-runtime ${{ inputs.compute_runtime_commit }} + --build-igc + ${{ inputs.upload_report && '--output-html' || '' }} + ${{ inputs.pr_no != 0 && '--output-markdown' || '' }} + ${{ inputs.bench_script_params }} + + - name: Print benchmark results + run: | + cat ${{ github.workspace }}/benchmark_results.md || true + + - name: Add comment to PR + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + if: ${{ always() && inputs.pr_no != 0 }} + with: + script: | + let markdown = "" + try { + const fs = require('fs'); + markdown = fs.readFileSync('benchmark_results.md', 'utf8'); + } catch(err) { + } + + const pr_no = '${{ inputs.pr_no }}'; + const adapter = '${{ matrix.adapter.str_name }}'; + const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}'; + const test_status = '${{ steps.benchmarks.outcome }}'; + const job_status = '${{ job.status }}'; + const params = '${{ inputs.bench_script_params }}'; + const body = `Benchmarks ${adapter} run (${params}):\n${url}\nJob status: ${job_status}. 
Test status: ${test_status}.\n ${markdown}`; + + github.rest.issues.createComment({ + issue_number: pr_no, + owner: context.repo.owner, + repo: context.repo.repo, + body: body + }) + + - name: Rename benchmark results file + if: ${{ always() && inputs.upload_report }} + run: mv benchmark_results.html benchmark_results_${{ inputs.pr_no }}.html + + - name: Upload HTML report + if: ${{ always() && inputs.upload_report }} + uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + with: + path: benchmark_results_${{ inputs.pr_no }}.html + key: benchmark-results-${{ inputs.pr_no }}-${{ matrix.adapter.str_name }}-${{ github.run_id }} + + - name: Get information about platform + if: ${{ always() }} + run: ${{github.workspace}}/sycl-repo/devops/scripts/get_system_info.sh diff --git a/.github/workflows/ur-benchmarks.yml b/.github/workflows/ur-benchmarks.yml index 23fbb1ad903b4..cde4bfa828d71 100644 --- a/.github/workflows/ur-benchmarks.yml +++ b/.github/workflows/ur-benchmarks.yml @@ -1,12 +1,53 @@ name: Benchmarks -# This workflow is a WIP: this workflow file acts as a placeholder. +on: + workflow_dispatch: + inputs: + str_name: + description: Adapter + type: choice + required: true + default: 'level_zero' + options: + - level_zero + - level_zero_v2 + pr_no: + description: PR number (0 is sycl main branch) + type: number + required: true + bench_script_params: + description: Benchmark script arguments + type: string + required: false + default: '' + sycl_config_params: + description: Extra params for SYCL configuration + type: string + required: false + default: '' + compute_runtime_commit: + description: 'Compute Runtime commit' + type: string + required: false + default: '' + upload_report: + description: 'Upload HTML report' + type: boolean + required: false + default: false -on: [ workflow_dispatch ] +permissions: + contents: read + pull-requests: write jobs: - do-nothing: - runs-on: ubuntu-latest - steps: - - run: echo 'This workflow is a WIP.' - + manual: + name: Compute Benchmarks + uses: ./.github/workflows/ur-benchmarks-reusable.yml + with: + str_name: ${{ inputs.str_name }} + pr_no: ${{ inputs.pr_no }} + bench_script_params: ${{ inputs.bench_script_params }} + sycl_config_params: ${{ inputs.sycl_config_params }} + compute_runtime_commit: ${{ inputs.compute_runtime_commit }} + upload_report: ${{ inputs.upload_report }} From 497dcce9d87e8d610b21afd930669e8059eba54f Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Wed, 5 Mar 2025 16:32:30 +0000 Subject: [PATCH 004/114] [benchmarks] improve HTML and Markdown output This patch improves numerous aspects on how the benchmarking results are visualized: - rewrites the way HTML charts are generated, using a library (Chart.js) that's both easier to use and more visually pleasing. The new HTML page also now decouples data from the HTML itself, leading to faster load times and the ability to fetch data from remote sources. - The markdown output now contains a failures section that lists all benchmarks that failed for a given run. This will be a helpful for developers during PR testing. - Benchmarks can now have description that's displayed on the page. - And many more minor improvements. 
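To illustrate the data/HTML decoupling described above, here is a minimal sketch (not part of this patch) of how such a page can load its run data either from a remote source or from a locally generated file. It assumes the config.remoteDataUrl field added in html/config.js, a global benchmarkRuns array consumed by the charting code in html/scripts.js, and the initializeCharts() entry point; how the real page wires this up is defined in scripts.js and html/index.html, which are only partially shown below.

    // Sketch only: pick between a remote JSON document and locally provided data.
    // Global consumed by the charting code; in the real page a generated data
    // file could assign to it instead of this declaration.
    let benchmarkRuns = [];

    async function loadBenchmarkData() {
        if (config.remoteDataUrl) {
            // Remote mode: fetch the run data as JSON from a separate source,
            // so the static HTML never needs to embed the results themselves.
            const response = await fetch(config.remoteDataUrl);
            benchmarkRuns = await response.json();
        }
        // Local mode: a generated data file loaded via a <script> tag has
        // already populated `benchmarkRuns`, so there is nothing to fetch.
        initializeCharts();
    }

    document.addEventListener('DOMContentLoaded', loadBenchmarkData);

Serving the results as a separate document like this is what enables the faster load times and remote data sources mentioned above, since new runs can be published without regenerating the HTML.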
--- devops/scripts/benchmarks/benches/base.py | 17 +- devops/scripts/benchmarks/benches/compute.py | 130 ++-- devops/scripts/benchmarks/benches/llamacpp.py | 18 +- .../scripts/benchmarks/benches/syclbench.py | 36 +- devops/scripts/benchmarks/benches/test.py | 17 +- devops/scripts/benchmarks/benches/umf.py | 33 +- devops/scripts/benchmarks/benches/velocity.py | 79 ++- .../benchmark_results.html.template | 192 ------ devops/scripts/benchmarks/history.py | 19 +- devops/scripts/benchmarks/html/config.js | 5 + devops/scripts/benchmarks/html/index.html | 205 +++++++ devops/scripts/benchmarks/html/scripts.js | 556 ++++++++++++++++++ devops/scripts/benchmarks/main.py | 52 +- devops/scripts/benchmarks/options.py | 1 + devops/scripts/benchmarks/output_html.py | 352 +---------- devops/scripts/benchmarks/output_markdown.py | 40 +- .../benchmarks/{benches => utils}/oneapi.py | 20 +- .../benchmarks/{benches => utils}/result.py | 17 +- devops/scripts/benchmarks/utils/utils.py | 26 +- 19 files changed, 1167 insertions(+), 648 deletions(-) delete mode 100644 devops/scripts/benchmarks/benchmark_results.html.template create mode 100644 devops/scripts/benchmarks/html/config.js create mode 100644 devops/scripts/benchmarks/html/index.html create mode 100644 devops/scripts/benchmarks/html/scripts.js rename devops/scripts/benchmarks/{benches => utils}/oneapi.py (79%) rename devops/scripts/benchmarks/{benches => utils}/result.py (69%) diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py index d1bb5fb53b83a..77365220dbf85 100644 --- a/devops/scripts/benchmarks/benches/base.py +++ b/devops/scripts/benchmarks/benches/base.py @@ -6,7 +6,7 @@ import os import shutil from pathlib import Path -from .result import Result +from utils.result import Result from options import options from utils.utils import download, run import urllib.request @@ -55,16 +55,25 @@ def create_data_path(self, name, skip_data_dir=False): data_path = os.path.join(self.directory, name) else: data_path = os.path.join(self.directory, "data", name) - if options.rebuild and Path(data_path).exists(): + if options.redownload and Path(data_path).exists(): shutil.rmtree(data_path) Path(data_path).mkdir(parents=True, exist_ok=True) return data_path - def download(self, name, url, file, untar=False, unzip=False, skip_data_dir=False): + def download( + self, + name, + url, + file, + untar=False, + unzip=False, + skip_data_dir=False, + checksum="", + ): self.data_path = self.create_data_path(name, skip_data_dir) - return download(self.data_path, url, file, untar, unzip) + return download(self.data_path, url, file, untar, unzip, checksum) def name(self): raise NotImplementedError() diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index 5e420d560a463..18ed969728902 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -8,10 +8,11 @@ import io from utils.utils import run, git_clone, create_build_path from .base import Benchmark, Suite -from .result import Result +from utils.result import Result from options import options from enum import Enum + class ComputeBench(Suite): def __init__(self, directory): self.directory = directory @@ -47,9 +48,8 @@ def setup(self): f"-Dunified-runtime_DIR={options.ur}/lib/cmake/unified-runtime", ] - print(f"{self.__class__.__name__}: Run {configure_command}") run(configure_command, add_sycl=True) - print(f"{self.__class__.__name__}: Run cmake --build {build_path} -j") + 
run(f"cmake --build {build_path} -j", add_sycl=True) self.built = True @@ -73,16 +73,6 @@ def benchmarks(self) -> list[Benchmark]: ExecImmediateCopyQueue(self, 0, 1, "Device", "Device", 1024), ExecImmediateCopyQueue(self, 1, 1, "Device", "Host", 1024), VectorSum(self), - MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1), - MemcpyExecute(self, 100, 8, 102400, 10, 1, 1, 1), - MemcpyExecute(self, 400, 8, 1024, 1000, 1, 1, 1), - MemcpyExecute(self, 10, 16, 1024, 10000, 1, 1, 1), - MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1), - MemcpyExecute(self, 100, 8, 102400, 10, 0, 1, 1), - MemcpyExecute(self, 400, 8, 1024, 1000, 0, 1, 1), - MemcpyExecute(self, 10, 16, 1024, 10000, 0, 1, 1), - MemcpyExecute(self, 4096, 1, 1024, 10, 0, 1, 0), - MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0), GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 5), GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 1, 5), GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 100), @@ -98,6 +88,16 @@ def benchmarks(self) -> list[Benchmark]: SubmitKernelUR(self, 0, 0), SubmitKernelUR(self, 1, 0), SubmitKernelUR(self, 1, 1), + MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1), + MemcpyExecute(self, 100, 8, 102400, 10, 1, 1, 1), + MemcpyExecute(self, 400, 8, 1024, 1000, 1, 1, 1), + MemcpyExecute(self, 10, 16, 1024, 10000, 1, 1, 1), + MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1), + MemcpyExecute(self, 100, 8, 102400, 10, 0, 1, 1), + MemcpyExecute(self, 400, 8, 1024, 1000, 0, 1, 1), + MemcpyExecute(self, 10, 16, 1024, 10000, 0, 1, 1), + MemcpyExecute(self, 4096, 1, 1024, 10, 0, 1, 0), + MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0), GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 5), GraphApiSinKernelGraph(self, RUNTIMES.UR, 1, 5), GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 100), @@ -136,6 +136,9 @@ def setup(self): def explicit_group(self): return "" + def description(self) -> str: + return "" + def run(self, env_vars) -> list[Result]: command = [ f"{self.benchmark_bin}", @@ -167,6 +170,7 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit=parse_unit_type(unit), + description=self.description() ) ) return ret @@ -221,6 +225,13 @@ def bin_args(self) -> list[str]: "--KernelExecTime=1", ] + def description(self) -> str: + order = "in-order" if self.ioq else "out-of-order" + return ( + f"Measures CPU time overhead of submitting {order} kernels through SYCL API." + "Uses 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time." + ) + class SubmitKernelUR(ComputeBenchmark): def __init__(self, bench, ioq, measureCompletion): @@ -237,6 +248,15 @@ def name(self): def explicit_group(self): return "SubmitKernel" + def description(self) -> str: + order = "in-order" if self.ioq else "out-of-order" + completion = "including" if self.measureCompletion else "excluding" + return ( + f"Measures CPU time overhead of submitting {order} kernels through Unified Runtime API, " + f"{completion} kernel completion time. Uses 10 simple kernels with minimal execution time " + f"to isolate API overhead." + ) + def bin_args(self) -> list[str]: return [ f"--Ioq={self.ioq}", @@ -261,6 +281,14 @@ def name(self): def explicit_group(self): return "SubmitKernel" + def description(self) -> str: + order = "in-order" if self.ioq else "out-of-order" + return ( + f"Measures CPU time overhead of submitting {order} kernels through Level Zero API. " + f"Uses immediate command lists with 10 minimal kernels to isolate submission overhead " + f"from execution time." 
+ ) + def bin_args(self) -> list[str]: return [ f"--Ioq={self.ioq}", @@ -286,6 +314,14 @@ def name(self): order = "in order" if self.ioq else "out of order" return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}" + def description(self) -> str: + order = "in-order" if self.ioq else "out-of-order" + operation = "copy-only" if self.isCopyOnly else "copy and command submission" + return ( + f"Measures SYCL {order} queue overhead for {operation} from {self.source} to " + f"{self.destination} memory with {self.size} bytes. Tests immediate execution overheads." + ) + def bin_args(self) -> list[str]: return [ "--iterations=100000", @@ -309,6 +345,13 @@ def __init__(self, bench, isCopyOnly, source, destination, size): def name(self): return f"memory_benchmark_sycl QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}" + def description(self) -> str: + operation = "copy-only" if self.isCopyOnly else "copy and command submission" + return ( + f"Measures SYCL in-order queue memory copy performance for {operation} from " + f"{self.source} to {self.destination} with {self.size} bytes, executed 100 times per iteration." + ) + def bin_args(self) -> list[str]: return [ "--iterations=10000", @@ -330,6 +373,12 @@ def __init__(self, bench, source, destination, size): def name(self): return f"memory_benchmark_sycl QueueMemcpy from {self.source} to {self.destination}, size {self.size}" + def description(self) -> str: + return ( + f"Measures general SYCL queue memory copy performance from {self.source} to " + f"{self.destination} with {self.size} bytes per operation." + ) + def bin_args(self) -> list[str]: return [ "--iterations=10000", @@ -349,6 +398,12 @@ def __init__(self, bench, type, size, placement): def name(self): return f"memory_benchmark_sycl StreamMemory, placement {self.placement}, type {self.type}, size {self.size}" + def description(self) -> str: + return ( + f"Measures {self.placement} memory bandwidth using {self.type} pattern with " + f"{self.size} bytes. Higher values (GB/s) indicate better performance." + ) + # measurement is in GB/s def lower_is_better(self): return False @@ -362,6 +417,7 @@ def bin_args(self) -> list[str]: "--useEvents=0", "--contents=Zeros", "--multiplier=1", + "--vectorSize=1", ] @@ -372,6 +428,12 @@ def __init__(self, bench): def name(self): return f"miscellaneous_benchmark_sycl VectorSum" + def description(self) -> str: + return ( + "Measures performance of vector addition across 3D grid (512x256x256 elements) " + "using SYCL." + ) + def bin_args(self) -> list[str]: return [ "--iterations=1000", @@ -408,6 +470,16 @@ def name(self): + (" without events" if not self.useEvents else "") ) + def description(self) -> str: + src_type = "device" if self.srcUSM == 1 else "host" + dst_type = "device" if self.dstUSM == 1 else "host" + events = "with" if self.useEvents else "without" + return ( + f"Measures multithreaded memory copy performance with {self.numThreads} threads " + f"each performing {self.numOpsPerThread} operations on {self.allocSize} bytes " + f"from {src_type} to {dst_type} memory {events} events." 
+ ) + def bin_args(self) -> list[str]: return [ "--Ioq=1", @@ -441,6 +513,13 @@ def __init__(self, bench, runtime: RUNTIMES, withGraphs, numKernels): def explicit_group(self): return f"SinKernelGraph {self.numKernels}" + def description(self) -> str: + execution = "using graphs" if self.withGraphs else "without graphs" + return ( + f"Measures {self.runtime.value.upper()} performance when executing {self.numKernels} " + f"sin kernels {execution}. Tests overhead and benefits of graph-based execution." + ) + def name(self): return f"graph_api_benchmark_{self.runtime.value} SinKernelGraph graphs:{self.withGraphs}, numKernels:{self.numKernels}" @@ -452,28 +531,3 @@ def bin_args(self) -> list[str]: "--withCopyOffload=1", "--immediateAppendCmdList=0", ] - - -class GraphApiSubmitExecGraph(ComputeBenchmark): - def __init__(self, bench, ioq, submit, numKernels): - self.ioq = ioq - self.submit = submit - self.numKernels = numKernels - super().__init__(bench, "graph_api_benchmark_sycl", "SubmitExecGraph") - - def name(self): - return f"graph_api_benchmark_sycl SubmitExecGraph ioq:{self.ioq}, submit:{self.submit}, numKernels:{self.numKernels}" - - def explicit_group(self): - if self.submit: - return "SubmitGraph" - else: - return "ExecGraph" - - def bin_args(self) -> list[str]: - return [ - "--iterations=100", - f"--measureSubmit={self.submit}", - f"--ioq={self.ioq}", - f"--numKernels={self.numKernels}", - ] diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py index 6524c95a9f56f..d8e0ab5d007bb 100644 --- a/devops/scripts/benchmarks/benches/llamacpp.py +++ b/devops/scripts/benchmarks/benches/llamacpp.py @@ -8,10 +8,10 @@ from pathlib import Path from utils.utils import download, git_clone from .base import Benchmark, Suite -from .result import Result +from utils.result import Result from utils.utils import run, create_build_path from options import options -from .oneapi import get_oneapi +from utils.oneapi import get_oneapi import os @@ -43,6 +43,7 @@ def setup(self): self.models_dir, "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf", "Phi-3-mini-4k-instruct-q4.gguf", + checksum="fc4f45c9729874a33a527465b2ec78189a18e5726b7121182623feeae38632ace4f280617b01d4a04875acf49d263ee4", ) self.oneapi = get_oneapi() @@ -62,9 +63,9 @@ def setup(self): f'-DCMAKE_CXX_FLAGS=-I"{self.oneapi.mkl_include()}"', f"-DCMAKE_SHARED_LINKER_FLAGS=-L{self.oneapi.compiler_lib()} -L{self.oneapi.mkl_lib()}", ] - print(f"{self.__class__.__name__}: Run {configure_command}") + run(configure_command, add_sycl=True) - print(f"{self.__class__.__name__}: Run cmake --build {self.build_path} -j") + run( f"cmake --build {self.build_path} -j", add_sycl=True, @@ -92,6 +93,14 @@ def setup(self): def name(self): return f"llama.cpp" + def description(self) -> str: + return ( + "Performance testing tool for llama.cpp that measures LLM inference speed in tokens per second. " + "Runs both prompt processing (initial context processing) and text generation benchmarks with " + "different batch sizes. Higher values indicate better performance. Uses the Phi-3-mini-4k-instruct " + "quantized model and leverages SYCL with oneDNN for acceleration." 
+ ) + def lower_is_better(self): return False @@ -130,6 +139,7 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit="token/s", + description=self.description() ) ) return results diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py index f7cf571a7ecd7..47326b2555a68 100644 --- a/devops/scripts/benchmarks/benches/syclbench.py +++ b/devops/scripts/benchmarks/benches/syclbench.py @@ -8,7 +8,7 @@ import io from utils.utils import run, git_clone, create_build_path from .base import Benchmark, Suite -from .result import Result +from utils.result import Result from options import options @@ -65,14 +65,14 @@ def benchmarks(self) -> list[Benchmark]: DagTaskS(self), HostDevBandwidth(self), LocalMem(self), - Pattern_L2(self), - Reduction(self), + # Pattern_L2(self), # validation failure + # Reduction(self), # validation failure ScalarProd(self), SegmentReduction(self), - UsmAccLatency(self), + # UsmAccLatency(self), # validation failure UsmAllocLatency(self), - UsmInstrMix(self), - UsmPinnedOverhead(self), + # UsmInstrMix(self), # validation failure + # UsmPinnedOverhead(self), # validation failure VecAdd(self), # *** sycl-bench single benchmarks # TwoDConvolution(self), # run time < 1ms @@ -82,20 +82,20 @@ def benchmarks(self) -> list[Benchmark]: Atax(self), # Atomic_reduction(self), # run time < 1ms Bicg(self), - Correlation(self), - Covariance(self), - Gemm(self), - Gesumv(self), - Gramschmidt(self), + # Correlation(self), # validation failure + # Covariance(self), # validation failure + # Gemm(self), # validation failure + # Gesumv(self), # validation failure + # Gramschmidt(self), # validation failure KMeans(self), LinRegCoeff(self), # LinRegError(self), # run time < 1ms - MatmulChain(self), + # MatmulChain(self), # validation failure MolDyn(self), - Mvt(self), + # Mvt(self), # validation failure Sf(self), - Syr2k(self), - Syrk(self), + # Syr2k(self), # validation failure + # Syrk(self), # validation failure ] @@ -122,7 +122,7 @@ def run(self, env_vars) -> list[Result]: if self.done: return self.outputfile = os.path.join(self.bench.directory, self.test + ".csv") - print(f"{self.__class__.__name__}: Results in {self.outputfile}") + command = [ f"{self.benchmark_bin}", f"--warmup-run", @@ -143,7 +143,7 @@ def run(self, env_vars) -> list[Result]: if not row[0].startswith("#"): res_list.append( Result( - label=row[0], + label=f"{self.name()} {row[0]}", value=float(row[12]) * 1000, # convert to ms passed=(row[1] == "PASS"), command=command, @@ -161,7 +161,7 @@ def teardown(self): return def name(self): - return self.test + return f"{self.bench.name()} {self.test}" # multi benchmarks diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py index 06eac12b25344..18794d4e9c73c 100644 --- a/devops/scripts/benchmarks/benches/test.py +++ b/devops/scripts/benchmarks/benches/test.py @@ -6,7 +6,7 @@ import random from utils.utils import git_clone from .base import Benchmark, Suite -from .result import Result +from utils.result import Result from utils.utils import run, create_build_path from options import options import os @@ -19,6 +19,9 @@ def __init__(self): def setup(self): return + def name(self) -> str: + return "Test Suite" + def benchmarks(self) -> list[Benchmark]: bench_configs = [ ("Memory Bandwidth", 2000, 200, "Foo Group"), @@ -36,18 +39,18 @@ def benchmarks(self) -> list[Benchmark]: value = base_value * value_multiplier diff = base_diff * value_multiplier - 
result.append(TestBench(name, value, diff, group)) + result.append(TestBench(self, name, value, diff, group)) return result class TestBench(Benchmark): - def __init__(self, name, value, diff, group=""): + def __init__(self, suite, name, value, diff, group=""): + super().__init__("", suite) self.bname = name self.value = value self.diff = diff self.group = group - super().__init__("") def name(self): return self.bname @@ -58,6 +61,9 @@ def lower_is_better(self): def setup(self): return + def description(self) -> str: + return f"This is a test benchmark for {self.bname}." + def run(self, env_vars) -> list[Result]: random_value = self.value + random.uniform(-1 * (self.diff), self.diff) return [ @@ -65,10 +71,11 @@ def run(self, env_vars) -> list[Result]: label=self.name(), explicit_group=self.group, value=random_value, - command="", + command=["test", "--arg1", "foo"], env={"A": "B"}, stdout="no output", unit="ms", + description=self.description(), ) ] diff --git a/devops/scripts/benchmarks/benches/umf.py b/devops/scripts/benchmarks/benches/umf.py index 15c343b9a9845..1f736e7755f92 100644 --- a/devops/scripts/benchmarks/benches/umf.py +++ b/devops/scripts/benchmarks/benches/umf.py @@ -6,10 +6,10 @@ import random from utils.utils import git_clone from .base import Benchmark, Suite -from .result import Result +from utils.result import Result from utils.utils import run, create_build_path from options import options -from .oneapi import get_oneapi +from utils.oneapi import get_oneapi import os import csv import io @@ -22,8 +22,6 @@ def isUMFAvailable(): class UMFSuite(Suite): def __init__(self, directory): self.directory = directory - if not isUMFAvailable(): - print("UMF not provided. Related benchmarks will not run") def name(self) -> str: return "UMF" @@ -40,6 +38,8 @@ def benchmarks(self) -> list[Benchmark]: benches = [ GBench(self), GBenchUmfProxy(self), + GBenchJemalloc(self), + GBenchTbbProxy(self), ] return benches @@ -220,10 +220,31 @@ def parse_output(self, output): return results -class GBenchUmfProxy(GBenchPreloaded): +class GBenchGlibc(GBenchPreloaded): + def __init__(self, bench, replacing_lib): + super().__init__(bench, lib_to_be_replaced="glibc", replacing_lib=replacing_lib) + + +class GBenchUmfProxy(GBenchGlibc): def __init__(self, bench): - super().__init__(bench, lib_to_be_replaced="glibc", replacing_lib="umfProxy") + super().__init__(bench, replacing_lib="umfProxy") def extra_env_vars(self) -> dict: umf_proxy_path = os.path.join(options.umf, "lib", "libumf_proxy.so") return {"LD_PRELOAD": umf_proxy_path} + + +class GBenchJemalloc(GBenchGlibc): + def __init__(self, bench): + super().__init__(bench, replacing_lib="jemalloc") + + def extra_env_vars(self) -> dict: + return {"LD_PRELOAD": "libjemalloc.so"} + + +class GBenchTbbProxy(GBenchGlibc): + def __init__(self, bench): + super().__init__(bench, replacing_lib="tbbProxy") + + def extra_env_vars(self) -> dict: + return {"LD_PRELOAD": "libtbbmalloc_proxy.so"} diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py index b7d06cbe4a3a2..be36c47ca36d5 100644 --- a/devops/scripts/benchmarks/benches/velocity.py +++ b/devops/scripts/benchmarks/benches/velocity.py @@ -7,10 +7,10 @@ import shutil from utils.utils import git_clone from .base import Benchmark, Suite -from .result import Result +from utils.result import Result from utils.utils import run, create_build_path from options import options -from .oneapi import get_oneapi +from utils.oneapi import get_oneapi import shutil import 
os @@ -115,6 +115,9 @@ def extra_env_vars(self) -> dict: def parse_output(self, stdout: str) -> float: raise NotImplementedError() + def description(self) -> str: + return "" + def run(self, env_vars) -> list[Result]: env_vars.update(self.extra_env_vars()) @@ -133,6 +136,7 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit=self.unit, + description=self.description() ) ] @@ -147,6 +151,12 @@ def __init__(self, vb: VelocityBench): def name(self): return "Velocity-Bench Hashtable" + def description(self) -> str: + return ( + "Measures hash table search performance using an efficient lock-free algorithm with linear probing. " + "Reports throughput in millions of keys processed per second. Higher values indicate better performance." + ) + def bin_args(self) -> list[str]: return ["--no-verify"] @@ -170,6 +180,13 @@ def __init__(self, vb: VelocityBench): def name(self): return "Velocity-Bench Bitcracker" + def description(self) -> str: + return ( + "Password-cracking application for BitLocker-encrypted memory units. " + "Uses dictionary attack to find user or recovery passwords. " + "Measures total time required to process 60000 passwords." + ) + def bin_args(self) -> list[str]: self.data_path = os.path.join(self.vb.repo_path, "bitcracker", "hash_pass") @@ -204,11 +221,19 @@ def download_deps(self): "https://github.com/oneapi-src/Velocity-Bench/raw/main/sobel_filter/res/sobel_filter_data.tgz?download=", "sobel_filter_data.tgz", untar=True, + checksum="7fc62aa729792ede80ed8ae70fb56fa443d479139c5888ed4d4047b98caec106687a0f05886a9ced77922ccba7f65e66", ) def name(self): return "Velocity-Bench Sobel Filter" + def description(self) -> str: + return ( + "Popular RGB-to-grayscale image conversion technique that applies a gaussian filter " + "to reduce edge artifacts. Processes a large 32K x 32K image and measures " + "the time required to apply the filter." + ) + def bin_args(self) -> list[str]: return [ "-i", @@ -249,6 +274,13 @@ def run(self, env_vars) -> list[Result]: def name(self): return "Velocity-Bench QuickSilver" + def description(self) -> str: + return ( + "Solves a simplified dynamic Monte Carlo particle-transport problem used in HPC. " + "Replicates memory access patterns, communication patterns, and branching of Mercury workloads. " + "Reports a figure of merit in MMS/CTT where higher values indicate better performance." + ) + def lower_is_better(self): return False @@ -279,14 +311,22 @@ def __init__(self, vb: VelocityBench): def download_deps(self): self.download( "easywave", - "https://git.gfz-potsdam.de/id2/geoperil/easyWave/-/raw/master/data/examples.tar.gz", + "https://gitlab.oca.eu/AstroGeoGPM/eazyWave/-/raw/master/data/examples.tar.gz", "examples.tar.gz", untar=True, + checksum="3b0cd0efde10122934ba6db8451b8c41f4f95a3370fc967fc5244039ef42aae7e931009af1586fa5ed2143ade8ed47b1", ) def name(self): return "Velocity-Bench Easywave" + def description(self) -> str: + return ( + "A tsunami wave simulator used for researching tsunami generation and wave propagation. " + "Measures the elapsed time in milliseconds to simulate a specified tsunami event " + "based on real-world data." + ) + def bin_args(self) -> list[str]: return [ "-grid", @@ -341,6 +381,13 @@ def download_deps(self): def name(self): return "Velocity-Bench CudaSift" + def description(self) -> str: + return ( + "Implementation of the SIFT (Scale Invariant Feature Transform) algorithm " + "for detecting, describing, and matching local features in images. 
" + "Measures average processing time in milliseconds." + ) + def parse_output(self, stdout: str) -> float: match = re.search(r"Avg workload time = (\d+\.\d+) ms", stdout) if match: @@ -364,6 +411,7 @@ def download_deps(self): "cifar-10-binary.tar.gz", untar=True, skip_data_dir=True, + checksum="974b1bd62da0cb3b7a42506d42b1e030c9a0cb4a0f2c359063f9c0e65267c48f0329e4493c183a348f44ddc462eaf814", ) return @@ -382,6 +430,13 @@ def extra_cmake_args(self): def name(self): return "Velocity-Bench dl-cifar" + def description(self) -> str: + return ( + "Deep learning image classification workload based on the CIFAR-10 dataset " + "of 60,000 32x32 color images in 10 classes. Uses neural networks to " + "classify input images and measures total calculation time." + ) + def parse_output(self, stdout: str) -> float: match = re.search( r"dl-cifar - total time for whole calculation: (\d+\.\d+) s", stdout @@ -407,6 +462,7 @@ def download_deps(self): "train-images.idx3-ubyte.gz", unzip=True, skip_data_dir=True, + checksum="f40eb179f7c3d2637e789663bde56d444a23e4a0a14477a9e6ed88bc39c8ad6eaff68056c0cd9bb60daf0062b70dc8ee", ) self.download( "datasets", @@ -414,6 +470,7 @@ def download_deps(self): "train-labels.idx1-ubyte.gz", unzip=True, skip_data_dir=True, + checksum="ba9c11bf9a7f7c2c04127b8b3e568cf70dd3429d9029ca59b7650977a4ac32f8ff5041fe42bc872097487b06a6794e00", ) self.download( "datasets", @@ -421,6 +478,7 @@ def download_deps(self): "t10k-images.idx3-ubyte.gz", unzip=True, skip_data_dir=True, + checksum="1bf45877962fd391f7abb20534a30fd2203d0865309fec5f87d576dbdbefdcb16adb49220afc22a0f3478359d229449c", ) self.download( "datasets", @@ -428,6 +486,7 @@ def download_deps(self): "t10k-labels.idx1-ubyte.gz", unzip=True, skip_data_dir=True, + checksum="ccc1ee70f798a04e6bfeca56a4d0f0de8d8eeeca9f74641c1e1bfb00cf7cc4aa4d023f6ea1b40e79bb4707107845479d", ) def extra_cmake_args(self): @@ -445,6 +504,13 @@ def extra_cmake_args(self): def name(self): return "Velocity-Bench dl-mnist" + def description(self) -> str: + return ( + "Digit recognition based on the MNIST database, one of the oldest and most popular " + "databases of handwritten digits. Uses neural networks to identify digits " + "and measures total calculation time." + ) + def bin_args(self): return ["-conv_algo", "ONEDNN_AUTO"] @@ -488,6 +554,13 @@ def extra_cmake_args(self): def name(self): return "Velocity-Bench svm" + def description(self) -> str: + return ( + "Implementation of Support Vector Machine, a popular classical machine learning technique. " + "Uses supervised learning models with associated algorithms to analyze data " + "for classification and regression analysis. Measures total elapsed time." + ) + def bin_args(self): return [ f"{self.code_path}/a9a", diff --git a/devops/scripts/benchmarks/benchmark_results.html.template b/devops/scripts/benchmarks/benchmark_results.html.template deleted file mode 100644 index 1deeedad66b00..0000000000000 --- a/devops/scripts/benchmarks/benchmark_results.html.template +++ /dev/null @@ -1,192 +0,0 @@ - - - - - - Benchmark Results - - - - -
[deleted template body not preserved here: a standalone HTML page titled "Benchmark Results" containing ${suite_checkboxes_html}, a "Historical Results" section filled from ${timeseries_charts_html}, and a "Comparisons" section filled from ${bar_charts_html}]
- - diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index 7902aa4f04c35..2bb0b9db8ea38 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -6,7 +6,7 @@ import os import json from pathlib import Path -from benches.result import Result, BenchmarkRun +from utils.result import Result, BenchmarkRun from options import Compare, options from datetime import datetime, timezone from utils.utils import run @@ -63,12 +63,29 @@ def create_run(self, name: str, results: list[Result]) -> BenchmarkRun: try: result = run("git rev-parse --short HEAD") git_hash = result.stdout.decode().strip() + + # Get the GitHub repo URL from git remote + remote_result = run("git remote get-url origin") + remote_url = remote_result.stdout.decode().strip() + + # Convert SSH or HTTPS URL to owner/repo format + if remote_url.startswith("git@github.com:"): + # SSH format: git@github.com:owner/repo.git + github_repo = remote_url.split("git@github.com:")[1].rstrip(".git") + elif remote_url.startswith("https://github.com/"): + # HTTPS format: https://github.com/owner/repo.git + github_repo = remote_url.split("https://github.com/")[1].rstrip(".git") + else: + github_repo = None + except: git_hash = "unknown" + github_repo = None return BenchmarkRun( name=name, git_hash=git_hash, + github_repo=github_repo, date=datetime.now(tz=timezone.utc), results=results, ) diff --git a/devops/scripts/benchmarks/html/config.js b/devops/scripts/benchmarks/html/config.js new file mode 100644 index 0000000000000..c1210b2b21da5 --- /dev/null +++ b/devops/scripts/benchmarks/html/config.js @@ -0,0 +1,5 @@ +const config = { + remoteDataUrl: '' +}; +// defaultCompareNames = []; +// suiteNames = []; diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html new file mode 100644 index 0000000000000..c10844f15c707 --- /dev/null +++ b/devops/scripts/benchmarks/html/index.html @@ -0,0 +1,205 @@ + + + + + + + Benchmark Results + + + + + + + + +
[new page body not preserved here: an HTML page titled "Benchmark Results" with run-selection and suite/name filter controls, followed by "Historical Results" and "Comparisons" chart sections that are populated by scripts.js]
+ + diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js new file mode 100644 index 0000000000000..8f0272048136d --- /dev/null +++ b/devops/scripts/benchmarks/html/scripts.js @@ -0,0 +1,556 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// Core state +let activeRuns = new Set(defaultCompareNames); +let chartInstances = new Map(); +let timeseriesData, barChartsData, allRunNames; + +// DOM Elements +let runSelect, selectedRunsDiv, suiteFiltersContainer; + +// Run selector functions +function updateSelectedRuns() { + selectedRunsDiv.innerHTML = ''; + activeRuns.forEach(name => { + selectedRunsDiv.appendChild(createRunElement(name)); + }); + updateCharts(); +} + +function createRunElement(name) { + const runElement = document.createElement('span'); + runElement.className = 'selected-run'; + runElement.innerHTML = `${name} `; + return runElement; +} + +function addSelectedRun() { + const selectedRun = runSelect.value; + if (selectedRun && !activeRuns.has(selectedRun)) { + activeRuns.add(selectedRun); + updateSelectedRuns(); + } +} + +function removeRun(name) { + activeRuns.delete(name); + updateSelectedRuns(); +} + +// Chart creation and update +function createChart(data, containerId, type) { + if (chartInstances.has(containerId)) { + chartInstances.get(containerId).destroy(); + } + + const ctx = document.getElementById(containerId).getContext('2d'); + const options = { + responsive: true, + plugins: { + title: { + display: true, + text: data.label + }, + subtitle: { + display: true, + text: data.lower_is_better ? "Lower is better" : "Higher is better" + }, + tooltip: { + callbacks: { + label: (context) => { + if (type === 'time') { + const point = context.raw; + return [ + `${data.label}:`, + `Value: ${point.y.toFixed(2)} ${data.unit}`, + `Stddev: ${point.stddev.toFixed(2)} ${data.unit}`, + `Git Hash: ${point.gitHash}`, + ]; + } else { + return [`${context.dataset.label}:`, + `Value: ${context.parsed.y.toFixed(2)} ${data.unit}`, + ]; + } + } + } + } + }, + scales: { + y: { + title: { + display: true, + text: data.unit + } + } + } + }; + + if (type === 'time') { + options.interaction = { + mode: 'nearest', + intersect: false + }; + options.onClick = (event, elements) => { + if (elements.length > 0) { + const point = elements[0].element.$context.raw; + if (point.gitHash && point.gitRepo) { + window.open(`https://github.com/${point.gitRepo}/commit/${point.gitHash}`, '_blank'); + } + } + }; + options.scales.x = { + type: 'time', + ticks: { + maxRotation: 45, + minRotation: 45, + autoSkip: true, + maxTicksLimit: 10 + } + }; + } + + const chartConfig = { + type: type === 'time' ? 'line' : 'bar', + data: type === 'time' ? 
+ { + datasets: createTimeseriesDatasets(data) + } : + { + labels: data.labels, + datasets: data.datasets + }, + options: options + }; + + const chart = new Chart(ctx, chartConfig); + chartInstances.set(containerId, chart); + return chart; +} + +function createTimeseriesDatasets(data) { + return Object.entries(data.runs).map(([name, points]) => ({ + label: name, + data: points.map(p => ({ + x: new Date(p.date), + y: p.value, + gitHash: p.git_hash, + gitRepo: p.github_repo, + stddev: p.stddev + })), + borderWidth: 1, + pointRadius: 3, + pointStyle: 'circle', + pointHoverRadius: 5 + })); +} + +function updateCharts() { + // Filter data by active runs + const filteredTimeseriesData = timeseriesData.map(chart => ({ + ...chart, + runs: Object.fromEntries( + Object.entries(chart.runs).filter(([name]) => activeRuns.has(name)) + ) + })); + + const filteredBarChartsData = barChartsData.map(chart => ({ + ...chart, + labels: chart.labels.filter(label => activeRuns.has(label)), + datasets: chart.datasets.map(dataset => ({ + ...dataset, + data: dataset.data.filter((_, i) => activeRuns.has(chart.labels[i])) + })) + })); + + // Draw charts with filtered data + drawCharts(filteredTimeseriesData, filteredBarChartsData); +} + +function drawCharts(filteredTimeseriesData, filteredBarChartsData) { + // Clear existing charts + document.querySelectorAll('.charts').forEach(container => container.innerHTML = ''); + chartInstances.forEach(chart => chart.destroy()); + chartInstances.clear(); + + // Create timeseries charts + filteredTimeseriesData.forEach((data, index) => { + const containerId = `timeseries-${index}`; + const container = createChartContainer(data, containerId); + document.querySelector('.timeseries .charts').appendChild(container); + createChart(data, containerId, 'time'); + }); + + // Create bar charts + filteredBarChartsData.forEach((data, index) => { + const containerId = `barchart-${index}`; + const container = createChartContainer(data, containerId); + document.querySelector('.bar-charts .charts').appendChild(container); + createChart(data, containerId, 'bar'); + }); + + // Apply current filters + filterCharts(); +} + +function createChartContainer(data, canvasId) { + const container = document.createElement('div'); + container.className = 'chart-container'; + container.setAttribute('data-label', data.label); + container.setAttribute('data-suite', data.suite); + + const canvas = document.createElement('canvas'); + canvas.id = canvasId; + container.appendChild(canvas); + + // Create details section for extra info + const details = document.createElement('details'); + const summary = document.createElement('summary'); + summary.textContent = "Details"; + + // Add subtle download button to the summary + const downloadButton = document.createElement('button'); + downloadButton.className = 'download-button'; + downloadButton.textContent = 'Download'; + downloadButton.onclick = (event) => { + event.stopPropagation(); // Prevent details toggle + downloadChart(canvasId, data.label); + }; + summary.appendChild(downloadButton); + details.appendChild(summary); + + // Create and append extra info + const extraInfo = document.createElement('div'); + extraInfo.className = 'extra-info'; + extraInfo.innerHTML = generateExtraInfo(data); + details.appendChild(extraInfo); + + container.appendChild(details); + + return container; +} + +// Pre-compute a lookup for the latest run per label +function createLatestRunsLookup(benchmarkRuns) { + const latestRunsMap = new Map(); + + benchmarkRuns.forEach(run => { + // 
Yes, we need to convert the date every time. I checked. + const runDate = new Date(run.date); + run.results.forEach(result => { + const label = result.label; + if (!latestRunsMap.has(label) || runDate > new Date(latestRunsMap.get(label).date)) { + latestRunsMap.set(label, { + run, + result + }); + } + }); + }); + + return latestRunsMap; +} +const latestRunsLookup = createLatestRunsLookup(benchmarkRuns); + +function generateExtraInfo(data) { + const labels = data.datasets ? data.datasets.map(dataset => dataset.label) : [data.label]; + + return labels.map(label => { + const latestRun = latestRunsLookup.get(label); + + if (latestRun) { + return `
+                ${label}: ${formatCommand(latestRun.result)}
+                Description: ${latestRun.result.description}
+            `;
+        }
+        return `
+                ${label}: No data available
`; + }).join(''); +} + +function formatCommand(run) { + const envVars = Object.entries(run.env || {}).map(([key, value]) => `${key}=${value}`).join(' '); + let command = run.command ? [...run.command] : []; + + return `${envVars} ${command.join(' ')}`.trim(); +} + +function downloadChart(canvasId, label) { + const chart = chartInstances.get(canvasId); + if (chart) { + const link = document.createElement('a'); + link.href = chart.toBase64Image('image/jpeg', 1) + link.download = `${label}.png`; + link.click(); + } +} + +// URL and filtering functions +function getQueryParam(param) { + const urlParams = new URLSearchParams(window.location.search); + return urlParams.get(param); +} + +function updateURL() { + const url = new URL(window.location); + const regex = document.getElementById('bench-filter').value; + const activeSuites = getActiveSuites(); + const activeRunsList = Array.from(activeRuns); + + if (regex) { + url.searchParams.set('regex', regex); + } else { + url.searchParams.delete('regex'); + } + + if (activeSuites.length > 0) { + url.searchParams.set('suites', activeSuites.join(',')); + } else { + url.searchParams.delete('suites'); + } + + // Handle the runs parameter + if (activeRunsList.length > 0) { + // Check if the active runs are the same as default runs + const defaultRuns = new Set(defaultCompareNames || []); + const isDefaultRuns = activeRunsList.length === defaultRuns.size && + activeRunsList.every(run => defaultRuns.has(run)); + + if (isDefaultRuns) { + // If it's just the default runs, omit the parameter entirely + url.searchParams.delete('runs'); + } else { + url.searchParams.set('runs', activeRunsList.join(',')); + } + } else { + url.searchParams.delete('runs'); + } + + history.replaceState(null, '', url); +} + +function filterCharts() { + const regexInput = document.getElementById('bench-filter').value; + const regex = new RegExp(regexInput, 'i'); + const activeSuites = getActiveSuites(); + + document.querySelectorAll('.chart-container').forEach(container => { + const label = container.getAttribute('data-label'); + const suite = container.getAttribute('data-suite'); + container.style.display = (regex.test(label) && activeSuites.includes(suite)) ? '' : 'none'; + }); + + updateURL(); +} + +function getActiveSuites() { + return Array.from(document.querySelectorAll('.suite-checkbox:checked')) + .map(checkbox => checkbox.getAttribute('data-suite')); +} + +// Data processing +function processTimeseriesData(benchmarkRuns) { + const resultsByLabel = {}; + + benchmarkRuns.forEach(run => { + const runDate = run.date ? 
new Date(run.date) : null; + run.results.forEach(result => { + if (!resultsByLabel[result.label]) { + resultsByLabel[result.label] = { + label: result.label, + suite: result.suite, + unit: result.unit, + lower_is_better: result.lower_is_better, + runs: {} + }; + } + + if (!resultsByLabel[result.label].runs[run.name]) { + resultsByLabel[result.label].runs[run.name] = []; + } + + resultsByLabel[result.label].runs[run.name].push({ + date: runDate, + value: result.value, + stddev: result.stddev, + git_hash: run.git_hash, + github_repo: run.github_repo + }); + }); + }); + + return Object.values(resultsByLabel); +} + +function processBarChartsData(benchmarkRuns) { + const groupedResults = {}; + + benchmarkRuns.forEach(run => { + run.results.forEach(result => { + if (!result.explicit_group) return; + + if (!groupedResults[result.explicit_group]) { + groupedResults[result.explicit_group] = { + label: result.explicit_group, + suite: result.suite, + unit: result.unit, + lower_is_better: result.lower_is_better, + labels: [], + datasets: [] + }; + } + + const group = groupedResults[result.explicit_group]; + + if (!group.labels.includes(run.name)) { + group.labels.push(run.name); + } + + let dataset = group.datasets.find(d => d.label === result.label); + if (!dataset) { + dataset = { + label: result.label, + data: new Array(group.labels.length).fill(null) + }; + group.datasets.push(dataset); + } + + const runIndex = group.labels.indexOf(run.name); + dataset.data[runIndex] = result.value; + }); + }); + + return Object.values(groupedResults); +} + +// Setup functions +function setupRunSelector() { + runSelect = document.getElementById('run-select'); + selectedRunsDiv = document.getElementById('selected-runs'); + + allRunNames.forEach(name => { + const option = document.createElement('option'); + option.value = name; + option.textContent = name; + runSelect.appendChild(option); + }); + + updateSelectedRuns(); +} + +function setupSuiteFilters() { + suiteFiltersContainer = document.getElementById('suite-filters'); + + suiteNames.forEach(suite => { + const label = document.createElement('label'); + const checkbox = document.createElement('input'); + checkbox.type = 'checkbox'; + checkbox.className = 'suite-checkbox'; + checkbox.dataset.suite = suite; + checkbox.checked = true; + label.appendChild(checkbox); + label.appendChild(document.createTextNode(' ' + suite)); + suiteFiltersContainer.appendChild(label); + suiteFiltersContainer.appendChild(document.createTextNode(' ')); + }); +} + +function initializeCharts() { + // Process raw data + timeseriesData = processTimeseriesData(benchmarkRuns); + barChartsData = processBarChartsData(benchmarkRuns); + allRunNames = [...new Set(benchmarkRuns.map(run => run.name))]; + + // Set up active runs + const runsParam = getQueryParam('runs'); + if (runsParam) { + const runsFromUrl = runsParam.split(','); + + // Start with an empty set + activeRuns = new Set(); + + // Process each run from URL + runsFromUrl.forEach(run => { + if (run === 'default') { + // Special case: include all default runs + (defaultCompareNames || []).forEach(defaultRun => { + if (allRunNames.includes(defaultRun)) { + activeRuns.add(defaultRun); + } + }); + } else if (allRunNames.includes(run)) { + // Add the specific run if it exists + activeRuns.add(run); + } + }); + } else { + // No runs parameter, use defaults + activeRuns = new Set(defaultCompareNames || []); + } + + // Setup UI components + setupRunSelector(); + setupSuiteFilters(); + + // Apply URL parameters + const regexParam = 
getQueryParam('regex'); + const suitesParam = getQueryParam('suites'); + + if (regexParam) { + document.getElementById('bench-filter').value = regexParam; + } + + if (suitesParam) { + const suites = suitesParam.split(','); + document.querySelectorAll('.suite-checkbox').forEach(checkbox => { + checkbox.checked = suites.includes(checkbox.getAttribute('data-suite')); + }); + } + + // Setup event listeners + document.querySelectorAll('.suite-checkbox').forEach(checkbox => { + checkbox.addEventListener('change', filterCharts); + }); + document.getElementById('bench-filter').addEventListener('input', filterCharts); + + // Draw initial charts + updateCharts(); +} + +// Make functions available globally for onclick handlers +window.addSelectedRun = addSelectedRun; +window.removeRun = removeRun; + +// Load data based on configuration +function loadData() { + const loadingIndicator = document.getElementById('loading-indicator'); + loadingIndicator.style.display = 'block'; // Show loading indicator + + if (config.remoteDataUrl && config.remoteDataUrl !== '') { + // Fetch data from remote URL + fetch(config.remoteDataUrl) + .then(response => response.text()) + .then(scriptContent => { + // Evaluate the script content + eval(scriptContent); + initializeCharts(); + }) + .catch(error => console.error('Error fetching remote data:', error)) + .finally(() => { + loadingIndicator.style.display = 'none'; // Hide loading indicator + }); + } else { + // Use local data + initializeCharts(); + loadingIndicator.style.display = 'none'; // Hide loading indicator + } +} + +// Initialize when DOM is ready +document.addEventListener('DOMContentLoaded', () => { + loadData(); +}); diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 4ad90b39b9001..8f5330d7b4f62 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -27,23 +27,27 @@ def run_iterations( - benchmark: Benchmark, env_vars, iters: int, results: dict[str, list[Result]] + benchmark: Benchmark, + env_vars, + iters: int, + results: dict[str, list[Result]], + failures: dict[str, str], ): for iter in range(iters): - print(f"running {benchmark.name()}, iteration {iter}... ", end="", flush=True) + print(f"running {benchmark.name()}, iteration {iter}... ", flush=True) bench_results = benchmark.run(env_vars) if bench_results is None: - print(f"did not finish (OK for sycl-bench).") + failures[benchmark.name()] = "benchmark produced no results!" break for bench_result in bench_results: - # TODO: report failures in markdown/html ? if not bench_result.passed: - print(f"complete ({bench_result.label}: verification FAILED)") + failures[bench_result.label] = "verification failed" + print(f"complete ({bench_result.label}: verification failed).") continue print( - f"complete ({bench_result.label}: {bench_result.value:.3f} {bench_result.unit})." + f"{benchmark.name()} complete ({bench_result.label}: {bench_result.value:.3f} {bench_result.unit})." ) bench_result.name = bench_result.label @@ -156,6 +160,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): ) benchmarks = [] + failures = {} for s in suites: suite_benchmarks = s.benchmarks() @@ -170,7 +175,8 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): print(f"Setting up {type(s).__name__}") try: s.setup() - except: + except Exception as e: + failures[s.name()] = f"Suite setup failure: {e}" print(f"{type(s).__name__} setup failed. 
Benchmarks won't be added.") else: print(f"{type(s).__name__} setup complete.") @@ -189,6 +195,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): if options.exit_on_failure: raise e else: + failures[benchmark.name()] = f"Benchmark setup failure: {e}" print(f"failed: {e}") results = [] @@ -199,7 +206,11 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): processed: list[Result] = [] for _ in range(options.iterations_stddev): run_iterations( - benchmark, merged_env_vars, options.iterations, intermediate_results + benchmark, + merged_env_vars, + options.iterations, + intermediate_results, + failures, ) valid, processed = process_results( intermediate_results, benchmark.stddev_threshold() @@ -211,12 +222,16 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): if options.exit_on_failure: raise e else: + failures[benchmark.name()] = f"Benchmark run failure: {e}" print(f"failed: {e}") for benchmark in benchmarks: - print(f"tearing down {benchmark.name()}... ", end="", flush=True) + # this never has any useful information anyway, so hide it behind verbose + if options.verbose: + print(f"tearing down {benchmark.name()}... ", flush=True) benchmark.teardown() - print("complete.") + if options.verbose: + print("{benchmark.name()} teardown complete.") this_name = options.current_run_name chart_data = {} @@ -241,7 +256,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): if options.output_markdown: markdown_content = generate_markdown( - this_name, chart_data, options.output_markdown + this_name, chart_data, failures, options.output_markdown ) with open("benchmark_results.md", "w") as file: @@ -262,14 +277,9 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): compare_names.append(saved_name) if options.output_html: - html_content = generate_html(history.runs, "intel/llvm", compare_names) + generate_html(history.runs, compare_names) - with open("benchmark_results.html", "w") as file: - file.write(html_content) - - print( - f"HTML with benchmark results has been written to {os.getcwd()}/benchmark_results.html" - ) + print(f"See {os.getcwd()}/html/index.html for the results.") def validate_and_parse_env_args(env_args): @@ -305,6 +315,11 @@ def validate_and_parse_env_args(env_args): help="Do not rebuild the benchmarks from scratch.", action="store_true", ) + parser.add_argument( + "--redownload", + help="Always download benchmark data dependencies, even if they already exist.", + action="store_true", + ) parser.add_argument( "--env", type=str, @@ -430,6 +445,7 @@ def validate_and_parse_env_args(env_args): options.workdir = args.benchmark_directory options.verbose = args.verbose options.rebuild = not args.no_rebuild + options.redownload = args.redownload options.sycl = args.sycl options.iterations = args.iterations options.timeout = args.timeout diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index 2e92675264544..206ca94eb0d0b 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -21,6 +21,7 @@ class Options: ur_adapter: str = None umf: str = None rebuild: bool = True + redownload: bool = False benchmark_cwd: str = "INVALID" timeout: float = 600 iterations: int = 3 diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py index 4ba395bc3aac6..e6e3212dbcdb2 100644 --- a/devops/scripts/benchmarks/output_html.py +++ 
b/devops/scripts/benchmarks/output_html.py @@ -3,338 +3,36 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -import re +import json import os -from pathlib import Path -import matplotlib.pyplot as plt -import mpld3 -from collections import defaultdict -from dataclasses import dataclass -import matplotlib.dates as mdates -from benches.result import BenchmarkRun, Result -import numpy as np -from string import Template -@dataclass -class BenchmarkMetadata: - unit: str - suite: str - lower_is_better: bool +def generate_html(benchmark_runs: list, compare_names: list[str]): + # Get unique suite names + suite_names = {result.suite for run in benchmark_runs for result in run.results} -@dataclass -class BenchmarkSeries: - label: str - metadata: BenchmarkMetadata - runs: list[BenchmarkRun] + # create path to data.js in html folder + data_path = os.path.join(os.path.dirname(__file__), "html", "data.js") + # Write data to js file + # We can't store this as a standalone json file because it needs to be inline in the html + with open(data_path, "w") as f: + f.write("const benchmarkRuns = [\n") + # it might be tempting to just to create a list and convert + # that to a json, but that leads to json being serialized twice. + for i, run in enumerate(benchmark_runs): + if i > 0: + f.write(",\n") + f.write(run.to_json()) -@dataclass -class BenchmarkChart: - label: str - suite: str - html: str + f.write("\n];\n\n") # terminates benchmarkRuns - -def tooltip_css() -> str: - return ".mpld3-tooltip{background:white;padding:8px;border:1px solid #ddd;border-radius:4px;font-family:monospace;white-space:pre;}" - - -def create_time_series_chart( - benchmarks: list[BenchmarkSeries], github_repo: str -) -> list[BenchmarkChart]: - plt.close("all") - - num_benchmarks = len(benchmarks) - if num_benchmarks == 0: - return [] - - html_charts = [] - - for _, benchmark in enumerate(benchmarks): - fig, ax = plt.subplots(figsize=(10, 4)) - - all_values = [] - all_stddevs = [] - - for run in benchmark.runs: - sorted_points = sorted(run.results, key=lambda x: x.date) - dates = [point.date for point in sorted_points] - values = [point.value for point in sorted_points] - stddevs = [point.stddev for point in sorted_points] - - all_values.extend(values) - all_stddevs.extend(stddevs) - - ax.errorbar(dates, values, yerr=stddevs, fmt="-", label=run.name, alpha=0.5) - scatter = ax.scatter(dates, values, picker=True) - - tooltip_labels = [ - f"Date: {point.date.strftime('%Y-%m-%d %H:%M:%S')}\n" - f"Value: {point.value:.2f} {benchmark.metadata.unit}\n" - f"Stddev: {point.stddev:.2f} {benchmark.metadata.unit}\n" - f"Git Hash: {point.git_hash}" - for point in sorted_points - ] - - targets = [ - f"https://github.com/{github_repo}/commit/{point.git_hash}" - for point in sorted_points - ] - - tooltip = mpld3.plugins.PointHTMLTooltip( - scatter, tooltip_labels, css=tooltip_css(), targets=targets - ) - mpld3.plugins.connect(fig, tooltip) - - ax.set_title(benchmark.label, pad=20) - performance_indicator = ( - "lower is better" - if benchmark.metadata.lower_is_better - else "higher is better" - ) - ax.text( - 0.5, - 1.05, - f"({performance_indicator})", - ha="center", - transform=ax.transAxes, - style="italic", - fontsize=7, - color="#666666", - ) - - ax.set_xlabel("") - unit = benchmark.metadata.unit - ax.set_ylabel(f"Value ({unit})" if unit else "Value") - ax.grid(True, alpha=0.2) - ax.legend(bbox_to_anchor=(1, 1), loc="upper left") - ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter("%Y-%m-%d 
%H:%M:%S")) - - plt.tight_layout() - html_charts.append( - BenchmarkChart( - html=mpld3.fig_to_html(fig), - label=benchmark.label, - suite=benchmark.metadata.suite, - ) - ) - plt.close(fig) - - return html_charts - - -@dataclass -class ExplicitGroup: - name: str - nnames: int - metadata: BenchmarkMetadata - runs: dict[str, dict[str, Result]] - - -def create_explicit_groups( - benchmark_runs: list[BenchmarkRun], compare_names: list[str] -) -> list[ExplicitGroup]: - groups = {} - - for run in benchmark_runs: - if run.name in compare_names: - for res in run.results: - if res.explicit_group != "": - if res.explicit_group not in groups: - groups[res.explicit_group] = ExplicitGroup( - name=res.explicit_group, - nnames=len(compare_names), - metadata=BenchmarkMetadata( - unit=res.unit, - lower_is_better=res.lower_is_better, - suite=res.suite, - ), - runs={}, - ) - - group = groups[res.explicit_group] - if res.label not in group.runs: - group.runs[res.label] = {name: None for name in compare_names} - - if group.runs[res.label][run.name] is None: - group.runs[res.label][run.name] = res - - return list(groups.values()) - - -def create_grouped_bar_charts(groups: list[ExplicitGroup]) -> list[BenchmarkChart]: - plt.close("all") - - html_charts = [] - - for group in groups: - fig, ax = plt.subplots(figsize=(10, 6)) - - x = np.arange(group.nnames) - x_labels = [] - width = 0.8 / len(group.runs) - - max_height = 0 - - for i, (run_name, run_results) in enumerate(group.runs.items()): - offset = width * i - - positions = x + offset - x_labels = run_results.keys() - valid_data = [r.value if r is not None else 0 for r in run_results.values()] - rects = ax.bar(positions, valid_data, width, label=run_name) - # This is a hack to disable all bar_label. Setting labels to empty doesn't work. - # We create our own labels below for each bar, this works better in mpld3. - ax.bar_label(rects, fmt="") - - for rect, run, res in zip(rects, run_results.keys(), run_results.values()): - if res is None: - continue - - height = rect.get_height() - if height > max_height: - max_height = height - - ax.text( - rect.get_x() + rect.get_width() / 2.0, - height + 1, - f"{res.value:.1f}", - ha="center", - va="bottom", - fontsize=9, - ) - - tooltip_labels = [ - f"Date: {res.date.strftime('%Y-%m-%d %H:%M:%S')}\n" - f"Run: {run}\n" - f"Label: {res.label}\n" - f"Value: {res.value:.2f} {res.unit}\n" - f"Stddev: {res.stddev:.2f} {res.unit}\n" - ] - tooltip = mpld3.plugins.LineHTMLTooltip( - rect, tooltip_labels, css=tooltip_css() - ) - mpld3.plugins.connect(ax.figure, tooltip) - - # normally we'd just set legend to be outside - # the chart, but this is not supported by mpld3. - # instead, we adjust the y axis to account for - # the height of the bars. - legend_height = len(group.runs) * 0.1 - ax.set_ylim(0, max_height * (1 + legend_height)) - - ax.set_xticks([]) - ax.grid(True, axis="y", alpha=0.2) - ax.set_ylabel(f"Value ({group.metadata.unit})") - ax.legend(loc="upper left") - ax.set_title(group.name, pad=20) - performance_indicator = ( - "lower is better" if group.metadata.lower_is_better else "higher is better" - ) - ax.text( - 0.5, - 1.03, - f"({performance_indicator})", - ha="center", - transform=ax.transAxes, - style="italic", - fontsize=7, - color="#666666", - ) - - for idx, label in enumerate(x_labels): - # this is a hack to get labels to show above the legend - # we normalize the idx to transAxes transform and offset it a little. 
- x_norm = (idx + 0.3 - ax.get_xlim()[0]) / ( - ax.get_xlim()[1] - ax.get_xlim()[0] - ) - ax.text(x_norm, 1.03, label, transform=ax.transAxes, color="#666666") - - plt.tight_layout() - html_charts.append( - BenchmarkChart( - label=group.name, - html=mpld3.fig_to_html(fig), - suite=group.metadata.suite, - ) - ) - plt.close(fig) - - return html_charts - - -def process_benchmark_data( - benchmark_runs: list[BenchmarkRun], compare_names: list[str] -) -> list[BenchmarkSeries]: - benchmark_metadata: dict[str, BenchmarkMetadata] = {} - run_map: dict[str, dict[str, list[Result]]] = defaultdict(lambda: defaultdict(list)) - - for run in benchmark_runs: - if run.name not in compare_names: - continue - - for result in run.results: - if result.label not in benchmark_metadata: - benchmark_metadata[result.label] = BenchmarkMetadata( - unit=result.unit, - lower_is_better=result.lower_is_better, - suite=result.suite, - ) - - result.date = run.date - result.git_hash = run.git_hash - run_map[result.label][run.name].append(result) - - benchmark_series = [] - for label, metadata in benchmark_metadata.items(): - runs = [ - BenchmarkRun(name=run_name, results=results) - for run_name, results in run_map[label].items() - ] - benchmark_series.append( - BenchmarkSeries(label=label, metadata=metadata, runs=runs) - ) - - return benchmark_series - - -def generate_html( - benchmark_runs: list[BenchmarkRun], github_repo: str, compare_names: list[str] -) -> str: - benchmarks = process_benchmark_data(benchmark_runs, compare_names) - - timeseries = create_time_series_chart(benchmarks, github_repo) - timeseries_charts_html = "\n".join( - f'
{ts.html}'
-        for ts in timeseries
-    )
-
-    explicit_groups = create_explicit_groups(benchmark_runs, compare_names)
-
-    bar_charts = create_grouped_bar_charts(explicit_groups)
-    bar_charts_html = "\n".join(
-        f'{bc.html}
' - for bc in bar_charts - ) - - suite_names = {t.suite for t in timeseries} - suite_checkboxes_html = " ".join( - f'' - for suite in suite_names - ) - - script_path = os.path.dirname(os.path.realpath(__file__)) - results_template_path = Path(script_path, "benchmark_results.html.template") - with open(results_template_path, "r") as file: - html_template = file.read() - - template = Template(html_template) - data = { - "suite_checkboxes_html": suite_checkboxes_html, - "timeseries_charts_html": timeseries_charts_html, - "bar_charts_html": bar_charts_html, - } - - return template.substitute(data) + # these are not const because they might be modified + # in config.js + f.write("defaultCompareNames = ") + json.dump(compare_names, f) + f.write(";\n\n") # terminates defaultCompareNames + f.write("suiteNames = ") + json.dump(list(suite_names), f) + f.write(";") # terminates suiteNames diff --git a/devops/scripts/benchmarks/output_markdown.py b/devops/scripts/benchmarks/output_markdown.py index dd6711cec6365..18b5779473a75 100644 --- a/devops/scripts/benchmarks/output_markdown.py +++ b/devops/scripts/benchmarks/output_markdown.py @@ -5,7 +5,7 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception import collections -from benches.result import Result +from utils.result import Result from options import options, MarkdownSize import ast @@ -138,17 +138,6 @@ def generate_markdown_details( env_dict = res.env command = res.command - # If data is collected from already saved results, - # the content is parsed as strings - if isinstance(res.env, str): - # Since the scripts would be used solely on data prepared - # by our scripts, this should be safe - # However, maybe needs an additional blessing - # https://docs.python.org/3/library/ast.html#ast.literal_eval - env_dict = ast.literal_eval(res.env) - if isinstance(res.command, str): - command = ast.literal_eval(res.command) - section = ( "\n
\n" f"{res.label}\n\n" @@ -179,7 +168,7 @@ def generate_markdown_details( return "\nBenchmark details contain too many chars to display\n" -def generate_summary_table_and_chart( +def generate_summary_table( chart_data: dict[str, list[Result]], baseline_name: str, markdown_size: MarkdownSize ): summary_table = get_chart_markdown_header( @@ -374,10 +363,27 @@ def generate_summary_table_and_chart( return "\n# Summary\n" "Benchmark output is too large to display\n\n" +def generate_failures_section(failures: dict[str, str]) -> str: + if not failures: + return "" + + section = "\n# Failures\n" + section += "| Name | Failure |\n" + section += "|---|---|\n" + + for name, failure in failures.items(): + section += f"| {name} | {failure} |\n" + + return section + + def generate_markdown( - name: str, chart_data: dict[str, list[Result]], markdown_size: MarkdownSize + name: str, + chart_data: dict[str, list[Result]], + failures: dict[str, str], + markdown_size: MarkdownSize, ): - (summary_line, summary_table) = generate_summary_table_and_chart( + (summary_line, summary_table) = generate_summary_table( chart_data, name, markdown_size ) @@ -396,4 +402,6 @@ def generate_markdown( ) generated_markdown += "\n# Details\n" f"{markdown_details}\n" - return generated_markdown + failures_section = generate_failures_section(failures) + + return failures_section + generated_markdown diff --git a/devops/scripts/benchmarks/benches/oneapi.py b/devops/scripts/benchmarks/utils/oneapi.py similarity index 79% rename from devops/scripts/benchmarks/benches/oneapi.py rename to devops/scripts/benchmarks/utils/oneapi.py index 0547f6646e39e..e1876b5ed37fb 100644 --- a/devops/scripts/benchmarks/benches/oneapi.py +++ b/devops/scripts/benchmarks/utils/oneapi.py @@ -7,29 +7,33 @@ from utils.utils import download, run from options import options import os +import hashlib class OneAPI: - # random unique number for benchmark oneAPI installation - ONEAPI_BENCHMARK_INSTANCE_ID = 987654 - def __init__(self): self.oneapi_dir = os.path.join(options.workdir, "oneapi") Path(self.oneapi_dir).mkdir(parents=True, exist_ok=True) - # delete if some option is set? + self.oneapi_instance_id = self.generate_unique_oneapi_id(self.oneapi_dir) # can we just hardcode these links? 
        self.install_package(
            "dnnl",
            "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/87e117ab-039b-437d-9c80-dcd5c9e675d5/intel-onednn-2025.0.0.862_offline.sh",
+            "6866feb5b8dfefd6ff45d6bfabed44f01d7fba8fd452480ae1fd86b92e9481ae052c24842da14f112f672f5c4859945b",
        )
        self.install_package(
            "mkl",
            "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940_offline.sh",
+            "122bb84cf943ea27753cb399c81ab2ae218ebd51b789c74d273240157722925ab4d5a43cb0b5de41b854f2c5a59a4002",
        )
        return

-    def install_package(self, name, url):
+    def generate_unique_oneapi_id(self, path):
+        hash_object = hashlib.md5(path.encode())
+        return hash_object.hexdigest()
+
+    def install_package(self, name, url, checksum):
        package_path = os.path.join(self.oneapi_dir, name)
        if Path(package_path).exists():
            print(
@@ -37,11 +41,13 @@ def install_package(self, name, url):
            )
            return

-        package = download(self.oneapi_dir, url, f"package_{name}.sh")
+        package = download(
+            self.oneapi_dir, url, f"package_{name}.sh", checksum=checksum
+        )
        try:
            print(f"installing {name}")
            run(
-                f"sh {package} -a -s --eula accept --install-dir {self.oneapi_dir} --instance f{self.ONEAPI_BENCHMARK_INSTANCE_ID}"
+                f"sh {package} -a -s --eula accept --install-dir {self.oneapi_dir} --instance {self.oneapi_instance_id}"
            )
        except:
            print("oneAPI installation likely exists already")
diff --git a/devops/scripts/benchmarks/benches/result.py b/devops/scripts/benchmarks/utils/result.py
similarity index 69%
rename from devops/scripts/benchmarks/benches/result.py
rename to devops/scripts/benchmarks/utils/result.py
index 52a098d91c24a..4e65a3b8aa582 100644
--- a/devops/scripts/benchmarks/benches/result.py
+++ b/devops/scripts/benchmarks/utils/result.py
@@ -3,9 +3,9 @@
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import Optional
-from dataclasses_json import dataclass_json
+from dataclasses_json import config, dataclass_json
 from datetime import datetime

@@ -14,8 +14,8 @@ class Result:
     label: str
     value: float
-    command: str
-    env: str
+    command: list[str]
+    env: dict[str, str]
     stdout: str
     passed: bool = True
     unit: str = ""
@@ -26,9 +26,8 @@ class Result:
     # values below should not be set by the benchmark
     name: str = ""
     lower_is_better: bool = True
-    git_hash: str = ""
-    date: Optional[datetime] = None
     suite: str = "Unknown"
+    description: str = "No description provided."
@dataclass_json @@ -37,4 +36,8 @@ class BenchmarkRun: results: list[Result] name: str = "This PR" git_hash: str = "" - date: datetime = None + github_repo: str = None + date: datetime = field( + default=None, + metadata=config(encoder=datetime.isoformat, decoder=datetime.fromisoformat), + ) diff --git a/devops/scripts/benchmarks/utils/utils.py b/devops/scripts/benchmarks/utils/utils.py index d3d88f417cb8b..ba26127ce37b9 100644 --- a/devops/scripts/benchmarks/utils/utils.py +++ b/devops/scripts/benchmarks/utils/utils.py @@ -12,6 +12,7 @@ import urllib # nosec B404 from options import options from pathlib import Path +import hashlib def run( @@ -42,6 +43,12 @@ def run( env.update(env_vars) + if options.verbose: + command_str = " ".join(command) + env_str = " ".join(f"{key}={value}" for key, value in env_vars.items()) + full_command_str = f"{env_str} {command_str}".strip() + print(f"Running: {full_command_str}") + result = subprocess.run( command, cwd=cwd, @@ -104,7 +111,7 @@ def prepare_workdir(dir, version): shutil.rmtree(dir) else: raise Exception( - f"The directory {dir} exists but is a benchmark work directory." + f"The directory {dir} exists but is not a benchmark work directory." ) os.makedirs(dir) @@ -125,11 +132,26 @@ def create_build_path(directory, name): return build_path -def download(dir, url, file, untar=False, unzip=False): +def calculate_checksum(file_path): + sha_hash = hashlib.sha384() + with open(file_path, "rb") as f: + for byte_block in iter(lambda: f.read(4096), b""): + sha_hash.update(byte_block) + return sha_hash.hexdigest() + + +def download(dir, url, file, untar=False, unzip=False, checksum=""): data_file = os.path.join(dir, file) if not Path(data_file).exists(): print(f"{data_file} does not exist, downloading") urllib.request.urlretrieve(url, data_file) + calculated_checksum = calculate_checksum(data_file) + if calculated_checksum != checksum: + print( + f"Checksum mismatch: expected {checksum}, got {calculated_checksum}. Refusing to continue." + ) + exit(1) + if untar: file = tarfile.open(data_file) file.extractall(dir) From 3cbed5e3391366f16b6ff11d2a0d2e7a68511b58 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 5 Mar 2025 13:51:42 -0800 Subject: [PATCH 005/114] Test UR benchmarking suite --- devops/actions/run-tests/benchmark/action.yml | 46 ++++++++++--------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 69631d044891c..9846b5c8bd6c6 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -47,6 +47,7 @@ runs: echo "#" ;; esac - name: Compute CPU core range to run benchmarks on + shell: bash run: | # Taken from ur-benchmark-reusable.yml: @@ -89,27 +90,30 @@ runs: echo "-----" sycl-ls echo "-----" - taskset -c "$CORES" ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s || exit 1 - - name: Push compute-benchmarks results - if: always() - shell: bash - run: | - # TODO -- waiting on security clearance - # Load configuration values - $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) - - cd "./llvm-ci-perf-results" - git config user.name "SYCL Benchmarking Bot" - git config user.email "sys_sycl_benchmarks@intel.com" - git pull - git add . - # Make sure changes have been made - if git diff --quiet && git diff --cached --quiet; then - echo "No new results added, skipping push." 
- else - git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" - git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH" - fi + mkdir -v ./llvm_test_workdir + taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl ./toolchain --save baseline + echo "-----" + ls +# - name: Push compute-benchmarks results +# if: always() +# shell: bash +# run: | +# # TODO -- waiting on security clearance +# # Load configuration values +# $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) +# +# cd "./llvm-ci-perf-results" +# git config user.name "SYCL Benchmarking Bot" +# git config user.email "sys_sycl_benchmarks@intel.com" +# git pull +# git add . +# # Make sure changes have been made +# if git diff --quiet && git diff --cached --quiet; then +# echo "No new results added, skipping push." +# else +# git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" +# git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH" +# fi - name: Find benchmark result artifact here if: always() shell: bash From f79bbbfefe01c64963286c5aed5f84848b755200 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 5 Mar 2025 14:49:57 -0800 Subject: [PATCH 006/114] Bump tolerance to 7% --- devops/benchmarking/config.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/benchmarking/config.ini b/devops/benchmarking/config.ini index c0b3ca9c31c9e..6b0ecc21f940f 100644 --- a/devops/benchmarking/config.ini +++ b/devops/benchmarking/config.ini @@ -23,7 +23,7 @@ recorded = Median,StdDev ; the historical average. Metrics not included here are not compared against ; when passing/failing benchmark results. ; Format: comma-separated list of : -tolerances = Median:0.5 +tolerances = Median:0.7 ; Options for computing historical averages [average] From ffc813919aa9f165b040fa11742d5bd909befabe Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 5 Mar 2025 14:50:30 -0800 Subject: [PATCH 007/114] Revert "Bump tolerance to 7%" This reverts commit f79bbbfefe01c64963286c5aed5f84848b755200. --- devops/benchmarking/config.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/benchmarking/config.ini b/devops/benchmarking/config.ini index 6b0ecc21f940f..c0b3ca9c31c9e 100644 --- a/devops/benchmarking/config.ini +++ b/devops/benchmarking/config.ini @@ -23,7 +23,7 @@ recorded = Median,StdDev ; the historical average. Metrics not included here are not compared against ; when passing/failing benchmark results. ; Format: comma-separated list of : -tolerances = Median:0.7 +tolerances = Median:0.5 ; Options for computing historical averages [average] From 0a34e0d0914de06e0a086cbdcd44d0f1589447e2 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Thu, 6 Mar 2025 12:20:13 +0000 Subject: [PATCH 008/114] [benchmarks] fix failing benchmarks, improve html output ... 
and add presets to more easily --- devops/scripts/benchmarks/benches/compute.py | 2 +- devops/scripts/benchmarks/benches/llamacpp.py | 2 +- .../scripts/benchmarks/benches/syclbench.py | 13 ++-- devops/scripts/benchmarks/benches/velocity.py | 2 +- devops/scripts/benchmarks/history.py | 5 +- devops/scripts/benchmarks/html/config.js | 7 +-- devops/scripts/benchmarks/html/scripts.js | 46 ++++++++------ devops/scripts/benchmarks/main.py | 38 ++++++----- devops/scripts/benchmarks/options.py | 7 +-- devops/scripts/benchmarks/output_html.py | 63 +++++++++++-------- devops/scripts/benchmarks/output_markdown.py | 4 +- devops/scripts/benchmarks/presets.py | 50 +++++++++++++++ 12 files changed, 153 insertions(+), 86 deletions(-) create mode 100644 devops/scripts/benchmarks/presets.py diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index 18ed969728902..d35a8e2791648 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -170,7 +170,7 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit=parse_unit_type(unit), - description=self.description() + description=self.description(), ) ) return ret diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py index d8e0ab5d007bb..c12f811942849 100644 --- a/devops/scripts/benchmarks/benches/llamacpp.py +++ b/devops/scripts/benchmarks/benches/llamacpp.py @@ -139,7 +139,7 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit="token/s", - description=self.description() + description=self.description(), ) ) return results diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py index 47326b2555a68..cc2db0a2fcf7c 100644 --- a/devops/scripts/benchmarks/benches/syclbench.py +++ b/devops/scripts/benchmarks/benches/syclbench.py @@ -105,7 +105,6 @@ def __init__(self, bench, name, test): self.bench = bench self.bench_name = name self.test = test - self.done = False def bin_args(self) -> list[str]: return [] @@ -119,8 +118,6 @@ def setup(self): ) def run(self, env_vars) -> list[Result]: - if self.done: - return self.outputfile = os.path.join(self.bench.directory, self.test + ".csv") command = [ @@ -152,17 +149,17 @@ def run(self, env_vars) -> list[Result]: unit="ms", ) ) - self.done = True - return res_list - def teardown(self): - print(f"Removing {self.outputfile}...") os.remove(self.outputfile) - return + + return res_list def name(self): return f"{self.bench.name()} {self.test}" + def teardown(self): + return + # multi benchmarks class Blocked_transform(SyclBenchmark): diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py index be36c47ca36d5..652a831d0222e 100644 --- a/devops/scripts/benchmarks/benches/velocity.py +++ b/devops/scripts/benchmarks/benches/velocity.py @@ -136,7 +136,7 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit=self.unit, - description=self.description() + description=self.description(), ) ] diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index 2bb0b9db8ea38..2b7002ed7faa9 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -61,11 +61,12 @@ def extract_index(file_path: Path) -> int: def create_run(self, name: str, results: list[Result]) -> BenchmarkRun: try: - result = run("git rev-parse --short HEAD") + script_dir = 
os.path.dirname(os.path.abspath(__file__)) + result = run("git rev-parse --short HEAD", cwd=script_dir) git_hash = result.stdout.decode().strip() # Get the GitHub repo URL from git remote - remote_result = run("git remote get-url origin") + remote_result = run("git remote get-url origin", cwd=script_dir) remote_url = remote_result.stdout.decode().strip() # Convert SSH or HTTPS URL to owner/repo format diff --git a/devops/scripts/benchmarks/html/config.js b/devops/scripts/benchmarks/html/config.js index c1210b2b21da5..3e67ae1dce8e5 100644 --- a/devops/scripts/benchmarks/html/config.js +++ b/devops/scripts/benchmarks/html/config.js @@ -1,5 +1,2 @@ -const config = { - remoteDataUrl: '' -}; -// defaultCompareNames = []; -// suiteNames = []; +//remoteDataUrl = 'https://example.com/data.json'; +//defaultCompareNames = ['baseline']; diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js index 8f0272048136d..7b8b4d742cca2 100644 --- a/devops/scripts/benchmarks/html/scripts.js +++ b/devops/scripts/benchmarks/html/scripts.js @@ -114,14 +114,12 @@ function createChart(data, containerId, type) { const chartConfig = { type: type === 'time' ? 'line' : 'bar', - data: type === 'time' ? - { - datasets: createTimeseriesDatasets(data) - } : - { - labels: data.labels, - datasets: data.datasets - }, + data: type === 'time' ? { + datasets: createTimeseriesDatasets(data) + } : { + labels: data.labels, + datasets: data.datasets + }, options: options }; @@ -221,10 +219,12 @@ function createChartContainer(data, canvasId) { summary.appendChild(downloadButton); details.appendChild(summary); + latestRunsLookup = createLatestRunsLookup(benchmarkRuns); + // Create and append extra info const extraInfo = document.createElement('div'); extraInfo.className = 'extra-info'; - extraInfo.innerHTML = generateExtraInfo(data); + extraInfo.innerHTML = generateExtraInfo(latestRunsLookup, data); details.appendChild(extraInfo); container.appendChild(details); @@ -252,9 +252,8 @@ function createLatestRunsLookup(benchmarkRuns) { return latestRunsMap; } -const latestRunsLookup = createLatestRunsLookup(benchmarkRuns); -function generateExtraInfo(data) { +function generateExtraInfo(latestRunsLookup, data) { const labels = data.datasets ? 
data.datasets.map(dataset => dataset.label) : [data.label]; return labels.map(label => { @@ -283,7 +282,7 @@ function downloadChart(canvasId, label) { const chart = chartInstances.get(canvasId); if (chart) { const link = document.createElement('a'); - link.href = chart.toBase64Image('image/jpeg', 1) + link.href = chart.toBase64Image('image/png', 1) link.download = `${label}.png`; link.click(); } @@ -445,6 +444,13 @@ function setupRunSelector() { function setupSuiteFilters() { suiteFiltersContainer = document.getElementById('suite-filters'); + const suiteNames = new Set(); + benchmarkRuns.forEach(run => { + run.results.forEach(result => { + suiteNames.add(result.suite); + }); + }); + suiteNames.forEach(suite => { const label = document.createElement('label'); const checkbox = document.createElement('input'); @@ -530,16 +536,18 @@ function loadData() { const loadingIndicator = document.getElementById('loading-indicator'); loadingIndicator.style.display = 'block'; // Show loading indicator - if (config.remoteDataUrl && config.remoteDataUrl !== '') { + if (typeof remoteDataUrl !== 'undefined' && remoteDataUrl !== '') { // Fetch data from remote URL - fetch(config.remoteDataUrl) - .then(response => response.text()) - .then(scriptContent => { - // Evaluate the script content - eval(scriptContent); + fetch(remoteDataUrl) + .then(response => response.json()) + .then(data => { + benchmarkRuns = data; initializeCharts(); }) - .catch(error => console.error('Error fetching remote data:', error)) + .catch(error => { + console.error('Error fetching remote data:', error); + loadingIndicator.textContent = 'Fetching remote data failed.'; + }) .finally(() => { loadingIndicator.style.display = 'none'; // Hide loading indicator }); diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 8f5330d7b4f62..716f162c48feb 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -17,6 +17,7 @@ from history import BenchmarkHistory from utils.utils import prepare_workdir from utils.compute_runtime import * +from presets import Presets import argparse import re @@ -153,7 +154,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): SyclBench(directory), LlamaCppBench(directory), UMFSuite(directory), - # TestSuite() + TestSuite() ] if not options.dry_run else [] @@ -163,6 +164,9 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): failures = {} for s in suites: + if s.name() not in options.preset.suites(): + continue + suite_benchmarks = s.benchmarks() if filter: suite_benchmarks = [ @@ -182,14 +186,13 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): print(f"{type(s).__name__} setup complete.") benchmarks += suite_benchmarks - for b in benchmarks: - print(b.name()) - for benchmark in benchmarks: try: - print(f"Setting up {benchmark.name()}... ") + if options.verbose: + print(f"Setting up {benchmark.name()}... 
") benchmark.setup() - print(f"{benchmark.name()} setup complete.") + if options.verbose: + print(f"{benchmark.name()} setup complete.") except Exception as e: if options.exit_on_failure: @@ -279,8 +282,6 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): if options.output_html: generate_html(history.runs, compare_names) - print(f"See {os.getcwd()}/html/index.html for the results.") - def validate_and_parse_env_args(env_args): env_vars = {} @@ -362,12 +363,6 @@ def validate_and_parse_env_args(env_args): help="Regex pattern to filter benchmarks by name.", default=None, ) - parser.add_argument( - "--epsilon", - type=float, - help="Threshold to consider change of performance significant", - default=options.epsilon, - ) parser.add_argument( "--verbose", help="Print output of all the commands.", action="store_true" ) @@ -394,7 +389,11 @@ def validate_and_parse_env_args(env_args): help="Specify whether markdown output should fit the content size limit for request validation", ) parser.add_argument( - "--output-html", help="Create HTML output", action="store_true", default=False + "--output-html", + help="Create HTML output. Local output is for direct local viewing of the html file, remote is for server deployment.", + nargs="?", + const=options.output_html, + choices=["local", "remote"], ) parser.add_argument( "--dry-run", @@ -438,6 +437,13 @@ def validate_and_parse_env_args(env_args): help="Directory for cublas library", default=None, ) + parser.add_argument( + "--preset", + type=str, + choices=[p.name for p in Presets], + help="Benchmark preset to run.", + default='FULL', + ) args = parser.parse_args() additional_env_vars = validate_and_parse_env_args(args.env) @@ -449,7 +455,6 @@ def validate_and_parse_env_args(env_args): options.sycl = args.sycl options.iterations = args.iterations options.timeout = args.timeout - options.epsilon = args.epsilon options.ur = args.ur options.ur_adapter = args.adapter options.exit_on_failure = args.exit_on_failure @@ -464,6 +469,7 @@ def validate_and_parse_env_args(env_args): options.current_run_name = args.relative_perf options.cudnn_directory = args.cudnn_directory options.cublas_directory = args.cublas_directory + options.preset = Presets[args.preset].value() if args.build_igc and args.compute_runtime is None: parser.error("--build-igc requires --compute-runtime to be set") diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index 206ca94eb0d0b..fd08ce83d145e 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -1,6 +1,6 @@ from dataclasses import dataclass, field from enum import Enum - +from presets import Preset class Compare(Enum): LATEST = "latest" @@ -29,11 +29,9 @@ class Options: compare: Compare = Compare.LATEST compare_max: int = 10 # average/median over how many results output_markdown: MarkdownSize = MarkdownSize.SHORT - output_html: bool = False + output_html: str = "local" dry_run: bool = False - # these two should probably be merged into one setting stddev_threshold: float = 0.02 - epsilon: float = 0.02 iterations_stddev: int = 5 build_compute_runtime: bool = False extra_ld_libraries: list[str] = field(default_factory=list) @@ -41,6 +39,7 @@ class Options: compute_runtime_tag: str = "25.05.32567.12" build_igc: bool = False current_run_name: str = "This PR" + preset: Preset = None options = Options() diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py index e6e3212dbcdb2..d84fd659beb6c 100644 
--- a/devops/scripts/benchmarks/output_html.py +++ b/devops/scripts/benchmarks/output_html.py @@ -5,34 +5,43 @@ import json import os +from options import options def generate_html(benchmark_runs: list, compare_names: list[str]): - - # Get unique suite names - suite_names = {result.suite for run in benchmark_runs for result in run.results} - # create path to data.js in html folder - data_path = os.path.join(os.path.dirname(__file__), "html", "data.js") - - # Write data to js file - # We can't store this as a standalone json file because it needs to be inline in the html - with open(data_path, "w") as f: - f.write("const benchmarkRuns = [\n") - # it might be tempting to just to create a list and convert - # that to a json, but that leads to json being serialized twice. - for i, run in enumerate(benchmark_runs): - if i > 0: - f.write(",\n") - f.write(run.to_json()) - - f.write("\n];\n\n") # terminates benchmarkRuns - - # these are not const because they might be modified - # in config.js - f.write("defaultCompareNames = ") - json.dump(compare_names, f) - f.write(";\n\n") # terminates defaultCompareNames - f.write("suiteNames = ") - json.dump(list(suite_names), f) - f.write(";") # terminates suiteNames + html_path = os.path.join(os.path.dirname(__file__), "html") + + if options.output_html == "local": + data_path = os.path.join(html_path, "data.js") + # Write data to js file + # We can't store this as a standalone json file because it needs to be inline in the html + with open(data_path, "w") as f: + f.write("benchmarkRuns = [\n") + # it might be tempting to just to create a list and convert + # that to a json, but that leads to json being serialized twice. + for i, run in enumerate(benchmark_runs): + if i > 0: + f.write(",\n") + f.write(run.to_json()) + + f.write("\n];\n\n") # terminates benchmarkRuns + + f.write("defaultCompareNames = ") + json.dump(compare_names, f) + f.write(";\n") # terminates defaultCompareNames + + print(f"See {os.getcwd()}/html/index.html for the results.") + else: + data_path = os.path.join(html_path, "data.json") + with open(data_path, "w") as f: + f.write("[\n") + for i, run in enumerate(benchmark_runs): + if i > 0: + f.write(",\n") + f.write(run.to_json()) + f.write("\n];\n") + + print( + f"Upload {data_path} to a location set in config.js remoteDataUrl argument." + ) diff --git a/devops/scripts/benchmarks/output_markdown.py b/devops/scripts/benchmarks/output_markdown.py index 18b5779473a75..3295968603d0c 100644 --- a/devops/scripts/benchmarks/output_markdown.py +++ b/devops/scripts/benchmarks/output_markdown.py @@ -79,7 +79,7 @@ def get_improved_regressed_summary(is_improved: bool, rows_count: int): "\n
\n" "\n" f"{title} {rows_count} " - f"(threshold {options.epsilon*100:.2f}%)\n" + f"(threshold {options.stddev_threshold*100:.2f}%)\n" "\n\n" ) @@ -265,7 +265,7 @@ def generate_summary_table( delta = oln.diff - 1 oln.row += f" {delta*100:.2f}%" - if abs(delta) > options.epsilon: + if abs(delta) > options.stddev_threshold: if delta > 0: improved_rows.append(oln.row + " | \n") else: diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py new file mode 100644 index 0000000000000..46f8257cd01ec --- /dev/null +++ b/devops/scripts/benchmarks/presets.py @@ -0,0 +1,50 @@ +# Copyright (C) 2024 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +from enum import Enum + +class Preset(): + def description(self): + pass + def suites(self) -> list[str]: + return [] + +class Full(Preset): + def description(self): + return "All available benchmarks." + def suites(self) -> list[str]: + return ['Compute Benchmarks', 'llama.cpp bench', 'SYCL-Bench', 'Velocity Bench', 'UMF'] + +class SYCL(Preset): + def description(self): + return "All available benchmarks related to SYCL." + def suites(self) -> list[str]: + return ['Compute Benchmarks', 'llama.cpp bench', 'SYCL-Bench', 'Velocity Bench'] + +class Minimal(Preset): + def description(self): + return "Short microbenchmarks." + def suites(self) -> list[str]: + return ['Compute Benchmarks'] + +class Normal(Preset): + def description(self): + return "Comprehensive mix of microbenchmarks and real applications." + def suites(self) -> list[str]: + return ['Compute Benchmarks'] + +class Test(Preset): + def description(self): + return "Noop benchmarks for framework testing." 
+ def suites(self) -> list[str]: + return ['Test Suite'] + + +class Presets(Enum): + FULL = Full + SYCL = SYCL # Nightly + NORMAL = Normal # PR + MINIMAL = Minimal # Quick smoke tests + TEST = Test From 3f42420d95522557ff09c45aa5db480d1f636eda Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Thu, 6 Mar 2025 13:47:30 +0000 Subject: [PATCH 009/114] [benchmarks] fix python formatting with black --- devops/scripts/benchmarks/main.py | 4 ++-- devops/scripts/benchmarks/options.py | 1 + devops/scripts/benchmarks/presets.py | 36 +++++++++++++++++++++------- 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 716f162c48feb..4df66d7ad9c4c 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -154,7 +154,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): SyclBench(directory), LlamaCppBench(directory), UMFSuite(directory), - TestSuite() + TestSuite(), ] if not options.dry_run else [] @@ -442,7 +442,7 @@ def validate_and_parse_env_args(env_args): type=str, choices=[p.name for p in Presets], help="Benchmark preset to run.", - default='FULL', + default="FULL", ) args = parser.parse_args() diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index fd08ce83d145e..7f4f3a9a32eb3 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -2,6 +2,7 @@ from enum import Enum from presets import Preset + class Compare(Enum): LATEST = "latest" AVERAGE = "average" diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py index 46f8257cd01ec..4db68a925a54e 100644 --- a/devops/scripts/benchmarks/presets.py +++ b/devops/scripts/benchmarks/presets.py @@ -5,46 +5,64 @@ from enum import Enum -class Preset(): + +class Preset: def description(self): pass + def suites(self) -> list[str]: return [] + class Full(Preset): def description(self): return "All available benchmarks." + def suites(self) -> list[str]: - return ['Compute Benchmarks', 'llama.cpp bench', 'SYCL-Bench', 'Velocity Bench', 'UMF'] + return [ + "Compute Benchmarks", + "llama.cpp bench", + "SYCL-Bench", + "Velocity Bench", + "UMF", + ] + class SYCL(Preset): def description(self): return "All available benchmarks related to SYCL." + def suites(self) -> list[str]: - return ['Compute Benchmarks', 'llama.cpp bench', 'SYCL-Bench', 'Velocity Bench'] + return ["Compute Benchmarks", "llama.cpp bench", "SYCL-Bench", "Velocity Bench"] + class Minimal(Preset): def description(self): return "Short microbenchmarks." + def suites(self) -> list[str]: - return ['Compute Benchmarks'] + return ["Compute Benchmarks"] + class Normal(Preset): def description(self): return "Comprehensive mix of microbenchmarks and real applications." + def suites(self) -> list[str]: - return ['Compute Benchmarks'] + return ["Compute Benchmarks"] + class Test(Preset): def description(self): return "Noop benchmarks for framework testing." 
+ def suites(self) -> list[str]: - return ['Test Suite'] + return ["Test Suite"] class Presets(Enum): FULL = Full - SYCL = SYCL # Nightly - NORMAL = Normal # PR - MINIMAL = Minimal # Quick smoke tests + SYCL = SYCL # Nightly + NORMAL = Normal # PR + MINIMAL = Minimal # Quick smoke tests TEST = Test From 1c7b189db0c8a8d2883ced52ac3e2b45840c792d Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Thu, 6 Mar 2025 14:35:11 +0000 Subject: [PATCH 010/114] update driver version --- devops/scripts/benchmarks/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index 7f4f3a9a32eb3..aba5aac434917 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -37,7 +37,7 @@ class Options: build_compute_runtime: bool = False extra_ld_libraries: list[str] = field(default_factory=list) extra_env_vars: dict = field(default_factory=dict) - compute_runtime_tag: str = "25.05.32567.12" + compute_runtime_tag: str = "25.05.32567.18" build_igc: bool = False current_run_name: str = "This PR" preset: Preset = None From ad13e93adf8cabd17a7f384f68d509fdbc58a134 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Thu, 6 Mar 2025 15:02:42 +0000 Subject: [PATCH 011/114] simplify preset implementation and fix normal preset --- devops/scripts/benchmarks/main.py | 8 ++--- devops/scripts/benchmarks/options.py | 4 +-- devops/scripts/benchmarks/presets.py | 51 +++++++++++++--------------- 3 files changed, 30 insertions(+), 33 deletions(-) diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 4df66d7ad9c4c..11f02d627a87f 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -17,7 +17,7 @@ from history import BenchmarkHistory from utils.utils import prepare_workdir from utils.compute_runtime import * -from presets import Presets +from presets import preset_get_by_name, presets import argparse import re @@ -440,9 +440,9 @@ def validate_and_parse_env_args(env_args): parser.add_argument( "--preset", type=str, - choices=[p.name for p in Presets], + choices=[p.name() for p in presets], help="Benchmark preset to run.", - default="FULL", + default=options.preset.name(), ) args = parser.parse_args() @@ -469,7 +469,7 @@ def validate_and_parse_env_args(env_args): options.current_run_name = args.relative_perf options.cudnn_directory = args.cudnn_directory options.cublas_directory = args.cublas_directory - options.preset = Presets[args.preset].value() + options.preset = preset_get_by_name(args.preset) if args.build_igc and args.compute_runtime is None: parser.error("--build-igc requires --compute-runtime to be set") diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index aba5aac434917..428ab1f13e9af 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -1,6 +1,6 @@ from dataclasses import dataclass, field from enum import Enum -from presets import Preset +from presets import Preset, presets class Compare(Enum): @@ -40,7 +40,7 @@ class Options: compute_runtime_tag: str = "25.05.32567.18" build_igc: bool = False current_run_name: str = "This PR" - preset: Preset = None + preset: Preset = presets[0] options = Options() diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py index 4db68a925a54e..54727446ecc7d 100644 --- a/devops/scripts/benchmarks/presets.py +++ b/devops/scripts/benchmarks/presets.py @@ -3,22 +3,23 @@ # See 
LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -from enum import Enum - +from typing import List, Type class Preset: - def description(self): - pass + def description(self) -> str: + raise NotImplementedError - def suites(self) -> list[str]: - return [] + def name(self) -> str: + return self.__class__.__name__ + def suites(self) -> List[str]: + raise NotImplementedError class Full(Preset): - def description(self): + def description(self) -> str: return "All available benchmarks." - def suites(self) -> list[str]: + def suites(self) -> List[str]: return [ "Compute Benchmarks", "llama.cpp bench", @@ -27,42 +28,38 @@ def suites(self) -> list[str]: "UMF", ] - class SYCL(Preset): - def description(self): + def description(self) -> str: return "All available benchmarks related to SYCL." - def suites(self) -> list[str]: + def suites(self) -> List[str]: return ["Compute Benchmarks", "llama.cpp bench", "SYCL-Bench", "Velocity Bench"] - class Minimal(Preset): - def description(self): + def description(self) -> str: return "Short microbenchmarks." - def suites(self) -> list[str]: + def suites(self) -> List[str]: return ["Compute Benchmarks"] - class Normal(Preset): - def description(self): + def description(self) -> str: return "Comprehensive mix of microbenchmarks and real applications." - def suites(self) -> list[str]: - return ["Compute Benchmarks"] - + def suites(self) -> List[str]: + return ["Compute Benchmarks", "llama.cpp bench", "Velocity Bench"] class Test(Preset): - def description(self): + def description(self) -> str: return "Noop benchmarks for framework testing." - def suites(self) -> list[str]: + def suites(self) -> List[str]: return ["Test Suite"] +presets = [Full(), SYCL(), Minimal(), Normal(), Test()] -class Presets(Enum): - FULL = Full - SYCL = SYCL # Nightly - NORMAL = Normal # PR - MINIMAL = Minimal # Quick smoke tests - TEST = Test +def preset_get_by_name(name: str) -> Preset: + for p in presets: + if p.name().upper() == name.upper(): + return p + raise ValueError(f"Preset '{name}' not found.") From 68ed0c4e6bcf1a06bd924e0d96731e52513ae1eb Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 6 Mar 2025 14:44:52 -0800 Subject: [PATCH 012/114] Add PVC and BMG as runners --- .github/workflows/sycl-linux-run-tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index 9bad484b1c12e..5797755934a0c 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -126,6 +126,8 @@ on: - '["cts-cpu"]' - '["Linux", "build"]' - '["cuda"]' + - '["Linux", "bmg"]' + - '["PVC_PERF"]' image: type: choice options: From 3a65b98b4558c56f9aeca5d1b33393715764c361 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 6 Mar 2025 14:49:20 -0800 Subject: [PATCH 013/114] Install dependencies before running UR script --- devops/actions/run-tests/benchmark/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 9846b5c8bd6c6..41f9e68f3609d 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -90,7 +90,7 @@ runs: echo "-----" sycl-ls echo "-----" - mkdir -v ./llvm_test_workdir + pip install -r ./devops/scripts/benchmarks/requirements.txt taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl ./toolchain --save baseline echo "-----" 
ls From 220121aa4229bb8d2e6517bb84112b48fa14317b Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 6 Mar 2025 14:58:52 -0800 Subject: [PATCH 014/114] Use venv for python packages --- devops/actions/run-tests/benchmark/action.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 41f9e68f3609d..afd5ede276228 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -90,6 +90,8 @@ runs: echo "-----" sycl-ls echo "-----" + python3 -m venv .venv + . .venv/bin/activate pip install -r ./devops/scripts/benchmarks/requirements.txt taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl ./toolchain --save baseline echo "-----" From 37d361cac120e662c1905acd22542014ac1ac73c Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 6 Mar 2025 15:01:21 -0800 Subject: [PATCH 015/114] Install venv before using venv --- devops/actions/run-tests/benchmark/action.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index afd5ede276228..88f2e75942c4d 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -90,6 +90,7 @@ runs: echo "-----" sycl-ls echo "-----" + apt install -y python3-venv python3 -m venv .venv . .venv/bin/activate pip install -r ./devops/scripts/benchmarks/requirements.txt From 07f1e107a78f84e320379a5b01e4f92b159964cb Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Fri, 7 Mar 2025 12:07:52 +0000 Subject: [PATCH 016/114] [benchmarks] allow specifying custom results directories --- devops/scripts/benchmarks/html/data.js | 2 ++ devops/scripts/benchmarks/main.py | 12 +++++++++++- devops/scripts/benchmarks/options.py | 1 + devops/scripts/benchmarks/output_html.py | 2 +- devops/scripts/benchmarks/presets.py | 2 +- 5 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 devops/scripts/benchmarks/html/data.js diff --git a/devops/scripts/benchmarks/html/data.js b/devops/scripts/benchmarks/html/data.js new file mode 100644 index 0000000000000..36e076361fe17 --- /dev/null +++ b/devops/scripts/benchmarks/html/data.js @@ -0,0 +1,2 @@ +benchmarkRuns = []; +defaultCompareNames = []; diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 11f02d627a87f..43e0bdf4832b1 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -242,7 +242,10 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): if not options.dry_run: chart_data = {this_name: results} - history = BenchmarkHistory(directory) + results_dir = directory + if options.custom_results_dir: + results_dir = Path(options.custom_results_dir) + history = BenchmarkHistory(results_dir) # limit how many files we load. # should this be configurable? 
history.load(1000) @@ -444,6 +447,12 @@ def validate_and_parse_env_args(env_args): help="Benchmark preset to run.", default=options.preset.name(), ) + parser.add_argument( + "--results-dir", + type=str, + help="Specify a custom results directory", + default=options.custom_results_dir, + ) args = parser.parse_args() additional_env_vars = validate_and_parse_env_args(args.env) @@ -470,6 +479,7 @@ def validate_and_parse_env_args(env_args): options.cudnn_directory = args.cudnn_directory options.cublas_directory = args.cublas_directory options.preset = preset_get_by_name(args.preset) + options.custom_results_dir = args.results_dir if args.build_igc and args.compute_runtime is None: parser.error("--build-igc requires --compute-runtime to be set") diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index 428ab1f13e9af..c567a4a2bda53 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -41,6 +41,7 @@ class Options: build_igc: bool = False current_run_name: str = "This PR" preset: Preset = presets[0] + custom_results_dir = None options = Options() diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py index d84fd659beb6c..35fbc2ffb122a 100644 --- a/devops/scripts/benchmarks/output_html.py +++ b/devops/scripts/benchmarks/output_html.py @@ -40,7 +40,7 @@ def generate_html(benchmark_runs: list, compare_names: list[str]): if i > 0: f.write(",\n") f.write(run.to_json()) - f.write("\n];\n") + f.write("\n]\n") print( f"Upload {data_path} to a location set in config.js remoteDataUrl argument." diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py index 54727446ecc7d..5d8e187ac0115 100644 --- a/devops/scripts/benchmarks/presets.py +++ b/devops/scripts/benchmarks/presets.py @@ -60,6 +60,6 @@ def suites(self) -> List[str]: def preset_get_by_name(name: str) -> Preset: for p in presets: - if p.name().upper() == name.upper(): + if p.name() == name: return p raise ValueError(f"Preset '{name}' not found.") From 64cf79cb84e8f4a2bc108a8b93cb264adeef6579 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Fri, 7 Mar 2025 15:17:23 +0000 Subject: [PATCH 017/114] [benchmarks] sort runs by date for html output --- devops/scripts/benchmarks/html/data.js | 16 +++++++++++++++- devops/scripts/benchmarks/output_html.py | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/devops/scripts/benchmarks/html/data.js b/devops/scripts/benchmarks/html/data.js index 36e076361fe17..bd2a4bb9c6f36 100644 --- a/devops/scripts/benchmarks/html/data.js +++ b/devops/scripts/benchmarks/html/data.js @@ -1,2 +1,16 @@ -benchmarkRuns = []; +benchmarkRuns = [ +{"results": [{"label": "Memory Bandwidth 1", "value": 2040.8882991390067, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 34.457610431783294, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2529.3774380653363, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 135.81200692232412, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2719.8110231537125, "command": 
["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 162.32053564116694, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3227.632839523546, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 165.72010893383725, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3514.4167999909496, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.05909225714902, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4012.1042760150494, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 213.80137392913923, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 103.58153862508325, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.155836817249414, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 125.92477357063481, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.26567067278589, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 133.83240260210536, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.763812811796768, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 156.26773548103202, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.861842969825087, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 167.3255955272463, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.48929969639468, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 220.49290675578928, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.900958177754223, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": 
"Throughput 1", "value": 1480.3642886335488, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 97.14840825777334, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1757.3646882744213, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 94.97795059309506, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2141.760057641498, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 32.20444501013399, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2465.113025920638, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 142.56485787432257, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2646.9736547641232, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 165.21303041397977, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2797.023188351585, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 49.789332852672736, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3072.2144224296385, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 100.0435838937749, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3645.5868819428038, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 186.63713430054412, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4365.696214338321, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 70.80581668642078, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4712.424975602965, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 237.2219789185776, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 
5490.717140126425, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 102.98496803461086, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5899.69529717778, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 365.8281107263356, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 249.0033673842501, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.641649890532847, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 307.2248975403931, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.106532892713558, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 364.94516101524755, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.487184395370704, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 415.1825140704191, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 4.837117436872584, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 440.50926932373267, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.400527065008065, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 513.2345717731824, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.92653205921289, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "13462f5f6", "github_repo": "pbalcer/llvm", "date": "2025-03-07T14:04:12.881983+00:00"}, +{"results": [{"label": "Memory Bandwidth 1", "value": 2061.891541779758, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 45.43418752146129, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2418.370570307403, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", 
"passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 23.41390025375235, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2759.548256219084, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 140.04750469338484, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3268.9851244693905, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 179.65245219605663, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3573.980571932074, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.27214661339116, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3913.178724155857, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 187.41955301323392, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 96.66099349103821, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 9.949437203365676, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 116.94033117978861, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.670085238288802, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 141.8516673102208, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.49397378099331, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 154.47973126513787, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.7581068444608, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 194.47100906915202, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.603348605481727, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 189.26766261792042, 
"command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.80270435298115, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1548.0366148601304, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 22.556620202365167, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1804.0612981627564, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 130.9251933818919, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2117.020524938414, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 124.18576268885376, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2340.6226309817375, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 45.23157229205414, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2657.435335624127, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 178.93395582367347, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3100.1660243239976, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 59.26661177659249, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2973.0427624231074, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 133.47659228805884, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3499.50915562217, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 202.92584935080856, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 3906.063346066898, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 58.67588644266499, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4776.315860317371, "command": ["test", 
"--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 337.294287649651, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5294.515316259128, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 310.6460231086305, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5883.364679907042, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 433.9862905464425, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 247.81458542543336, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.259893742055365, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.324345463754, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.537217356717523, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 350.317230088579, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.694135619195492, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 404.94767826325585, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.03967001195265, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 448.68781789313334, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 37.68940635002855, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 479.7145913704619, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 29.819332357308436, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "52dba2a69", "github_repo": "pbalcer/llvm", "date": "2025-03-07T13:48:42.727410+00:00"}, +{"results": [{"label": "Memory Bandwidth 1", "value": 1944.712475358489, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 
137.3517754822544, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2494.968647183357, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 144.62096222735542, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2827.96959627778, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 161.09215987917975, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3246.4235207906368, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 194.8841813593721, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3415.497030173447, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 207.51586434688852, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3947.173405699456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.35155081978226, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 96.27501062264594, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.62997659996243, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 129.58001802257706, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.223861407928204, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 152.60658050771121, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.644344734962786, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 157.8365309090243, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 1.9279203474927489, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 179.69325992783263, "command": ["test", "--arg1", "foo"], "env": {"A": 
"B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.567971182588, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 190.29777300705297, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.545022416801082, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1520.7774888153917, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 69.44363449416652, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1841.9402998174073, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 36.99472050334539, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2063.573372718332, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 103.76799421011498, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2411.1299338593512, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.55096124823987, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2636.4186072468115, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 136.15002376636508, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3012.5429889405455, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 220.10345804333795, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2912.3694681990496, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.24541212948046, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3634.840665141933, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 205.90393111568957, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4221.70291649172, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, 
"stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 245.0992536434908, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4563.9141528786395, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 148.15450755100105, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5449.735755715656, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 283.67446282594074, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6103.288896553245, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 497.0264510256128, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 247.1162346822855, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.349695364944424, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.0848370650819, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.091832690685845, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 368.2173261284879, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.911533458328602, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 400.932628864893, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.298171550718916, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 465.45774333645085, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.008461742975705, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 494.19807030391513, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 31.290996975880688, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": 
"a15019b41", "github_repo": "pbalcer/llvm", "date": "2025-03-07T13:42:53.963514+00:00"}, +{"results": [{"label": "Memory Bandwidth 1", "value": 1971.9235866578244, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 107.4119769093561, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2381.359513168276, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.1820922785026, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2816.164331241929, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 152.82523354152792, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3207.788500404049, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.98152700892044, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3612.0807949868076, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 238.29524372895352, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4041.187128183399, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 244.78707963276804, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 110.17204676929632, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.7488792731298, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 110.04874446073308, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.111000761355566, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 139.80726599267632, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.761524761674202, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 167.65946901880108, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", 
"passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.961270297928603, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 175.07359940308456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.654053542209933, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 188.92280945420617, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.32935674842163, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1498.3892879578825, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 72.76968286004643, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1802.449855059067, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 117.35877323708975, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2141.6873668536814, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 109.1211656598374, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2481.234320462784, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 142.29288921121633, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2592.315439130817, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 171.50618527958042, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2986.630322110839, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 134.14155338256344, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3023.0069882524413, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.0861804957972, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3491.2685416445424, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", 
"passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.82885721897767, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4267.684357012167, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 258.535523100285, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4833.943488351638, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 288.5816839229039, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5460.197706764911, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 294.3526928188145, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6211.479518188777, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 448.53753098503586, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 248.60974821168077, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.966964309950376, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 299.08129766722294, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.458275817843905, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 345.13218478336375, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.88260705972654, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 368.43448345001804, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.0293359056239115, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 462.81719243303485, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 29.16929631101137, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 498.84520836251704, "command": ["test", "--arg1", 
"foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.943372517547482, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "461343280", "github_repo": "pbalcer/llvm", "date": "2025-03-07T13:37:14.849756+00:00"}, +{"results": [{"label": "Memory Bandwidth 1", "value": 2013.395440288061, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 119.82142134259605, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2432.2596423503755, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 118.39327416892019, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2674.0160578165187, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 194.41545828080007, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3063.9534832147688, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 205.67379884852215, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3584.672342581568, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 181.67353531675607, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4125.180591214061, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 273.2758074594961, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 106.37633318466106, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.247008579218756, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 111.99312616915259, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.168574067720925, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 148.4561344088857, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Bar Group", "stddev": 14.59295361046173, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 162.0852714518944, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.380760230770385, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 187.04637816265117, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.658051327117878, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 200.16012739025047, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.6645406941134, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1505.183607875215, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 93.57793481885791, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1786.864494698917, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 122.1347513455775, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2104.854088217566, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 128.42311038597916, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2373.3921231994896, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 140.26128420435194, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2680.62360254391, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 184.49504836547473, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2957.0424468763595, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.13611056356788, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3024.0197501043167, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Foo Group", "stddev": 155.3618836169113, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3658.757514096598, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 149.8130576669698, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4336.791327103415, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 267.10403249537495, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4594.550884548686, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 339.1255595981214, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5619.202557626439, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 324.7429329550701, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6145.450470023206, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 397.2604324517752, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 242.7598020860891, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 4.503364581661284, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 295.888600531132, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.878793912236713, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 333.6634181341022, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.945944118430873, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 386.559044229885, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.909652211845977, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 433.56985826314695, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, 
"unit": "ms", "explicit_group": "Bar Group", "stddev": 19.16786402230611, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 475.40739140041325, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.532574731353257, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "461343280", "github_repo": "pbalcer/llvm", "date": "2025-03-07T12:55:23.831147+00:00"}, +{"results": [{"label": "Memory Bandwidth 1", "value": 2036.879511822098, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 147.49123010982262, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2358.605120547564, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 148.31108709325747, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2782.758869742085, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.07850443580668, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3211.303768537726, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 160.64603088602735, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3726.2788114170226, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.68455828387613, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4034.451298605878, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 214.04589132488434, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 97.81132147931729, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.4388910648024, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 123.47877514885052, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 
15.850644538343035, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 138.3636972712076, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.453475343660529, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 159.0926504710019, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.406923335827646, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 177.58148765355367, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.719641698346496, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 213.78191902260386, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.56513730925096, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1508.4347909839335, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 96.90540186941426, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1765.9068352126365, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 83.00665769599348, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2079.3459975121978, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 129.25159465427944, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2370.0084472113276, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 110.2565848005119, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2598.252204318904, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 170.98495052891545, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2969.9956302642463, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 
157.29990951898574, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2929.264699223759, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.51544383864362, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3605.747338045167, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.72266927612378, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4169.092383202888, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 221.65028734739832, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4342.400927657371, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 10.226688336643164, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5335.841345368252, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 322.69883423073804, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5891.394678938614, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 442.78667173376004, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 253.57797655240805, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.797128115716593, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 300.17543480746747, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.95344804548685, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 353.0001179231053, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.30650858255822, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 393.61574583773006, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 
29.460697740276498, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 411.7013399749935, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.8389196983489504, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 493.65540609194693, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 32.30948655635452, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "59d88dae7", "github_repo": "pbalcer/llvm", "date": "2025-03-07T12:49:15.115091+00:00"}, +{"results": [{"label": "Memory Bandwidth 1", "value": 2195.552651542308, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 40.940741416639945, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2207.459054225258, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 31.681573504875555, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2791.852261483982, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 145.62649882463464, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3134.2219672329984, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 168.02514783326134, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3767.7635130447607, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.24591155046014, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3942.521187753682, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 228.82977417585033, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 100.809622959215, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.473952358992248, "name": "Latency 1", 
"lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 123.83059821116996, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.60938099214386, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 140.93982647796008, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.29049957344098, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 157.82319101117525, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.247880470121356, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 177.31431566581708, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.811044444821867, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 217.37228664795157, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.08328831134193, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1549.1191711106521, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 100.63323493526255, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1748.2566655197188, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 125.49717792070385, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2038.1492661325733, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 101.90033883093976, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2435.624131184369, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.4633804704484, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2625.115911806016, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 142.00862169479268, "name": "Throughput 5", 
"lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3041.342229934156, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 168.4496950355338, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2937.258997841614, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 155.30016809201283, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3538.971007263721, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 226.88178732022945, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4063.7149977059134, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 317.4858199901966, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4911.07807577187, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 250.7864115701977, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5377.1846970238585, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 306.0068346396366, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6245.575950509069, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 298.97595013407596, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 247.84781710540977, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.78683687151215, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 295.5304009113721, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.652016327478979, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 357.4112170450192, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.461446948742276, "name": "Cache Miss Rate 3", 
"lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 395.8114457367419, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.580352011562915, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 449.871031326954, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 30.053959147816688, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 504.6580132142422, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 29.41875628689506, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "PR1234", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T11:58:34.927820+00:00"}, +{"results": [{"label": "Memory Bandwidth 1", "value": 1958.784118312001, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 126.57484819538932, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2440.601149884664, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.0533346583976, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2721.428822801097, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 249.6308268113163, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3177.0055972660625, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 146.92056751044575, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3549.5230383598678, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 234.94466209634086, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3978.0960993946674, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 188.9037213571779, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test 
Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 103.09498391363023, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.02579026210347, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 109.08496102147217, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.749411126280116, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 161.69893522471634, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.4430257786783773, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 162.34529521039352, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.7714067922127894, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 170.86523239479655, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.608020176521034, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 181.05706010508592, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.277369339946695, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1463.0649649228315, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 86.83848693136936, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1864.683141120113, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 86.4841206172361, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2130.758830413485, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 160.54699391922728, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2381.8935399566794, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 144.76036506870986, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", 
"description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2662.7577579295776, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 132.5724441198216, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3078.79130536842, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 17.097525165274803, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2955.7832223272444, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 171.2189444201398, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3688.781307878483, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 65.65926515650821, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4183.4728233450305, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 101.81987978181542, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4939.824132342117, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 289.1390313704078, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5502.544756998508, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 379.9176358151893, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5664.321185867887, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 103.74897438065652, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 246.62407640713522, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.589667669507943, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.08780541388853, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.339251126835014, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test 
benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 349.13408375848826, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.707215404345545, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 420.6620028708826, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.922885386248023, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 470.0593095392814, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.595229921387679, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 495.115546467953, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.928558698066297, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline2", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T11:57:43.925526+00:00"}, +{"results": [{"label": "Memory Bandwidth 1", "value": 2171.099861571096, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 19.23255817429395, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2429.228219203666, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 181.04518738452575, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2756.5078091010796, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 126.73272767497978, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3197.349485288246, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 154.47555387593712, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3607.973454642879, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 213.0597134090529, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, 
{"label": "Memory Bandwidth 6", "value": 3925.314914910963, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 293.48112660476045, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 104.57782310281735, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.873834118675967, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 129.5117553518436, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.407159402934873, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 142.08007511017124, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.930090749895689, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 157.0629031829932, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.918041427401283, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 188.6427038678885, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.828269431125875, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 200.60322195597215, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.338879356636095, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1491.980189873357, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 71.9836340794669, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1794.0628090299717, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 14.307364673980224, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2192.3591192326044, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 114.60420372385168, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 
3."}, {"label": "Throughput 4", "value": 2422.202702788314, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 119.26859163162072, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2770.8727103546726, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 195.12079821799085, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2951.282362921916, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 128.2254379990313, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3039.27661040724, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.6539091592498, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3578.211797262128, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 159.14128724739464, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4128.29686489867, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 223.4100922139098, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4848.219925955905, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 77.93231029690887, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5070.191606088231, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 69.94019467972001, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5966.489310951252, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 336.7173682128105, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 254.57850713986198, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.385164783606097, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 
304.8091397808394, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.103188082400504, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 350.1613069208256, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.345582528912242, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 411.1456865029576, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.86244360659498, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 426.04740645126986, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.597587190328635, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 545.743901896845, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 8.94286171044266, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T11:57:27.051913+00:00"}, +{"results": [{"label": "Memory Bandwidth 1", "value": 1993.661134316776, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 123.85525126992296, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2301.0905948917325, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 133.48673687735095, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2873.4628362191897, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 162.61249284171058, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3238.735403505523, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 56.51716037758475, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3728.4508889231124, "command": ["test", 
"--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 118.24607483750995, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4034.9082581910916, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 74.76961240079906, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 100.88113187316719, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.905008641590433, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 121.61102013493655, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.792042693243397, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 140.99528044475127, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.222627363561376, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 163.077114107551, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.17919680914877, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 188.59968240327134, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.466938787214904, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 198.73690996443867, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.07228063106639, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1456.8721146219054, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 97.05357208107213, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1760.0202375360182, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 113.83470167982718, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2033.3289371002388, 
"command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 131.96155202489578, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2408.2974437457224, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.38445697767614, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2693.2667748312374, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 147.88552510962938, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2991.3045632907692, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 36.616739773559836, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3006.5513639744195, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.20153435546402, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3946.7240883975173, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 24.834845762711534, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4471.79595749108, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 222.54023025674027, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4746.352137751869, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 299.0771752770653, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5465.286069604949, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 348.6918957133431, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5823.519621687581, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 294.3249644414966, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 249.32918263045667, "command": ["test", "--arg1", "foo"], "env": 
{"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.03544118455393, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 288.1546272324227, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.7727205750953, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 363.3503259942238, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.098142551778466, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 392.91985489944227, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.846918288877376, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 456.7540443475017, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.728347618091988, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 499.13159330438293, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.2322764193576, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline2", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T10:48:34.707858+00:00"}, +{"results": [{"label": "Memory Bandwidth 1", "value": 2038.9496500003788, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 117.27052133056621, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2294.3238192937456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.05216178962178, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2816.7462067242177, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 120.10657812200931, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3330.947955167447, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", 
"passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 165.07867992457224, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3427.804220062, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 62.398802753262366, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3931.7861541695424, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 259.7643410153898, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 101.89870179257153, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 9.924103694663449, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 124.9849961475332, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.073706451113821, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 150.17912140564707, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.831834198448414, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 165.06404530951897, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.098638603407267, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 189.4271367424946, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.049029334825786, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 211.70091863399844, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.393712112471537, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1534.395057650628, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 81.6427334392383, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1778.474541262558, "command": ["test", "--arg1", "foo"], "env": {"A": 
"B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 42.56143420705744, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2133.7461366070925, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 116.35913144113613, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2459.5790315346367, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 96.71322011411286, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2637.4334475618302, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 183.30427116704686, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2944.098595726341, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 170.72289928237976, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2907.9632013559226, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.53757173689922, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3509.107421580347, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 236.8620853533764, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4200.093284524192, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 239.58028996799285, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4713.504209113087, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 227.25719976419228, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5049.944494674869, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 96.03307008996549, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6191.498973826217, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, 
"unit": "ms", "explicit_group": "Foo Group", "stddev": 317.5921715209765, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 248.80616580373456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.592467485447356, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.08520837227366, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.677266179208607, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 357.6038589068661, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 5.454584817104773, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 385.0134083066721, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.301075636602707, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 444.0720671004903, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.366607976819555, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 544.9286314848067, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 5.8252101632892845, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T10:43:24.047048+00:00"}, +{"results": [{"label": "Memory Bandwidth 1", "value": 2021.1035365873993, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 69.72840561483144, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2338.909416436906, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 140.64663652969023, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2858.077160911349, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 
192.0675550591675, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3306.833623604521, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 56.99029424270755, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3627.5542312476477, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 124.9433053351406, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3950.086638208113, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 226.7800326425516, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 96.47479639005672, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.581115036930171, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 112.93833387666766, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.456175417231416, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 127.96521280400299, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.881167162370817, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 164.06646826051218, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.400563021933642, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 172.50207971758653, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.59514547087479, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 206.57752612959177, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.6206498096027, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1450.762861653755, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": 
"ms", "explicit_group": "Foo Group", "stddev": 62.85051722934544, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1744.8736145848297, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 28.4724370062761, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2137.935073637293, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 133.15696927062444, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2405.7909943176865, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 138.83795715557775, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2660.942840886126, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 160.5879766560021, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3070.783714494726, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 225.80178015382134, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3021.0961116313642, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 63.199028430669784, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3562.444757764406, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 233.25324926372082, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4147.683102448584, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 267.47351186248994, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4681.79862307404, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 201.00316493809274, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5257.332484362561, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo 
Group", "stddev": 324.82272792943763, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5860.230588756176, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 370.86153080312647, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 245.42900602601247, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.361128649495964, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 300.16320013554315, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.935265770560466, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 345.53233993081176, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.5441134792233, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 397.50592062832635, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.267205299179718, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 426.56360681512984, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 28.587460065910978, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 493.39520093238633, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.049730400867045, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T10:40:45.136466+00:00"} +]; + defaultCompareNames = []; diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py index 35fbc2ffb122a..53dd4b1e8f968 100644 --- a/devops/scripts/benchmarks/output_html.py +++ b/devops/scripts/benchmarks/output_html.py @@ -11,6 +11,7 @@ def generate_html(benchmark_runs: list, compare_names: list[str]): # create path to data.js in html folder html_path = os.path.join(os.path.dirname(__file__), "html") + benchmark_runs.sort(key=lambda run: run.date, reverse=True) if options.output_html == "local": data_path = os.path.join(html_path, "data.js") From 6c28d333dadab0eccd40a80f2f84aa50107e3b93 Mon Sep 17 00:00:00 2001 From: Piotr 
Balcer Date: Mon, 10 Mar 2025 11:21:53 +0000 Subject: [PATCH 018/114] simplify presets, remove suites if all set --- devops/scripts/benchmarks/html/scripts.js | 4 +- devops/scripts/benchmarks/main.py | 10 +-- devops/scripts/benchmarks/options.py | 4 +- devops/scripts/benchmarks/presets.py | 91 ++++++++--------------- 4 files changed, 41 insertions(+), 68 deletions(-) diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js index 7b8b4d742cca2..7ba00738e727a 100644 --- a/devops/scripts/benchmarks/html/scripts.js +++ b/devops/scripts/benchmarks/html/scripts.js @@ -6,6 +6,7 @@ // Core state let activeRuns = new Set(defaultCompareNames); let chartInstances = new Map(); +let suiteNames = new Set(); let timeseriesData, barChartsData, allRunNames; // DOM Elements @@ -306,7 +307,7 @@ function updateURL() { url.searchParams.delete('regex'); } - if (activeSuites.length > 0) { + if (activeSuites.length > 0 && activeSuites.length != suiteNames.size) { url.searchParams.set('suites', activeSuites.join(',')); } else { url.searchParams.delete('suites'); @@ -444,7 +445,6 @@ function setupRunSelector() { function setupSuiteFilters() { suiteFiltersContainer = document.getElementById('suite-filters'); - const suiteNames = new Set(); benchmarkRuns.forEach(run => { run.results.forEach(result => { suiteNames.add(result.suite); diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 43e0bdf4832b1..91f84917f8698 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -17,7 +17,7 @@ from history import BenchmarkHistory from utils.utils import prepare_workdir from utils.compute_runtime import * -from presets import preset_get_by_name, presets +from presets import enabled_suites, presets import argparse import re @@ -164,7 +164,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): failures = {} for s in suites: - if s.name() not in options.preset.suites(): + if s.name() not in enabled_suites(options.preset): continue suite_benchmarks = s.benchmarks() @@ -443,9 +443,9 @@ def validate_and_parse_env_args(env_args): parser.add_argument( "--preset", type=str, - choices=[p.name() for p in presets], + choices=[p for p in presets.keys()], help="Benchmark preset to run.", - default=options.preset.name(), + default=options.preset, ) parser.add_argument( "--results-dir", @@ -478,7 +478,7 @@ def validate_and_parse_env_args(env_args): options.current_run_name = args.relative_perf options.cudnn_directory = args.cudnn_directory options.cublas_directory = args.cublas_directory - options.preset = preset_get_by_name(args.preset) + options.preset = args.preset options.custom_results_dir = args.results_dir if args.build_igc and args.compute_runtime is None: diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index c567a4a2bda53..7600942acd1e5 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -1,7 +1,7 @@ from dataclasses import dataclass, field from enum import Enum -from presets import Preset, presets +from presets import presets class Compare(Enum): LATEST = "latest" @@ -40,7 +40,7 @@ class Options: compute_runtime_tag: str = "25.05.32567.18" build_igc: bool = False current_run_name: str = "This PR" - preset: Preset = presets[0] + preset: str = "Full" custom_results_dir = None diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py index 5d8e187ac0115..7f5dc8d78460a 100644 --- 
a/devops/scripts/benchmarks/presets.py +++ b/devops/scripts/benchmarks/presets.py @@ -3,63 +3,36 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -from typing import List, Type +presets: dict[str, list[str]] = { + "Full": [ + "Compute Benchmarks", + "llama.cpp bench", + "SYCL-Bench", + "Velocity Bench", + "UMF", + ], + "SYCL": [ + "Compute Benchmarks", + "llama.cpp bench", + "SYCL-Bench", + "Velocity Bench", + ], + "Minimal": [ + "Compute Benchmarks", + ], + "Normal": [ + "Compute Benchmarks", + "llama.cpp bench", + "Velocity Bench", + ], + "Test": [ + "Test Suite", + ], +} + +def enabled_suites(preset: str) -> list[str]: + try: + return presets[preset] + except KeyError: + raise ValueError(f"Preset '{preset}' not found.") -class Preset: - def description(self) -> str: - raise NotImplementedError - - def name(self) -> str: - return self.__class__.__name__ - - def suites(self) -> List[str]: - raise NotImplementedError - -class Full(Preset): - def description(self) -> str: - return "All available benchmarks." - - def suites(self) -> List[str]: - return [ - "Compute Benchmarks", - "llama.cpp bench", - "SYCL-Bench", - "Velocity Bench", - "UMF", - ] - -class SYCL(Preset): - def description(self) -> str: - return "All available benchmarks related to SYCL." - - def suites(self) -> List[str]: - return ["Compute Benchmarks", "llama.cpp bench", "SYCL-Bench", "Velocity Bench"] - -class Minimal(Preset): - def description(self) -> str: - return "Short microbenchmarks." - - def suites(self) -> List[str]: - return ["Compute Benchmarks"] - -class Normal(Preset): - def description(self) -> str: - return "Comprehensive mix of microbenchmarks and real applications." - - def suites(self) -> List[str]: - return ["Compute Benchmarks", "llama.cpp bench", "Velocity Bench"] - -class Test(Preset): - def description(self) -> str: - return "Noop benchmarks for framework testing." 
- - def suites(self) -> List[str]: - return ["Test Suite"] - -presets = [Full(), SYCL(), Minimal(), Normal(), Test()] - -def preset_get_by_name(name: str) -> Preset: - for p in presets: - if p.name() == name: - return p - raise ValueError(f"Preset '{name}' not found.") From e15b94ffa6a0bc297b437f35f8afa3885befdb57 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Mon, 10 Mar 2025 11:25:23 +0000 Subject: [PATCH 019/114] [benchmarks] use python venv for scripts --- .github/workflows/ur-benchmarks-reusable.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ur-benchmarks-reusable.yml b/.github/workflows/ur-benchmarks-reusable.yml index 6e8a4ea535d15..e3a754753ecf8 100644 --- a/.github/workflows/ur-benchmarks-reusable.yml +++ b/.github/workflows/ur-benchmarks-reusable.yml @@ -80,9 +80,13 @@ jobs: git checkout origin/pr/${{ inputs.pr_no }}/merge git rev-parse origin/pr/${{ inputs.pr_no }}/merge - - name: Install pip packages + - name: Create virtual environment + run: python -m venv .venv + + - name: Activate virtual environment and install pip packages run: | - pip install --force-reinstall -r ${{github.workspace}}/sycl-repo/devops/scripts/benchmarks/requirements.txt + source .venv/bin/activate + pip install -r ${{github.workspace}}/sycl-repo/devops/scripts/benchmarks/requirements.txt - name: Configure SYCL run: > @@ -139,6 +143,7 @@ jobs: working-directory: ${{ github.workspace }} id: benchmarks run: > + source .venv/bin/activate && taskset -c "${{ env.CORES }}" ${{ github.workspace }}/sycl-repo/devops/scripts/benchmarks/main.py ~/llvm_bench_workdir --sycl ${{ github.workspace }}/sycl_build From 78fd037de376d59a404965817d464edd31bb6890 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 10 Mar 2025 07:53:36 -0700 Subject: [PATCH 020/114] Run apt with sudo --- devops/actions/run-tests/benchmark/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 88f2e75942c4d..5c343f2ff8e26 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -90,7 +90,7 @@ runs: echo "-----" sycl-ls echo "-----" - apt install -y python3-venv + sudo apt install -y python3-venv python3 -m venv .venv . .venv/bin/activate pip install -r ./devops/scripts/benchmarks/requirements.txt From 82b6e55be0f627a301117da05de16bc2ed723b70 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 10 Mar 2025 08:16:58 -0700 Subject: [PATCH 021/114] Ignore "missing" apt packages in workflow --- devops/actions/run-tests/benchmark/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 5c343f2ff8e26..87a629dc60fd6 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -90,7 +90,7 @@ runs: echo "-----" sycl-ls echo "-----" - sudo apt install -y python3-venv + sudo apt install -y --ignore-missing python3-venv python3 -m venv .venv . 
.venv/bin/activate pip install -r ./devops/scripts/benchmarks/requirements.txt From 162cba01ca234ab7645cf59c9d7b82d512870c69 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 10 Mar 2025 09:48:32 -0700 Subject: [PATCH 022/114] Change pip to install to user --- devops/actions/run-tests/benchmark/action.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 87a629dc60fd6..34fdf178afe0e 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -90,10 +90,7 @@ runs: echo "-----" sycl-ls echo "-----" - sudo apt install -y --ignore-missing python3-venv - python3 -m venv .venv - . .venv/bin/activate - pip install -r ./devops/scripts/benchmarks/requirements.txt + pip install --user -r ./devops/scripts/benchmarks/requirements.txt taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl ./toolchain --save baseline echo "-----" ls From 848f7410b0e7823eb94d288d549474785a339a30 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 10 Mar 2025 09:51:09 -0700 Subject: [PATCH 023/114] Ignore system controlled python env --- devops/actions/run-tests/benchmark/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 34fdf178afe0e..79cb2bf4aea5b 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -90,7 +90,7 @@ runs: echo "-----" sycl-ls echo "-----" - pip install --user -r ./devops/scripts/benchmarks/requirements.txt + pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl ./toolchain --save baseline echo "-----" ls From 918604ebd0a22f51be67055b2eea7c877e84a943 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 10 Mar 2025 13:08:09 -0700 Subject: [PATCH 024/114] [CI] use realpaths when referring to SYCL --- devops/actions/run-tests/benchmark/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 79cb2bf4aea5b..92c948ffd7168 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -91,7 +91,7 @@ runs: sycl-ls echo "-----" pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt - taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl ./toolchain --save baseline + taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl "$(realpath ./toolchain)" --save baseline echo "-----" ls # - name: Push compute-benchmarks results From 72d873034ae844678090c0f4c7082a2a8d893b99 Mon Sep 17 00:00:00 2001 From: Ian Li Date: Mon, 10 Mar 2025 18:10:26 -0400 Subject: [PATCH 025/114] [CI] use minimal preset when running benchmarks --- devops/actions/run-tests/benchmark/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 92c948ffd7168..c10a163261c13 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -91,7 +91,7 @@ runs: sycl-ls echo 
"-----" pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt - taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl "$(realpath ./toolchain)" --save baseline + taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl "$(realpath ./toolchain)" --save baseline --preset Minimal echo "-----" ls # - name: Push compute-benchmarks results From 066f5a60c102669878188cc76532c0f57e2e55bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Wed, 12 Mar 2025 11:33:44 +0100 Subject: [PATCH 026/114] [CI] Allow 2 bench scripts locations (#17394) On PRs based on main, the scripts location is "old" and not accesible. Pick location based on the dir existance. Step 'gather info' is in a 'weird' location, so solve it with 2 tries to execute the script. --- .github/workflows/ur-benchmarks-reusable.yml | 23 +++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ur-benchmarks-reusable.yml b/.github/workflows/ur-benchmarks-reusable.yml index e3a754753ecf8..d7c32edfdfc2a 100644 --- a/.github/workflows/ur-benchmarks-reusable.yml +++ b/.github/workflows/ur-benchmarks-reusable.yml @@ -80,13 +80,27 @@ jobs: git checkout origin/pr/${{ inputs.pr_no }}/merge git rev-parse origin/pr/${{ inputs.pr_no }}/merge + # TODO: As long as we didn't merge this workflow into main, we should allow both scripts location + - name: Establish bench scripts location + run: | + if [ -d "${{github.workspace}}/sycl-repo/devops/scripts/benchmarks" ]; then + echo "Bench scripts are in devops/scripts" + echo "BENCH_SCRIPTS_DIR=${{github.workspace}}/sycl-repo/devops/scripts/benchmarks" >> $GITHUB_ENV + elif [ -d "${{github.workspace}}/sycl-repo/unified-runtime/scripts/benchmarks" ]; then + echo "Bench scripts are in unified-runtime/scripts" + echo "BENCH_SCRIPTS_DIR=${{github.workspace}}/sycl-repo/unified-runtime/scripts/benchmarks" >> $GITHUB_ENV + else + echo "Bench scripts are absent...?" 
+ exit 1 + fi + - name: Create virtual environment run: python -m venv .venv - name: Activate virtual environment and install pip packages run: | source .venv/bin/activate - pip install -r ${{github.workspace}}/sycl-repo/devops/scripts/benchmarks/requirements.txt + pip install -r ${BENCH_SCRIPTS_DIR}/requirements.txt - name: Configure SYCL run: > @@ -144,7 +158,7 @@ jobs: id: benchmarks run: > source .venv/bin/activate && - taskset -c "${{ env.CORES }}" ${{ github.workspace }}/sycl-repo/devops/scripts/benchmarks/main.py + taskset -c "${{ env.CORES }}" ${BENCH_SCRIPTS_DIR}/main.py ~/llvm_bench_workdir --sycl ${{ github.workspace }}/sycl_build --ur ${{ github.workspace }}/ur_install @@ -198,6 +212,9 @@ jobs: path: benchmark_results_${{ inputs.pr_no }}.html key: benchmark-results-${{ inputs.pr_no }}-${{ matrix.adapter.str_name }}-${{ github.run_id }} + # TODO: As long as we didn't merge this workflow into main, we should allow both scripts location - name: Get information about platform if: ${{ always() }} - run: ${{github.workspace}}/sycl-repo/devops/scripts/get_system_info.sh + run: | + ${{github.workspace}}/sycl-repo/devops/scripts/get_system_info.sh || true + ${{github.workspace}}/sycl-repo/unified-runtime/.github/scripts/get_system_info.sh || true From 18e5291a405ce1c912d0df3ac02aa5446c099ef8 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Wed, 12 Mar 2025 14:28:08 +0000 Subject: [PATCH 027/114] add ulls compute benchmarks --- devops/scripts/benchmarks/benches/compute.py | 62 ++++++++++++++++++++ devops/scripts/benchmarks/html/data.js | 15 +---- 2 files changed, 63 insertions(+), 14 deletions(-) diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index d35a8e2791648..92818cc00fad2 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -81,6 +81,10 @@ def benchmarks(self) -> list[Benchmark]: GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 5), GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 0, 100), GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 100), + UllsEmptyKernel(self, RUNTIMES.SYCL, 1000, 256), + UllsEmptyKernel(self, RUNTIMES.LEVEL_ZERO, 1000, 256), + UllsKernelSwitch(self, RUNTIMES.SYCL, 8, 200, 0, 0, 1, 1), + UllsKernelSwitch(self, RUNTIMES.LEVEL_ZERO, 8, 200, 0, 0, 1, 1), ] if options.ur is not None: @@ -531,3 +535,61 @@ def bin_args(self) -> list[str]: "--withCopyOffload=1", "--immediateAppendCmdList=0", ] + +class UllsEmptyKernel(ComputeBenchmark): + def __init__(self, bench, runtime: RUNTIMES, wgc, wgs): + self.wgc = wgc + self.wgs = wgs + self.runtime = runtime + super().__init__( + bench, f"ulls_benchmark_{runtime.value}", "EmptyKernel" + ) + + def explicit_group(self): + return f"EmptyKernel {self.wgc} {self.wgs}" + + def description(self) -> str: + return "" + + def name(self): + return f"ulls_benchmark_{self.runtime.value} EmptyKernel wgc:{self.wgc}, wgs:{self.wgs}" + + def bin_args(self) -> list[str]: + return [ + "--iterations=10000", + f"--wgs={self.wgs}", + f"--wgc={self.wgc}", + ] + +class UllsKernelSwitch(ComputeBenchmark): + def __init__(self, bench, runtime: RUNTIMES, count, kernelTime, barrier, hostVisible, ioq, ctrBasedEvents): + self.count = count + self.kernelTime = kernelTime + self.barrier = barrier + self.hostVisible = hostVisible + self.ctrBasedEvents = ctrBasedEvents + self.runtime = runtime + self.ioq = ioq + super().__init__( + bench, f"ulls_benchmark_{runtime.value}", "KernelSwitch" + ) + + def explicit_group(self): + return
f"KernelSwitch {self.count} {self.kernelTime}" + + def description(self) -> str: + return "" + + def name(self): + return f"ulls_benchmark_{self.runtime.value} KernelSwitch count {self.count} kernelTime {self.kernelTime}" + + def bin_args(self) -> list[str]: + return [ + "--iterations=1000", + f"--count={self.count}", + f"--kernelTime={self.kernelTime}", + f"--barrier={self.barrier}", + f"--hostVisible={self.hostVisible}", + f"--ioq={self.ioq}", + f"--ctrBasedEvents={self.ctrBasedEvents}", + ] diff --git a/devops/scripts/benchmarks/html/data.js b/devops/scripts/benchmarks/html/data.js index bd2a4bb9c6f36..a5b96c72834ba 100644 --- a/devops/scripts/benchmarks/html/data.js +++ b/devops/scripts/benchmarks/html/data.js @@ -1,16 +1,3 @@ -benchmarkRuns = [ -{"results": [{"label": "Memory Bandwidth 1", "value": 2040.8882991390067, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 34.457610431783294, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2529.3774380653363, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 135.81200692232412, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2719.8110231537125, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 162.32053564116694, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3227.632839523546, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 165.72010893383725, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3514.4167999909496, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.05909225714902, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4012.1042760150494, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 213.80137392913923, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 103.58153862508325, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.155836817249414, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 125.92477357063481, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Bar Group", "stddev": 13.26567067278589, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 133.83240260210536, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.763812811796768, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 156.26773548103202, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.861842969825087, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 167.3255955272463, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.48929969639468, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 220.49290675578928, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.900958177754223, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1480.3642886335488, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 97.14840825777334, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1757.3646882744213, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 94.97795059309506, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2141.760057641498, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 32.20444501013399, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2465.113025920638, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 142.56485787432257, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2646.9736547641232, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 165.21303041397977, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2797.023188351585, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Foo Group", "stddev": 49.789332852672736, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3072.2144224296385, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 100.0435838937749, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3645.5868819428038, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 186.63713430054412, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4365.696214338321, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 70.80581668642078, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4712.424975602965, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 237.2219789185776, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5490.717140126425, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 102.98496803461086, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5899.69529717778, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 365.8281107263356, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 249.0033673842501, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.641649890532847, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 307.2248975403931, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.106532892713558, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 364.94516101524755, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.487184395370704, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 415.1825140704191, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Bar Group", "stddev": 4.837117436872584, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 440.50926932373267, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.400527065008065, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 513.2345717731824, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.92653205921289, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "13462f5f6", "github_repo": "pbalcer/llvm", "date": "2025-03-07T14:04:12.881983+00:00"}, -{"results": [{"label": "Memory Bandwidth 1", "value": 2061.891541779758, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 45.43418752146129, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2418.370570307403, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 23.41390025375235, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2759.548256219084, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 140.04750469338484, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3268.9851244693905, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 179.65245219605663, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3573.980571932074, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.27214661339116, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3913.178724155857, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 187.41955301323392, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 96.66099349103821, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 
9.949437203365676, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 116.94033117978861, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.670085238288802, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 141.8516673102208, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.49397378099331, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 154.47973126513787, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.7581068444608, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 194.47100906915202, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.603348605481727, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 189.26766261792042, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.80270435298115, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1548.0366148601304, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 22.556620202365167, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1804.0612981627564, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 130.9251933818919, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2117.020524938414, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 124.18576268885376, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2340.6226309817375, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 45.23157229205414, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2657.435335624127, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 
178.93395582367347, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3100.1660243239976, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 59.26661177659249, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2973.0427624231074, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 133.47659228805884, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3499.50915562217, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 202.92584935080856, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 3906.063346066898, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 58.67588644266499, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4776.315860317371, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 337.294287649651, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5294.515316259128, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 310.6460231086305, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5883.364679907042, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 433.9862905464425, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 247.81458542543336, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.259893742055365, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.324345463754, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.537217356717523, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 350.317230088579, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.694135619195492, 
"name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 404.94767826325585, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.03967001195265, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 448.68781789313334, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 37.68940635002855, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 479.7145913704619, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 29.819332357308436, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "52dba2a69", "github_repo": "pbalcer/llvm", "date": "2025-03-07T13:48:42.727410+00:00"}, -{"results": [{"label": "Memory Bandwidth 1", "value": 1944.712475358489, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.3517754822544, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2494.968647183357, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 144.62096222735542, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2827.96959627778, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 161.09215987917975, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3246.4235207906368, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 194.8841813593721, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3415.497030173447, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 207.51586434688852, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3947.173405699456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.35155081978226, "name": "Memory Bandwidth 6", 
"lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 96.27501062264594, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.62997659996243, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 129.58001802257706, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.223861407928204, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 152.60658050771121, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.644344734962786, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 157.8365309090243, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 1.9279203474927489, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 179.69325992783263, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.567971182588, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 190.29777300705297, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.545022416801082, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1520.7774888153917, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 69.44363449416652, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1841.9402998174073, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 36.99472050334539, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2063.573372718332, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 103.76799421011498, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2411.1299338593512, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.55096124823987, "name": "Throughput 4", 
"lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2636.4186072468115, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 136.15002376636508, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3012.5429889405455, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 220.10345804333795, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2912.3694681990496, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.24541212948046, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3634.840665141933, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 205.90393111568957, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4221.70291649172, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 245.0992536434908, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4563.9141528786395, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 148.15450755100105, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5449.735755715656, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 283.67446282594074, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6103.288896553245, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 497.0264510256128, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 247.1162346822855, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.349695364944424, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.0848370650819, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.091832690685845, "name": "Cache Miss Rate 2", "lower_is_better": true, 
"suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 368.2173261284879, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.911533458328602, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 400.932628864893, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.298171550718916, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 465.45774333645085, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.008461742975705, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 494.19807030391513, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 31.290996975880688, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "a15019b41", "github_repo": "pbalcer/llvm", "date": "2025-03-07T13:42:53.963514+00:00"}, -{"results": [{"label": "Memory Bandwidth 1", "value": 1971.9235866578244, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 107.4119769093561, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2381.359513168276, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.1820922785026, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2816.164331241929, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 152.82523354152792, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3207.788500404049, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.98152700892044, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3612.0807949868076, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 238.29524372895352, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": 
"This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4041.187128183399, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 244.78707963276804, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 110.17204676929632, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.7488792731298, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 110.04874446073308, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.111000761355566, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 139.80726599267632, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.761524761674202, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 167.65946901880108, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.961270297928603, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 175.07359940308456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.654053542209933, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 188.92280945420617, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.32935674842163, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1498.3892879578825, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 72.76968286004643, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1802.449855059067, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 117.35877323708975, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2141.6873668536814, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 109.1211656598374, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", 
"description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2481.234320462784, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 142.29288921121633, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2592.315439130817, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 171.50618527958042, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2986.630322110839, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 134.14155338256344, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3023.0069882524413, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.0861804957972, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3491.2685416445424, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.82885721897767, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4267.684357012167, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 258.535523100285, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4833.943488351638, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 288.5816839229039, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5460.197706764911, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 294.3526928188145, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6211.479518188777, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 448.53753098503586, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 248.60974821168077, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.966964309950376, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for 
Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 299.08129766722294, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.458275817843905, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 345.13218478336375, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.88260705972654, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 368.43448345001804, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.0293359056239115, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 462.81719243303485, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 29.16929631101137, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 498.84520836251704, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.943372517547482, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "461343280", "github_repo": "pbalcer/llvm", "date": "2025-03-07T13:37:14.849756+00:00"}, -{"results": [{"label": "Memory Bandwidth 1", "value": 2013.395440288061, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 119.82142134259605, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2432.2596423503755, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 118.39327416892019, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2674.0160578165187, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 194.41545828080007, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3063.9534832147688, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 205.67379884852215, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": 
"Memory Bandwidth 5", "value": 3584.672342581568, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 181.67353531675607, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4125.180591214061, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 273.2758074594961, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 106.37633318466106, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.247008579218756, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 111.99312616915259, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.168574067720925, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 148.4561344088857, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.59295361046173, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 162.0852714518944, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.380760230770385, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 187.04637816265117, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.658051327117878, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 200.16012739025047, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.6645406941134, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1505.183607875215, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 93.57793481885791, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1786.864494698917, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 122.1347513455775, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 
2."}, {"label": "Throughput 3", "value": 2104.854088217566, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 128.42311038597916, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2373.3921231994896, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 140.26128420435194, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2680.62360254391, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 184.49504836547473, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2957.0424468763595, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.13611056356788, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3024.0197501043167, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 155.3618836169113, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3658.757514096598, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 149.8130576669698, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4336.791327103415, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 267.10403249537495, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4594.550884548686, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 339.1255595981214, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5619.202557626439, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 324.7429329550701, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6145.450470023206, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 397.2604324517752, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 
242.7598020860891, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 4.503364581661284, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 295.888600531132, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.878793912236713, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 333.6634181341022, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.945944118430873, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 386.559044229885, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.909652211845977, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 433.56985826314695, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.16786402230611, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 475.40739140041325, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.532574731353257, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "461343280", "github_repo": "pbalcer/llvm", "date": "2025-03-07T12:55:23.831147+00:00"}, -{"results": [{"label": "Memory Bandwidth 1", "value": 2036.879511822098, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 147.49123010982262, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2358.605120547564, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 148.31108709325747, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2782.758869742085, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.07850443580668, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3211.303768537726, "command": ["test", 
"--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 160.64603088602735, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3726.2788114170226, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.68455828387613, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4034.451298605878, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 214.04589132488434, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 97.81132147931729, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.4388910648024, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 123.47877514885052, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.850644538343035, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 138.3636972712076, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.453475343660529, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 159.0926504710019, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.406923335827646, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 177.58148765355367, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.719641698346496, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 213.78191902260386, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.56513730925096, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1508.4347909839335, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 96.90540186941426, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 
1765.9068352126365, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 83.00665769599348, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2079.3459975121978, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 129.25159465427944, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2370.0084472113276, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 110.2565848005119, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2598.252204318904, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 170.98495052891545, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2969.9956302642463, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.29990951898574, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2929.264699223759, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.51544383864362, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3605.747338045167, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.72266927612378, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4169.092383202888, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 221.65028734739832, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4342.400927657371, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 10.226688336643164, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5335.841345368252, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 322.69883423073804, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5891.394678938614, "command": ["test", 
"--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 442.78667173376004, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 253.57797655240805, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.797128115716593, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 300.17543480746747, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.95344804548685, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 353.0001179231053, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.30650858255822, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 393.61574583773006, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 29.460697740276498, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 411.7013399749935, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.8389196983489504, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 493.65540609194693, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 32.30948655635452, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "59d88dae7", "github_repo": "pbalcer/llvm", "date": "2025-03-07T12:49:15.115091+00:00"}, -{"results": [{"label": "Memory Bandwidth 1", "value": 2195.552651542308, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 40.940741416639945, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2207.459054225258, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 31.681573504875555, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2791.852261483982, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", 
"passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 145.62649882463464, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3134.2219672329984, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 168.02514783326134, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3767.7635130447607, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.24591155046014, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3942.521187753682, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 228.82977417585033, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 100.809622959215, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.473952358992248, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 123.83059821116996, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.60938099214386, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 140.93982647796008, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.29049957344098, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 157.82319101117525, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.247880470121356, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 177.31431566581708, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.811044444821867, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 217.37228664795157, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.08328831134193, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1549.1191711106521, "command": ["test", 
"--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 100.63323493526255, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1748.2566655197188, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 125.49717792070385, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2038.1492661325733, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 101.90033883093976, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2435.624131184369, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.4633804704484, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2625.115911806016, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 142.00862169479268, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3041.342229934156, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 168.4496950355338, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2937.258997841614, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 155.30016809201283, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3538.971007263721, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 226.88178732022945, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4063.7149977059134, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 317.4858199901966, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4911.07807577187, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 250.7864115701977, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5377.1846970238585, "command": ["test", "--arg1", "foo"], "env": {"A": 
"B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 306.0068346396366, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6245.575950509069, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 298.97595013407596, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 247.84781710540977, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.78683687151215, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 295.5304009113721, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.652016327478979, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 357.4112170450192, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.461446948742276, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 395.8114457367419, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.580352011562915, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 449.871031326954, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 30.053959147816688, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 504.6580132142422, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 29.41875628689506, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "PR1234", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T11:58:34.927820+00:00"}, -{"results": [{"label": "Memory Bandwidth 1", "value": 1958.784118312001, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 126.57484819538932, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2440.601149884664, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 
158.0533346583976, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2721.428822801097, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 249.6308268113163, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3177.0055972660625, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 146.92056751044575, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3549.5230383598678, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 234.94466209634086, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3978.0960993946674, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 188.9037213571779, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 103.09498391363023, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.02579026210347, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 109.08496102147217, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.749411126280116, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 161.69893522471634, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.4430257786783773, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 162.34529521039352, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.7714067922127894, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 170.86523239479655, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.608020176521034, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 181.05706010508592, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no 
output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.277369339946695, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1463.0649649228315, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 86.83848693136936, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1864.683141120113, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 86.4841206172361, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2130.758830413485, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 160.54699391922728, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2381.8935399566794, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 144.76036506870986, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2662.7577579295776, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 132.5724441198216, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3078.79130536842, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 17.097525165274803, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2955.7832223272444, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 171.2189444201398, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3688.781307878483, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 65.65926515650821, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4183.4728233450305, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 101.81987978181542, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4939.824132342117, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": 
true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 289.1390313704078, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5502.544756998508, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 379.9176358151893, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5664.321185867887, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 103.74897438065652, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 246.62407640713522, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.589667669507943, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.08780541388853, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.339251126835014, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 349.13408375848826, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.707215404345545, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 420.6620028708826, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.922885386248023, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 470.0593095392814, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.595229921387679, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 495.115546467953, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.928558698066297, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline2", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T11:57:43.925526+00:00"}, -{"results": [{"label": "Memory Bandwidth 1", "value": 2171.099861571096, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 19.23255817429395, "name": "Memory Bandwidth 1", "lower_is_better": 
true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2429.228219203666, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 181.04518738452575, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2756.5078091010796, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 126.73272767497978, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3197.349485288246, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 154.47555387593712, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3607.973454642879, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 213.0597134090529, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3925.314914910963, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 293.48112660476045, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 104.57782310281735, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.873834118675967, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 129.5117553518436, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.407159402934873, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 142.08007511017124, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.930090749895689, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 157.0629031829932, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.918041427401283, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 188.6427038678885, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Bar Group", "stddev": 19.828269431125875, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 200.60322195597215, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.338879356636095, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1491.980189873357, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 71.9836340794669, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1794.0628090299717, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 14.307364673980224, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2192.3591192326044, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 114.60420372385168, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2422.202702788314, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 119.26859163162072, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2770.8727103546726, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 195.12079821799085, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2951.282362921916, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 128.2254379990313, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3039.27661040724, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.6539091592498, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3578.211797262128, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 159.14128724739464, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4128.29686489867, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Foo Group", "stddev": 223.4100922139098, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4848.219925955905, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 77.93231029690887, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5070.191606088231, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 69.94019467972001, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5966.489310951252, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 336.7173682128105, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 254.57850713986198, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.385164783606097, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 304.8091397808394, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.103188082400504, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 350.1613069208256, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.345582528912242, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 411.1456865029576, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.86244360659498, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 426.04740645126986, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.597587190328635, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 545.743901896845, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 8.94286171044266, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": 
"2025-03-07T11:57:27.051913+00:00"}, -{"results": [{"label": "Memory Bandwidth 1", "value": 1993.661134316776, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 123.85525126992296, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2301.0905948917325, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 133.48673687735095, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2873.4628362191897, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 162.61249284171058, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3238.735403505523, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 56.51716037758475, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3728.4508889231124, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 118.24607483750995, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4034.9082581910916, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 74.76961240079906, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 100.88113187316719, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.905008641590433, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 121.61102013493655, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.792042693243397, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 140.99528044475127, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.222627363561376, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 163.077114107551, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar 
Group", "stddev": 18.17919680914877, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 188.59968240327134, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.466938787214904, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 198.73690996443867, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.07228063106639, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1456.8721146219054, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 97.05357208107213, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1760.0202375360182, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 113.83470167982718, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2033.3289371002388, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 131.96155202489578, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2408.2974437457224, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.38445697767614, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2693.2667748312374, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 147.88552510962938, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2991.3045632907692, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 36.616739773559836, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3006.5513639744195, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.20153435546402, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3946.7240883975173, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo 
Group", "stddev": 24.834845762711534, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4471.79595749108, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 222.54023025674027, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4746.352137751869, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 299.0771752770653, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5465.286069604949, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 348.6918957133431, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5823.519621687581, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 294.3249644414966, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 249.32918263045667, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.03544118455393, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 288.1546272324227, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.7727205750953, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 363.3503259942238, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.098142551778466, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 392.91985489944227, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.846918288877376, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 456.7540443475017, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.728347618091988, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 499.13159330438293, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": 
true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.2322764193576, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline2", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T10:48:34.707858+00:00"}, -{"results": [{"label": "Memory Bandwidth 1", "value": 2038.9496500003788, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 117.27052133056621, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2294.3238192937456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.05216178962178, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2816.7462067242177, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 120.10657812200931, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3330.947955167447, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 165.07867992457224, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3427.804220062, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 62.398802753262366, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3931.7861541695424, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 259.7643410153898, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 101.89870179257153, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 9.924103694663449, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 124.9849961475332, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.073706451113821, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 150.17912140564707, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.831834198448414, "name": 
"Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 165.06404530951897, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.098638603407267, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 189.4271367424946, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.049029334825786, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 211.70091863399844, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.393712112471537, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1534.395057650628, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 81.6427334392383, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1778.474541262558, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 42.56143420705744, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2133.7461366070925, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 116.35913144113613, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2459.5790315346367, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 96.71322011411286, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2637.4334475618302, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 183.30427116704686, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2944.098595726341, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 170.72289928237976, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2907.9632013559226, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.53757173689922, "name": 
"FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3509.107421580347, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 236.8620853533764, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4200.093284524192, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 239.58028996799285, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4713.504209113087, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 227.25719976419228, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5049.944494674869, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 96.03307008996549, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6191.498973826217, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 317.5921715209765, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 248.80616580373456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.592467485447356, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.08520837227366, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.677266179208607, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 357.6038589068661, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 5.454584817104773, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 385.0134083066721, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.301075636602707, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 444.0720671004903, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 
23.366607976819555, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 544.9286314848067, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 5.8252101632892845, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T10:43:24.047048+00:00"}, -{"results": [{"label": "Memory Bandwidth 1", "value": 2021.1035365873993, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 69.72840561483144, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2338.909416436906, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 140.64663652969023, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2858.077160911349, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 192.0675550591675, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3306.833623604521, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 56.99029424270755, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3627.5542312476477, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 124.9433053351406, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3950.086638208113, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 226.7800326425516, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 96.47479639005672, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.581115036930171, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 112.93833387666766, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.456175417231416, "name": "Latency 2", "lower_is_better": true, 
"suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 127.96521280400299, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.881167162370817, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 164.06646826051218, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.400563021933642, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 172.50207971758653, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.59514547087479, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 206.57752612959177, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.6206498096027, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1450.762861653755, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 62.85051722934544, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1744.8736145848297, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 28.4724370062761, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2137.935073637293, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 133.15696927062444, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2405.7909943176865, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 138.83795715557775, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2660.942840886126, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 160.5879766560021, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3070.783714494726, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 225.80178015382134, "name": "Throughput 6", "lower_is_better": true, 
"suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3021.0961116313642, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 63.199028430669784, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3562.444757764406, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 233.25324926372082, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4147.683102448584, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 267.47351186248994, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4681.79862307404, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 201.00316493809274, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5257.332484362561, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 324.82272792943763, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5860.230588756176, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 370.86153080312647, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 245.42900602601247, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.361128649495964, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 300.16320013554315, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.935265770560466, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 345.53233993081176, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.5441134792233, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 397.50592062832635, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.267205299179718, "name": "Cache Miss Rate 4", "lower_is_better": 
true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 426.56360681512984, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 28.587460065910978, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 493.39520093238633, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.049730400867045, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T10:40:45.136466+00:00"} -]; +benchmarkRuns = []; defaultCompareNames = []; From 237750e9dc03ce8534d373c984e9fd8c56a72d4f Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Tue, 11 Mar 2025 15:07:14 -0700 Subject: [PATCH 028/114] [CI][Benchmark] Decouple results from existing file structure, fetch results from git instead of local --- devops/actions/run-tests/benchmark/action.yml | 112 +++++++++--------- devops/scripts/benchmarks/main.py | 23 +++- devops/scripts/benchmarks/options.py | 1 + devops/scripts/benchmarks/output_html.py | 4 +- 4 files changed, 81 insertions(+), 59 deletions(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index c10a163261c13..f90808f730787 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -27,16 +27,25 @@ runs: shell: bash env: TARGET_DEVICE: ${{ inputs.target_devices }} + RUNNER_NAME: ${{ runner.name }} run: | case "$RUNNER_TAG" in - '["Linux", "gen12"]' | '["Linux", "pvc"]') ;; + '["PVC_PERF"]' ) ;; *) echo "#" - echo "# WARNING: Only gen12/pvc on Linux is fully supported." + echo "# WARNING: Only specific tuned runners are fully supported." echo "# This workflow is not guaranteed to work with other runners." echo "#" ;; esac + # Ensure runner name has nothing injected + # TODO: in terms of security, is this overkill? + if [ -z "$(printf '%s' "$RUNNER_NAME" | grep -oE '^[a-zA-Z0-9_-]+$')" ]; then + echo "Bad runner name, please ensure runner name is [a-zA-Z0-9_-]." + exit 1 + fi + echo "RUNNER_NAME=$RUNNER_NAME" >> $GITHUB_ENV + # input.target_devices is not directly used, as this allows code injection case "$TARGET_DEVICE" in level_zero:*) ;; @@ -46,11 +55,11 @@ runs: echo "# This workflow is not guaranteed to work with other backends." echo "#" ;; esac + echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV + - name: Compute CPU core range to run benchmarks on shell: bash run: | - # Taken from ur-benchmark-reusable.yml: - # Compute the core range for the first NUMA node; second node is used by # UMF. Skip the first 4 cores as the kernel is likely to schedule more # work on these. 
@@ -67,65 +76,62 @@ runs: ZE_AFFINITY_MASK=0 echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV + - name: Checkout results repo + shell: bash + run: | + git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results - name: Run compute-benchmarks shell: bash run: | - cat << EOF - # - # NOTE TO DEVELOPERS: - # - - Check latter steps of the workflow: This job produces an artifact with: - - benchmark results from passing/failing tests - - log containing all failing (too slow) benchmarks - - log containing all erroring benchmarks - - While this step in the workflow provides debugging output describing this - information, it might be easier to inspect the logs from the artifact - instead. - - EOF - export ONEAPI_DEVICE_SELECTOR="${{ inputs.target_devices }}" + # TODO generate summary + display helpful message here export CMPLR_ROOT=./toolchain echo "-----" sycl-ls echo "-----" pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt - taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl "$(realpath ./toolchain)" --save baseline --preset Minimal + echo "-----" + mkdir -p "./llvm-ci-perf-results/$RUNNER_NAME" + taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \ + "$(realpath ./llvm_test_workdir)" \ + --sycl "$(realpath ./toolchain)" \ + --save baseline \ + --output-html remote \ + --results-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ + --output-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ + --preset Minimal echo "-----" ls -# - name: Push compute-benchmarks results -# if: always() -# shell: bash -# run: | -# # TODO -- waiting on security clearance -# # Load configuration values -# $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) -# -# cd "./llvm-ci-perf-results" -# git config user.name "SYCL Benchmarking Bot" -# git config user.email "sys_sycl_benchmarks@intel.com" -# git pull -# git add . -# # Make sure changes have been made -# if git diff --quiet && git diff --cached --quiet; then -# echo "No new results added, skipping push." -# else -# git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" -# git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH" -# fi - - name: Find benchmark result artifact here + - name: Push compute-benchmarks results if: always() shell: bash run: | - cat << EOF - # - # Artifact link for benchmark results here: - # - EOF - - name: Archive compute-benchmark results - if: always() - uses: actions/upload-artifact@v4 - with: - name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }}) - path: ./artifact + # TODO redo configuration + # $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) + + cd "./llvm-ci-perf-results" + git config user.name "SYCL Benchmarking Bot" + git config user.email "sys_sycl_benchmarks@intel.com" + git pull + git add . + # Make sure changes have been made + if git diff --quiet && git diff --cached --quiet; then + echo "No new results added, skipping push." 
+ else + git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" + git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" unify-ci + fi +# - name: Find benchmark result artifact here +# if: always() +# shell: bash +# run: | +# cat << EOF +# # +# # Artifact link for benchmark results here: +# # +# EOF +# - name: Archive compute-benchmark results +# if: always() +# uses: actions/upload-artifact@v4 +# with: +# name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }}) +# path: ./artifact diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 91f84917f8698..1a15e5407daf3 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -265,11 +265,15 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): this_name, chart_data, failures, options.output_markdown ) - with open("benchmark_results.md", "w") as file: + md_path = options.output_directory + if options.output_directory is None: + md_path = os.getcwd() + + with open(os.path.join(md_path, "benchmark_results.md"), "w") as file: file.write(markdown_content) print( - f"Markdown with benchmark results has been written to {os.getcwd()}/benchmark_results.md" + f"Markdown with benchmark results has been written to {md_path}/benchmark_results.md" ) saved_name = save_name if save_name is not None else this_name @@ -283,7 +287,10 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): compare_names.append(saved_name) if options.output_html: - generate_html(history.runs, compare_names) + html_path = options.output_directory + if options.output_directory is None: + html_path = os.path.join(os.path.dirname(__file__), "html") + generate_html(history.runs, compare_names, html_path) def validate_and_parse_env_args(env_args): @@ -398,6 +405,12 @@ def validate_and_parse_env_args(env_args): const=options.output_html, choices=["local", "remote"], ) + parser.add_argument( + "--output-dir", + type=str, + help="Location for output files, if --output-html or --output_markdown was specified.", + default=None + ) parser.add_argument( "--dry-run", help="Do not run any actual benchmarks", @@ -486,6 +499,10 @@ def validate_and_parse_env_args(env_args): if args.compute_runtime is not None: options.build_compute_runtime = True options.compute_runtime_tag = args.compute_runtime + if args.output_dir is not None: + if not os.path.isdir(args.output_dir): + parser.error("Specified --output-dir is not a valid path") + options.output_directory = os.path.abspath(args.output_dir) benchmark_filter = re.compile(args.filter) if args.filter else None diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index 7600942acd1e5..332d1615bc78d 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -31,6 +31,7 @@ class Options: compare_max: int = 10 # average/median over how many results output_markdown: MarkdownSize = MarkdownSize.SHORT output_html: str = "local" + output_directory: str = None dry_run: bool = False stddev_threshold: float = 0.02 iterations_stddev: int = 5 diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py index 53dd4b1e8f968..49b4d1d84a214 100644 --- a/devops/scripts/benchmarks/output_html.py +++ b/devops/scripts/benchmarks/output_html.py @@ -8,9 +8,7 @@ from options import options -def 
generate_html(benchmark_runs: list, compare_names: list[str]): - # create path to data.js in html folder - html_path = os.path.join(os.path.dirname(__file__), "html") +def generate_html(benchmark_runs: list, compare_names: list[str], html_path: str): benchmark_runs.sort(key=lambda run: run.date, reverse=True) if options.output_html == "local": From ba1297fe66693ef025b2aa6c14ebfc17bf2c3651 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 12 Mar 2025 09:00:38 -0700 Subject: [PATCH 029/114] [benchmark] Disabling UR test suites --- .github/workflows/ur-benchmarks-reusable.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ur-benchmarks-reusable.yml b/.github/workflows/ur-benchmarks-reusable.yml index d7c32edfdfc2a..0aecbffc20fe7 100644 --- a/.github/workflows/ur-benchmarks-reusable.yml +++ b/.github/workflows/ur-benchmarks-reusable.yml @@ -161,7 +161,6 @@ jobs: taskset -c "${{ env.CORES }}" ${BENCH_SCRIPTS_DIR}/main.py ~/llvm_bench_workdir --sycl ${{ github.workspace }}/sycl_build - --ur ${{ github.workspace }}/ur_install --adapter ${{ matrix.adapter.str_name }} --compare baseline --compute-runtime ${{ inputs.compute_runtime_commit }} @@ -169,6 +168,9 @@ jobs: ${{ inputs.upload_report && '--output-html' || '' }} ${{ inputs.pr_no != 0 && '--output-markdown' || '' }} ${{ inputs.bench_script_params }} + # Temporarily disabled due to build faiures + # https://github.com/intel/llvm/actions/runs/13814877162/job/38645384849#step:14:849 + # --ur ${{ github.workspace }}/ur_install - name: Print benchmark results run: | From cd6097fdef7d77213b7a21658ca2e040fd9cf825 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Thu, 13 Mar 2025 11:42:00 +0000 Subject: [PATCH 030/114] update compute benchmarks and fix requirements --- devops/scripts/benchmarks/benches/compute.py | 2 +- devops/scripts/benchmarks/requirements.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index 92818cc00fad2..4b48f16b5fc6b 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -28,7 +28,7 @@ def setup(self): self.directory, "compute-benchmarks-repo", "https://github.com/intel/compute-benchmarks.git", - "9369275026229b182bc4a555b73c2ec995a9e2b7", + "dfdbf2ff9437ee159627cc2cd9159c289da1a7ba", ) build_path = create_build_path(self.directory, "compute-benchmarks-build") diff --git a/devops/scripts/benchmarks/requirements.txt b/devops/scripts/benchmarks/requirements.txt index 99ba0caab55c2..9f0381ceef6c2 100644 --- a/devops/scripts/benchmarks/requirements.txt +++ b/devops/scripts/benchmarks/requirements.txt @@ -2,3 +2,4 @@ matplotlib==3.9.2 mpld3==0.5.10 dataclasses-json==0.6.7 PyYAML==6.0.1 +Mako==1.3.9 From c4e92c6ac7a64ae26f9c15ea383473b71637c1e2 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Thu, 13 Mar 2025 12:09:48 +0000 Subject: [PATCH 031/114] fix url updates --- devops/scripts/benchmarks/html/scripts.js | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js index 7ba00738e727a..2bd52a70b07c8 100644 --- a/devops/scripts/benchmarks/html/scripts.js +++ b/devops/scripts/benchmarks/html/scripts.js @@ -13,12 +13,13 @@ let timeseriesData, barChartsData, allRunNames; let runSelect, selectedRunsDiv, suiteFiltersContainer; // Run selector functions -function updateSelectedRuns() { 
+function updateSelectedRuns(forceUpdate = true) { selectedRunsDiv.innerHTML = ''; activeRuns.forEach(name => { selectedRunsDiv.appendChild(createRunElement(name)); }); - updateCharts(); + if (forceUpdate) + updateCharts(); } function createRunElement(name) { @@ -439,7 +440,7 @@ function setupRunSelector() { runSelect.appendChild(option); }); - updateSelectedRuns(); + updateSelectedRuns(false); } function setupSuiteFilters() { From ed8eecce3d20e19f471ec65bb59b851bd215b486 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Thu, 13 Mar 2025 12:24:41 +0000 Subject: [PATCH 032/114] use timestamps in result file names --- devops/scripts/benchmarks/history.py | 26 +++++++++-------------- devops/scripts/benchmarks/utils/result.py | 1 + 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index 2b7002ed7faa9..2ed63d129d140 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -13,7 +13,6 @@ class BenchmarkHistory: - benchmark_run_index_max = 0 runs = [] def __init__(self, dir): @@ -35,28 +34,22 @@ def load(self, n: int): # Get all JSON files in the results directory benchmark_files = list(results_dir.glob("*.json")) - # Extract index numbers and sort files by index number - def extract_index(file_path: Path) -> int: + # Extract timestamp and sort files by it + def extract_timestamp(file_path: Path) -> str: try: - return int(file_path.stem.split("_")[0]) - except (IndexError, ValueError): - return -1 + return file_path.stem.split("_")[-1] + except IndexError: + return "" - benchmark_files = [ - file for file in benchmark_files if extract_index(file) != -1 - ] - benchmark_files.sort(key=extract_index) + benchmark_files.sort(key=extract_timestamp, reverse=True) # Load the first n benchmark files benchmark_runs = [] - for file_path in benchmark_files[n::-1]: + for file_path in benchmark_files[:n]: benchmark_run = self.load_result(file_path) if benchmark_run: benchmark_runs.append(benchmark_run) - if benchmark_files: - self.benchmark_run_index_max = extract_index(benchmark_files[-1]) - self.runs = benchmark_runs def create_run(self, name: str, results: list[Result]) -> BenchmarkRun: @@ -102,10 +95,11 @@ def save(self, save_name, results: list[Result], to_file=True): results_dir = Path(os.path.join(self.dir, "results")) os.makedirs(results_dir, exist_ok=True) - self.benchmark_run_index_max += 1 + # Use formatted timestamp for the filename + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") file_path = Path( os.path.join( - results_dir, f"{self.benchmark_run_index_max}_{save_name}.json" + results_dir, f"{save_name}_{timestamp}.json" ) ) with file_path.open("w") as file: diff --git a/devops/scripts/benchmarks/utils/result.py b/devops/scripts/benchmarks/utils/result.py index 4e65a3b8aa582..7d82d9e488edf 100644 --- a/devops/scripts/benchmarks/utils/result.py +++ b/devops/scripts/benchmarks/utils/result.py @@ -35,6 +35,7 @@ class Result: class BenchmarkRun: results: list[Result] name: str = "This PR" + hostname: str = "Unknown" git_hash: str = "" github_repo: str = None date: datetime = field( From 130212d2a2e0b1045605033a09412f430d13721a Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Thu, 13 Mar 2025 12:29:36 +0000 Subject: [PATCH 033/114] add hostname to benchmark run --- devops/scripts/benchmarks/history.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index 2ed63d129d140..d1bdc3bfdb940 
100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -6,6 +6,7 @@ import os import json from pathlib import Path +import socket from utils.result import Result, BenchmarkRun from options import Compare, options from datetime import datetime, timezone @@ -82,6 +83,7 @@ def create_run(self, name: str, results: list[Result]) -> BenchmarkRun: github_repo=github_repo, date=datetime.now(tz=timezone.utc), results=results, + hostname=socket.gethostname() ) def save(self, save_name, results: list[Result], to_file=True): @@ -132,6 +134,7 @@ def compute_average(self, data: list[BenchmarkRun]): name=first_run.name, git_hash="average", date=first_run.date, # should this be different? + hostname=first_run.hostname ) return average_benchmark_run From 5323386c59d2457d79d1fee27b55dffc93be74a3 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Thu, 13 Mar 2025 17:00:23 +0000 Subject: [PATCH 034/114] add SubmitGraph benchmark ... and apply black formatting. --- devops/scripts/benchmarks/benches/compute.py | 70 +++++++++++++++++--- devops/scripts/benchmarks/history.py | 10 +-- devops/scripts/benchmarks/main.py | 2 +- devops/scripts/benchmarks/options.py | 1 + devops/scripts/benchmarks/presets.py | 2 +- 5 files changed, 68 insertions(+), 17 deletions(-) diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index 1f335cd8838ec..bc9d1d9d80d8a 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -28,7 +28,7 @@ def setup(self): self.directory, "compute-benchmarks-repo", "https://github.com/intel/compute-benchmarks.git", - "dfdbf2ff9437ee159627cc2cd9159c289da1a7ba", + "b5cc46acf61766ab00da04e85bd4da4f7591eb21", ) build_path = create_build_path(self.directory, "compute-benchmarks-build") @@ -87,6 +87,19 @@ def benchmarks(self) -> list[Benchmark]: UllsKernelSwitch(self, RUNTIMES.LEVEL_ZERO, 8, 200, 0, 0, 1, 1), ] + for in_order_queue in [0, 1]: + for num_kernels in [4, 32]: + for measure_completion_time in [0, 1]: + benches.append( + GraphApiSubmitGraph( + self, + RUNTIMES.SYCL, + in_order_queue, + num_kernels, + measure_completion_time, + ) + ) + if options.ur is not None: benches += [ SubmitKernelUR(self, 0, 0), @@ -536,14 +549,46 @@ def bin_args(self) -> list[str]: "--immediateAppendCmdList=0", ] + +class GraphApiSubmitGraph(ComputeBenchmark): + def __init__( + self, bench, runtime: RUNTIMES, inOrderQueue, numKernels, measureCompletionTime + ): + self.inOrderQueue = inOrderQueue + self.numKernels = numKernels + self.runtime = runtime + self.measureCompletionTime = measureCompletionTime + super().__init__(bench, f"graph_api_benchmark_{runtime.value}", "SubmitGraph") + + def explicit_group(self): + return f"SubmitGraph {self.numKernels}" + + def description(self) -> str: + return ( + f"Measures {self.runtime.value.upper()} performance when executing {self.numKernels} " + f"trivial kernels using graphs. Tests overhead and benefits of graph-based execution." 
+ ) + + def name(self): + return f"graph_api_benchmark_{self.runtime.value} SubmitGraph numKernels:{self.numKernels} ioq {self.inOrderQueue} measureCompletion {self.measureCompletionTime}" + + def bin_args(self) -> list[str]: + return [ + "--iterations=10000", + f"--NumKernels={self.numKernels}", + f"--MeasureCompletionTime={self.measureCompletionTime}", + f"--InOrderQueue={self.inOrderQueue}", + "--Profiling=0", + "--KernelExecutionTime=1", + ] + + class UllsEmptyKernel(ComputeBenchmark): def __init__(self, bench, runtime: RUNTIMES, wgc, wgs): self.wgc = wgc self.wgs = wgs self.runtime = runtime - super().__init__( - bench, f"ulls_benchmark_{runtime.value}", "EmptyKernel" - ) + super().__init__(bench, f"ulls_benchmark_{runtime.value}", "EmptyKernel") def explicit_group(self): return f"EmptyKernel {self.wgc} {self.wgs}" @@ -561,8 +606,19 @@ def bin_args(self) -> list[str]: f"--wgc={self.wgs}", ] + class UllsKernelSwitch(ComputeBenchmark): - def __init__(self, bench, runtime: RUNTIMES, count, kernelTime, barrier, hostVisible, ioq, ctrBasedEvents): + def __init__( + self, + bench, + runtime: RUNTIMES, + count, + kernelTime, + barrier, + hostVisible, + ioq, + ctrBasedEvents, + ): self.count = count self.kernelTime = kernelTime self.barrier = barrier @@ -570,9 +626,7 @@ def __init__(self, bench, runtime: RUNTIMES, count, kernelTime, barrier, hostVis self.ctrBasedEvents = ctrBasedEvents self.runtime = runtime self.ioq = ioq - super().__init__( - bench, f"ulls_benchmark_{runtime.value}", "KernelSwitch" - ) + super().__init__(bench, f"ulls_benchmark_{runtime.value}", "KernelSwitch") def explicit_group(self): return f"KernelSwitch {self.count} {self.kernelTime}" diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index d1bdc3bfdb940..f05e0192d26ee 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -83,7 +83,7 @@ def create_run(self, name: str, results: list[Result]) -> BenchmarkRun: github_repo=github_repo, date=datetime.now(tz=timezone.utc), results=results, - hostname=socket.gethostname() + hostname=socket.gethostname(), ) def save(self, save_name, results: list[Result], to_file=True): @@ -99,11 +99,7 @@ def save(self, save_name, results: list[Result], to_file=True): # Use formatted timestamp for the filename timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - file_path = Path( - os.path.join( - results_dir, f"{save_name}_{timestamp}.json" - ) - ) + file_path = Path(os.path.join(results_dir, f"{save_name}_{timestamp}.json")) with file_path.open("w") as file: json.dump(serialized, file, indent=4) print(f"Benchmark results saved to {file_path}") @@ -134,7 +130,7 @@ def compute_average(self, data: list[BenchmarkRun]): name=first_run.name, git_hash="average", date=first_run.date, # should this be different? 
- hostname=first_run.hostname + hostname=first_run.hostname, ) return average_benchmark_run diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 1a15e5407daf3..1d7304ea5e212 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -409,7 +409,7 @@ def validate_and_parse_env_args(env_args): "--output-dir", type=str, help="Location for output files, if --output-html or --output_markdown was specified.", - default=None + default=None, ) parser.add_argument( "--dry-run", diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index 332d1615bc78d..ced76a5d692f2 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -3,6 +3,7 @@ from presets import presets + class Compare(Enum): LATEST = "latest" AVERAGE = "average" diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py index 7f5dc8d78460a..e394a8b4b622e 100644 --- a/devops/scripts/benchmarks/presets.py +++ b/devops/scripts/benchmarks/presets.py @@ -30,9 +30,9 @@ ], } + def enabled_suites(preset: str) -> list[str]: try: return presets[preset] except KeyError: raise ValueError(f"Preset '{preset}' not found.") - From 5bd1d568a4371041dab01e071349a2d392c409ba Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 13 Mar 2025 12:07:11 -0700 Subject: [PATCH 035/114] Restore sycl-linux-run-tests benchmarking action --- devops/actions/run-tests/benchmark/action.yml | 106 +++++++++++------- 1 file changed, 68 insertions(+), 38 deletions(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 7f69fdf832982..f90808f730787 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -27,16 +27,25 @@ runs: shell: bash env: TARGET_DEVICE: ${{ inputs.target_devices }} + RUNNER_NAME: ${{ runner.name }} run: | case "$RUNNER_TAG" in - '["Linux", "gen12"]' | '["Linux", "pvc"]') ;; + '["PVC_PERF"]' ) ;; *) echo "#" - echo "# WARNING: Only gen12/pvc on Linux is fully supported." + echo "# WARNING: Only specific tuned runners are fully supported." echo "# This workflow is not guaranteed to work with other runners." echo "#" ;; esac + # Ensure runner name has nothing injected + # TODO: in terms of security, is this overkill? + if [ -z "$(printf '%s' "$RUNNER_NAME" | grep -oE '^[a-zA-Z0-9_-]+$')" ]; then + echo "Bad runner name, please ensure runner name is [a-zA-Z0-9_-]." + exit 1 + fi + echo "RUNNER_NAME=$RUNNER_NAME" >> $GITHUB_ENV + # input.target_devices is not directly used, as this allows code injection case "$TARGET_DEVICE" in level_zero:*) ;; @@ -46,37 +55,58 @@ runs: echo "# This workflow is not guaranteed to work with other backends." echo "#" ;; esac - - name: Run compute-benchmarks + echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV + + - name: Compute CPU core range to run benchmarks on shell: bash run: | - cat << EOF - # - # NOTE TO DEVELOPERS: - # - - Check latter steps of the workflow: This job produces an artifact with: - - benchmark results from passing/failing tests - - log containing all failing (too slow) benchmarks - - log containing all erroring benchmarks + # Compute the core range for the first NUMA node; second node is used by + # UMF. Skip the first 4 cores as the kernel is likely to schedule more + # work on these. 
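+        #
+        # The awk script below keeps the last matching "NUMA node0 CPU(s)" (or
+        # "On-line CPU(s)") line reported by lscpu, takes the first
+        # comma-separated range of that CPU list and rewrites a leading 0 to 4,
+        # so cores 0-3 stay free for the kernel.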
+ CORES="$(lscpu | awk ' + /NUMA node0 CPU|On-line CPU/ {line=$0} + END { + split(line, a, " ") + split(a[4], b, ",") + sub(/^0/, "4", b[1]) + print b[1] + }')" + echo "CPU core range to use: $CORES" + echo "CORES=$CORES" >> $GITHUB_ENV - While this step in the workflow provides debugging output describing this - information, it might be easier to inspect the logs from the artifact - instead. - - EOF - export ONEAPI_DEVICE_SELECTOR="${{ inputs.target_devices }}" + ZE_AFFINITY_MASK=0 + echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV + - name: Checkout results repo + shell: bash + run: | + git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results + - name: Run compute-benchmarks + shell: bash + run: | + # TODO generate summary + display helpful message here export CMPLR_ROOT=./toolchain echo "-----" sycl-ls echo "-----" - ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s || exit 1 + pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt + echo "-----" + mkdir -p "./llvm-ci-perf-results/$RUNNER_NAME" + taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \ + "$(realpath ./llvm_test_workdir)" \ + --sycl "$(realpath ./toolchain)" \ + --save baseline \ + --output-html remote \ + --results-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ + --output-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ + --preset Minimal + echo "-----" + ls - name: Push compute-benchmarks results if: always() shell: bash run: | - # TODO -- waiting on security clearance - # Load configuration values - $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) + # TODO redo configuration + # $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) cd "./llvm-ci-perf-results" git config user.name "SYCL Benchmarking Bot" @@ -88,20 +118,20 @@ runs: echo "No new results added, skipping push." 
else git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" - git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH" + git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" unify-ci fi - - name: Find benchmark result artifact here - if: always() - shell: bash - run: | - cat << EOF - # - # Artifact link for benchmark results here: - # - EOF - - name: Archive compute-benchmark results - if: always() - uses: actions/upload-artifact@v4 - with: - name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }}) - path: ./artifact +# - name: Find benchmark result artifact here +# if: always() +# shell: bash +# run: | +# cat << EOF +# # +# # Artifact link for benchmark results here: +# # +# EOF +# - name: Archive compute-benchmark results +# if: always() +# uses: actions/upload-artifact@v4 +# with: +# name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }}) +# path: ./artifact From e9b1375dd0075dd053839370d5fc3bcf95cc4390 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 13 Mar 2025 12:36:55 -0700 Subject: [PATCH 036/114] Restore old SYCL benchmarking CI --- .github/workflows/sycl-linux-run-tests.yml | 10 ++ devops/actions/run-tests/benchmark/action.yml | 88 +++++------- .../actions/run-tests/benchmark_v2/action.yml | 134 ++++++++++++++++++ 3 files changed, 183 insertions(+), 49 deletions(-) create mode 100644 devops/actions/run-tests/benchmark_v2/action.yml diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index c30c5eccbcb62..f5b243cb7fc05 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -155,6 +155,7 @@ on: - e2e - cts - compute-benchmarks + - benchmark_v2 env: description: | @@ -330,3 +331,12 @@ jobs: env: RUNNER_TAG: ${{ inputs.runner }} GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} + + - name: Run benchmarks + if: inputs.tests_selector == 'benchmark_v2' + uses: ./devops/actions/run-tests/benchmark_v2 + with: + target_devices: ${{ inputs.target_devices }} + env: + RUNNER_TAG: ${{ inputs.runner }} + GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} \ No newline at end of file diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index f90808f730787..03b7d4ad776fd 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -27,25 +27,16 @@ runs: shell: bash env: TARGET_DEVICE: ${{ inputs.target_devices }} - RUNNER_NAME: ${{ runner.name }} run: | case "$RUNNER_TAG" in - '["PVC_PERF"]' ) ;; + '["Linux", "gen12"]' | '["Linux", "pvc"]') ;; *) echo "#" - echo "# WARNING: Only specific tuned runners are fully supported." + echo "# WARNING: Only gen12/pvc on Linux is fully supported." echo "# This workflow is not guaranteed to work with other runners." echo "#" ;; esac - # Ensure runner name has nothing injected - # TODO: in terms of security, is this overkill? - if [ -z "$(printf '%s' "$RUNNER_NAME" | grep -oE '^[a-zA-Z0-9_-]+$')" ]; then - echo "Bad runner name, please ensure runner name is [a-zA-Z0-9_-]." 
- exit 1 - fi - echo "RUNNER_NAME=$RUNNER_NAME" >> $GITHUB_ENV - # input.target_devices is not directly used, as this allows code injection case "$TARGET_DEVICE" in level_zero:*) ;; @@ -55,11 +46,11 @@ runs: echo "# This workflow is not guaranteed to work with other backends." echo "#" ;; esac - echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV - - name: Compute CPU core range to run benchmarks on shell: bash run: | + # Taken from ur-benchmark-reusable.yml: + # Compute the core range for the first NUMA node; second node is used by # UMF. Skip the first 4 cores as the kernel is likely to schedule more # work on these. @@ -76,37 +67,36 @@ runs: ZE_AFFINITY_MASK=0 echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV - - name: Checkout results repo - shell: bash - run: | - git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results - name: Run compute-benchmarks shell: bash run: | - # TODO generate summary + display helpful message here + cat << EOF + # + # NOTE TO DEVELOPERS: + # + + Check latter steps of the workflow: This job produces an artifact with: + - benchmark results from passing/failing tests + - log containing all failing (too slow) benchmarks + - log containing all erroring benchmarks + + While this step in the workflow provides debugging output describing this + information, it might be easier to inspect the logs from the artifact + instead. + + EOF + export ONEAPI_DEVICE_SELECTOR="${{ inputs.target_devices }}" export CMPLR_ROOT=./toolchain echo "-----" sycl-ls echo "-----" - pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt - echo "-----" - mkdir -p "./llvm-ci-perf-results/$RUNNER_NAME" - taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \ - "$(realpath ./llvm_test_workdir)" \ - --sycl "$(realpath ./toolchain)" \ - --save baseline \ - --output-html remote \ - --results-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ - --output-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ - --preset Minimal - echo "-----" - ls + taskset -c "$CORES" ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s || exit 1 - name: Push compute-benchmarks results if: always() shell: bash run: | - # TODO redo configuration - # $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) + # Load configuration values + $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) cd "./llvm-ci-perf-results" git config user.name "SYCL Benchmarking Bot" @@ -118,20 +108,20 @@ runs: echo "No new results added, skipping push." 
else git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" - git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" unify-ci + git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH" fi -# - name: Find benchmark result artifact here -# if: always() -# shell: bash -# run: | -# cat << EOF -# # -# # Artifact link for benchmark results here: -# # -# EOF -# - name: Archive compute-benchmark results -# if: always() -# uses: actions/upload-artifact@v4 -# with: -# name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }}) -# path: ./artifact + - name: Find benchmark result artifact here + if: always() + shell: bash + run: | + cat << EOF + # + # Artifact link for benchmark results here: + # + EOF + - name: Archive compute-benchmark results + if: always() + uses: actions/upload-artifact@v4 + with: + name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }}) + path: ./artifact diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml new file mode 100644 index 0000000000000..375bc20faf857 --- /dev/null +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -0,0 +1,134 @@ +name: 'Run benchmarks' + +# This action assumes the following prerequisites: +# +# - SYCL is placed in ./toolchain -- TODO change this +# - /devops has been checked out in ./devops. +# - env.GITHUB_TOKEN was properly set, because according to Github, that's +# apparently the recommended way to pass a secret into a github action: + +# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets +# +# - env.RUNNER_TAG set to the runner tag used to run this workflow: Currently, +# only specific runners are fully supported. + +inputs: + target_devices: + type: string + required: True + +runs: + using: "composite" + steps: + - name: Check specified runner type / target backend + shell: bash + env: + TARGET_DEVICE: ${{ inputs.target_devices }} + RUNNER_NAME: ${{ runner.name }} + run: | + case "$RUNNER_TAG" in + '["PVC_PERF"]' ) ;; + *) + echo "#" + echo "# WARNING: Only specific tuned runners are fully supported." + echo "# This workflow is not guaranteed to work with other runners." + echo "#" ;; + esac + + # Ensure runner name has nothing injected + # TODO: in terms of security, is this overkill? + if [ -z "$(printf '%s' "$RUNNER_NAME" | grep -oE '^[a-zA-Z0-9_-]+$')" ]; then + echo "Bad runner name, please ensure runner name is [a-zA-Z0-9_-]." + exit 1 + fi + echo "RUNNER_NAME=$RUNNER_NAME" >> $GITHUB_ENV + + # input.target_devices is not directly used, as this allows code injection + case "$TARGET_DEVICE" in + level_zero:*) ;; + *) + echo "#" + echo "# WARNING: Only level_zero backend is fully supported." + echo "# This workflow is not guaranteed to work with other backends." + echo "#" ;; + esac + echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV + + - name: Compute CPU core range to run benchmarks on + shell: bash + run: | + # Compute the core range for the first NUMA node; second node is used by + # UMF. Skip the first 4 cores as the kernel is likely to schedule more + # work on these. 
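+        #
+        # The awk below prints the first range of NUMA node 0's CPU list with a
+        # leading 0 bumped to 4; e.g. a hypothetical "0-55,112-167" yields "4-55".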
+ CORES="$(lscpu | awk ' + /NUMA node0 CPU|On-line CPU/ {line=$0} + END { + split(line, a, " ") + split(a[4], b, ",") + sub(/^0/, "4", b[1]) + print b[1] + }')" + echo "CPU core range to use: $CORES" + echo "CORES=$CORES" >> $GITHUB_ENV + + ZE_AFFINITY_MASK=0 + echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV + - name: Checkout results repo + shell: bash + run: | + git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results + - name: Run compute-benchmarks + shell: bash + run: | + # TODO generate summary + display helpful message here + export CMPLR_ROOT=./toolchain + echo "-----" + sycl-ls + echo "-----" + pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt + echo "-----" + mkdir -p "./llvm-ci-perf-results/$RUNNER_NAME" + taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \ + "$(realpath ./llvm_test_workdir)" \ + --sycl "$(realpath ./toolchain)" \ + --save baseline \ + --output-html remote \ + --results-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ + --output-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ + --preset Minimal + echo "-----" + ls + - name: Push compute-benchmarks results + if: always() + shell: bash + run: | + # TODO redo configuration + # $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) + + cd "./llvm-ci-perf-results" + git config user.name "SYCL Benchmarking Bot" + git config user.email "sys_sycl_benchmarks@intel.com" + git pull + git add . + # Make sure changes have been made + if git diff --quiet && git diff --cached --quiet; then + echo "No new results added, skipping push." + else + git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" + git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" unify-ci + fi +# - name: Find benchmark result artifact here +# if: always() +# shell: bash +# run: | +# cat << EOF +# # +# # Artifact link for benchmark results here: +# # +# EOF +# - name: Archive compute-benchmark results +# if: always() +# uses: actions/upload-artifact@v4 +# with: +# name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }}) +# path: ./artifact From a3edf7aff115c3ebb64c90afe042a177ad4ea2c4 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 13 Mar 2025 12:47:26 -0700 Subject: [PATCH 037/114] Add benchmarking results to sycl-docs.yml --- .github/workflows/sycl-docs.yml | 1 + devops/scripts/benchmarks/html/config.js | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/sycl-docs.yml b/.github/workflows/sycl-docs.yml index 5c1e8e425111b..a45c56bdd869c 100644 --- a/.github/workflows/sycl-docs.yml +++ b/.github/workflows/sycl-docs.yml @@ -49,6 +49,7 @@ jobs: mkdir clang mv $GITHUB_WORKSPACE/build/tools/sycl/doc/html/* . mv $GITHUB_WORKSPACE/build/tools/clang/docs/html/* clang/ + cp -r $GITHUB_WORKSPACE/repo/devops/scripts/benchmarks/html benchmarks touch .nojekyll # Upload the generated docs as an artifact and deploy to GitHub Pages. 
- name: Upload artifact diff --git a/devops/scripts/benchmarks/html/config.js b/devops/scripts/benchmarks/html/config.js index 3e67ae1dce8e5..0a8551c5de152 100644 --- a/devops/scripts/benchmarks/html/config.js +++ b/devops/scripts/benchmarks/html/config.js @@ -1,2 +1,2 @@ -//remoteDataUrl = 'https://example.com/data.json'; +remoteDataUrl = 'https://raw.githubusercontent.com/intel/llvm-ci-perf-results/refs/heads/unify-ci/UR_DNP_INTEL_06_03/data.json'; //defaultCompareNames = ['baseline']; From 6620e4a889664a031414af2107e423f9b7e60169 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Thu, 13 Mar 2025 21:20:44 +0100 Subject: [PATCH 038/114] [CI] Bump compute bench (#17431) - [x] remove the second, test commit --- .github/workflows/ur-benchmarks-reusable.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/ur-benchmarks-reusable.yml b/.github/workflows/ur-benchmarks-reusable.yml index 0aecbffc20fe7..d7c32edfdfc2a 100644 --- a/.github/workflows/ur-benchmarks-reusable.yml +++ b/.github/workflows/ur-benchmarks-reusable.yml @@ -161,6 +161,7 @@ jobs: taskset -c "${{ env.CORES }}" ${BENCH_SCRIPTS_DIR}/main.py ~/llvm_bench_workdir --sycl ${{ github.workspace }}/sycl_build + --ur ${{ github.workspace }}/ur_install --adapter ${{ matrix.adapter.str_name }} --compare baseline --compute-runtime ${{ inputs.compute_runtime_commit }} @@ -168,9 +169,6 @@ jobs: ${{ inputs.upload_report && '--output-html' || '' }} ${{ inputs.pr_no != 0 && '--output-markdown' || '' }} ${{ inputs.bench_script_params }} - # Temporarily disabled due to build faiures - # https://github.com/intel/llvm/actions/runs/13814877162/job/38645384849#step:14:849 - # --ur ${{ github.workspace }}/ur_install - name: Print benchmark results run: | From f4a2e39ad21e498d090fcacf62e519574a3cc0b6 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 13 Mar 2025 16:03:50 -0700 Subject: [PATCH 039/114] Initial implementation of unified benchmark workflow --- .github/workflows/benchmark.yml | 122 ++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 .github/workflows/benchmark.yml diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 0000000000000..3837b119a10e3 --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,122 @@ +name: Run Benchmarks + +on: + schedule: + - cron: '0 1 * * *' # 2 hrs earlier than sycl-nightly.yml + workflow_call: + inputs: + pr_no: + type: number + required: false + upload_results: + type: bool + required: true + runner: + type: string + required: true + backend: + type: string + required: true + reset_intel_gpu: + type: bool + required: true + default: true + + workflow_dispatch: + inputs: + pr_no: + description: Specific PR no. 
to build + type: number + required: false + upload_results: + description: 'Save and upload results' + type: choice + options: + - false + - true + default: true + runner: + type: choice + options: + - '["PVC_PERF"]' + backend: + description: Backend to use + type: choice + options: + - 'level_zero:gpu' + # TODO L0 V2 support + reset_intel_gpu: + description: Reset Intel GPUs + type: choice + options: + - false + - true + default: true + +permissions: + contents: read + packages: read + +jobs: + build_sycl: + name: Build SYCL from PR + if: inputs.pr_no != null + uses: ./.github/workflows/sycl-linux-build.yml + with: + build_ref: "origin/pr/${{ inputs.pr_no }}/merge" + build_cache_root: "/__w/" + build_artifact_suffix: "default" + build_cache_suffix: "default" + # Docker image has last nightly pre-installed and added to the PATH + build_image: "ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest" + cc: clang + cxx: clang++ + + run_benchmarks_build: + name: Run Benchmarks (on PR Build) + needs: [ build_sycl ] + if: inputs.pr_no != null + strategy: + matrix: + # Set default values if not specified: + include: + - runner: ${{ inputs.runner || '["PVC_PERF"]' }} + backend: ${{ inputs.backend || 'level_zero:gpu' }} + reset_intel_gpu: ${{ inputs.reset_intel_gpu || true }} + ref: origin/pr/${{ inputs.pr_no }}/merge + uses: ./.github/workflows/sycl-linux-run-tests.yml + secrets: inherit + with: + # TODO support other benchmarks + name: Run compute-benchmarks (${{ matrix.runner }}, ${{ matrix.backend }}) + runner: ${{ matrix.runner }} + image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest + image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN + target_devices: ${{ matrix.backend }} + reset_intel_gpu: ${{ matrix.reset_intel_gpu }} + tests_selector: benchmark_v2 + repo_ref: ${{ matrix.ref }} + sycl_toolchain_artifact: sycl_linux_default + sycl_toolchain_archive: ${{ needs.build_sycl.outputs.artifact_archive_name }} + sycl_toolchain_decompress_command: ${{ needs.build_sycl.outputs.artifact_decompress_command }} + + run_benchmarks_nightly: + name: Run Benchmarks (on Nightly Build) + if: inputs.pr_no == 0 + strategy: + matrix: + # Set default values if not specified: + include: + - runner: ${{ inputs.runner || '["PVC_PERF"]' }} + backend: ${{ inputs.backend || 'level_zero:gpu' }} + reset_intel_gpu: ${{ inputs.reset_intel_gpu || true }} + uses: ./.github/workflows/sycl-linux-run-tests.yml + with: + # TODO support other benchmarks + name: Run compute-benchmarks (${{ matrix.runner }}, ${{ matrix.backend }}) + runner: ${{ matrix.runner }} + image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest + image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN + target_devices: ${{ matrix.backend }} + reset_intel_gpu: ${{ matrix.reset_intel_gpu }} + tests_selector: benchmark_v2 \ No newline at end of file From 38394bb5bff746d9b6e57da0f99d91d530412641 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 13 Mar 2025 16:11:56 -0700 Subject: [PATCH 040/114] [CI] Use commit hash instead, fix issues with run --- .github/workflows/benchmark.yml | 39 ++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 3837b119a10e3..f044cbb066757 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -5,11 +5,11 @@ on: - cron: '0 1 * * *' # 2 hrs earlier than sycl-nightly.yml 
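   # workflow_call / workflow_dispatch may pass commit_hash to build and
   # benchmark a specific intel/llvm revision; the scheduled nightly run leaves
   # it empty and benchmarks the pre-installed nightly toolchain instead.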
workflow_call: inputs: - pr_no: - type: number + commit_hash: + type: string required: false upload_results: - type: bool + type: string # true/false: workflow_dispatch does not support booleans required: true runner: type: string @@ -18,16 +18,17 @@ on: type: string required: true reset_intel_gpu: - type: bool + type: string # true/false: workflow_dispatch does not support booleans required: true default: true workflow_dispatch: inputs: - pr_no: - description: Specific PR no. to build - type: number + commit_hash: + description: Commit hash to build intel/llvm from + type: string required: false + default: '' upload_results: description: 'Save and upload results' type: choice @@ -53,17 +54,15 @@ on: - true default: true -permissions: - contents: read - packages: read +permissions: read-all jobs: build_sycl: name: Build SYCL from PR - if: inputs.pr_no != null + if: inputs.commit_hash != '' uses: ./.github/workflows/sycl-linux-build.yml with: - build_ref: "origin/pr/${{ inputs.pr_no }}/merge" + build_ref: ${{ inputs.commit_hash }} build_cache_root: "/__w/" build_artifact_suffix: "default" build_cache_suffix: "default" @@ -71,19 +70,20 @@ jobs: build_image: "ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest" cc: clang cxx: clang++ + changes: '[]' run_benchmarks_build: name: Run Benchmarks (on PR Build) needs: [ build_sycl ] - if: inputs.pr_no != null + if: inputs.commit_hash != '' strategy: matrix: # Set default values if not specified: include: - runner: ${{ inputs.runner || '["PVC_PERF"]' }} backend: ${{ inputs.backend || 'level_zero:gpu' }} - reset_intel_gpu: ${{ inputs.reset_intel_gpu || true }} - ref: origin/pr/${{ inputs.pr_no }}/merge + reset_intel_gpu: ${{ inputs.reset_intel_gpu || 'true' }} + ref: ${{ inputs.commit_hash }} uses: ./.github/workflows/sycl-linux-run-tests.yml secrets: inherit with: @@ -96,21 +96,23 @@ jobs: reset_intel_gpu: ${{ matrix.reset_intel_gpu }} tests_selector: benchmark_v2 repo_ref: ${{ matrix.ref }} + devops_ref: ${{ github.ref }} sycl_toolchain_artifact: sycl_linux_default sycl_toolchain_archive: ${{ needs.build_sycl.outputs.artifact_archive_name }} sycl_toolchain_decompress_command: ${{ needs.build_sycl.outputs.artifact_decompress_command }} run_benchmarks_nightly: name: Run Benchmarks (on Nightly Build) - if: inputs.pr_no == 0 + if: inputs.commit_hash == '' strategy: matrix: # Set default values if not specified: include: - runner: ${{ inputs.runner || '["PVC_PERF"]' }} backend: ${{ inputs.backend || 'level_zero:gpu' }} - reset_intel_gpu: ${{ inputs.reset_intel_gpu || true }} + reset_intel_gpu: ${{ inputs.reset_intel_gpu || 'true' }} uses: ./.github/workflows/sycl-linux-run-tests.yml + secrets: inherit with: # TODO support other benchmarks name: Run compute-benchmarks (${{ matrix.runner }}, ${{ matrix.backend }}) @@ -119,4 +121,5 @@ jobs: image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN target_devices: ${{ matrix.backend }} reset_intel_gpu: ${{ matrix.reset_intel_gpu }} - tests_selector: benchmark_v2 \ No newline at end of file + tests_selector: benchmark_v2 + repo_ref: ${{ github.ref }} From f232b93cec0f35c07e2c2ac416bc7699523b0496 Mon Sep 17 00:00:00 2001 From: pbalcer Date: Fri, 14 Mar 2025 10:59:56 +0100 Subject: [PATCH 041/114] add benchmark metadata --- devops/scripts/benchmarks/benches/base.py | 25 ++- devops/scripts/benchmarks/benches/compute.py | 29 +++- devops/scripts/benchmarks/benches/test.py | 40 +++-- devops/scripts/benchmarks/html/index.html | 121 +++++++++++++- 
devops/scripts/benchmarks/html/scripts.js | 158 ++++++++++++++++--- devops/scripts/benchmarks/main.py | 21 ++- devops/scripts/benchmarks/output_html.py | 22 ++- devops/scripts/benchmarks/utils/result.py | 9 ++ 8 files changed, 376 insertions(+), 49 deletions(-) diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py index 77365220dbf85..1135a267864a9 100644 --- a/devops/scripts/benchmarks/benches/base.py +++ b/devops/scripts/benchmarks/benches/base.py @@ -6,7 +6,7 @@ import os import shutil from pathlib import Path -from utils.result import Result +from utils.result import BenchmarkMetadata, Result from options import options from utils.utils import download, run import urllib.request @@ -78,6 +78,9 @@ def download( def name(self): raise NotImplementedError() + def description(self): + return "No description provided." + def lower_is_better(self): return True @@ -96,6 +99,23 @@ def stddev_threshold(self): def get_suite_name(self) -> str: return self.suite.name() + def result_names(self) -> list[str]: + return [self.name()] + + def notes(self) -> str: + return None + + def unstable(self) -> str: + return None + + def get_metadata(self) -> BenchmarkMetadata: + return BenchmarkMetadata( + type='benchmark', + description=self.description(), + notes=self.notes(), + unstable=self.unstable(), + ) + class Suite: def benchmarks(self) -> list[Benchmark]: @@ -106,3 +126,6 @@ def name(self) -> str: def setup(self): return + + def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: + return {} diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index bc9d1d9d80d8a..67ec0bf2087ff 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -8,7 +8,7 @@ import io from utils.utils import run, git_clone, create_build_path from .base import Benchmark, Suite -from utils.result import Result +from utils.result import BenchmarkMetadata, Result from options import options from enum import Enum @@ -54,6 +54,23 @@ def setup(self): self.built = True + def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: + return { + "SubmitKernel" : BenchmarkMetadata( + type="group", + description="Measures CPU time overhead of submitting kernels through different APIs.", + notes="Each layer builds on top of the previous layer, adding functionality and overhead. " + "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API. " + "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance." 
+ "Work is ongoing to reduce the overhead of the SYCL API", + ), + "SinKernelGraph" : BenchmarkMetadata( + type="group", + unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.", + ), + } + + def benchmarks(self) -> list[Benchmark]: if options.sycl is None: return [] @@ -106,14 +123,7 @@ def benchmarks(self) -> list[Benchmark]: SubmitKernelUR(self, 1, 0), SubmitKernelUR(self, 1, 1), MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1), - MemcpyExecute(self, 100, 8, 102400, 10, 1, 1, 1), - MemcpyExecute(self, 400, 8, 1024, 1000, 1, 1, 1), - MemcpyExecute(self, 10, 16, 1024, 10000, 1, 1, 1), MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1), - MemcpyExecute(self, 100, 8, 102400, 10, 0, 1, 1), - MemcpyExecute(self, 400, 8, 1024, 1000, 0, 1, 1), - MemcpyExecute(self, 10, 16, 1024, 10000, 0, 1, 1), - MemcpyExecute(self, 4096, 1, 1024, 10, 0, 1, 0), MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0), GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 5), GraphApiSinKernelGraph(self, RUNTIMES.UR, 1, 5), @@ -540,6 +550,9 @@ def description(self) -> str: def name(self): return f"graph_api_benchmark_{self.runtime.value} SinKernelGraph graphs:{self.withGraphs}, numKernels:{self.numKernels}" + def unstable(self) -> str: + return "This benchmark combines both eager and graph execution, and may not be representative of real use cases." + def bin_args(self) -> list[str]: return [ "--iterations=10000", diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py index 18794d4e9c73c..e7451e24f25cf 100644 --- a/devops/scripts/benchmarks/benches/test.py +++ b/devops/scripts/benchmarks/benches/test.py @@ -6,7 +6,7 @@ import random from utils.utils import git_clone from .base import Benchmark, Suite -from utils.result import Result +from utils.result import BenchmarkMetadata, Result from utils.utils import run, create_build_path from options import options import os @@ -24,33 +24,49 @@ def name(self) -> str: def benchmarks(self) -> list[Benchmark]: bench_configs = [ - ("Memory Bandwidth", 2000, 200, "Foo Group"), - ("Latency", 100, 20, "Bar Group"), - ("Throughput", 1500, 150, "Foo Group"), - ("FLOPS", 3000, 300, "Foo Group"), - ("Cache Miss Rate", 250, 25, "Bar Group"), + ("Memory Bandwidth", 2000, 200, "Foo Group", None, None), + ("Latency", 100, 20, "Bar Group", "A Latency test note!", None), + ("Throughput", 1500, 150, "Foo Group", None, None), + ("FLOPS", 3000, 300, "Foo Group", None, "Unstable FLOPS test!"), + ("Cache Miss Rate", 250, 25, "Bar Group", "Test Note", "And another note!"), ] result = [] - for base_name, base_value, base_diff, group in bench_configs: + for base_name, base_value, base_diff, group, notes, unstable in bench_configs: for variant in range(6): value_multiplier = 1.0 + (variant * 0.2) name = f"{base_name} {variant+1}" value = base_value * value_multiplier diff = base_diff * value_multiplier - result.append(TestBench(self, name, value, diff, group)) + result.append(TestBench(self, name, value, diff, group, notes, unstable)) return result + def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: + return { + "Foo Group" : BenchmarkMetadata( + type="group", + description="This is a test benchmark for Foo Group.", + notes="This is a test note for Foo Group.", + ), + "Bar Group" : BenchmarkMetadata( + type="group", + description="This is a test benchmark for Bar Group.", + unstable="This is an unstable note for Bar Group.", + ), + } + class TestBench(Benchmark): - def __init__(self, suite, name, 
value, diff, group=""): + def __init__(self, suite, name, value, diff, group="", notes=None, unstable=None): super().__init__("", suite) self.bname = name self.value = value self.diff = diff self.group = group + self.notes_text = notes + self.unstable_text = unstable def name(self): return self.bname @@ -64,6 +80,12 @@ def setup(self): def description(self) -> str: return f"This is a test benchmark for {self.bname}." + def notes(self) -> str: + return self.notes_text + + def unstable(self) -> str: + return self.unstable_text + def run(self, env_vars) -> list[Result]: random_value = self.value + random.uniform(-1 * (self.diff), self.diff) return [ diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html index c10844f15c707..c40174b7f35a0 100644 --- a/devops/scripts/benchmarks/html/index.html +++ b/devops/scripts/benchmarks/html/index.html @@ -171,7 +171,98 @@ .extra-info-entry em { color: #555; } - + .display-options-container { + text-align: center; + margin-bottom: 24px; + padding: 16px; + background: #e9ecef; + border-radius: 8px; + } + .display-options-container label { + margin: 0 12px; + cursor: pointer; + } + .display-options-container input { + margin-right: 8px; + } + .benchmark-note { + background-color: #cfe2ff; + color: #084298; + padding: 10px; + margin-bottom: 10px; + border-radius: 5px; + border-left: 4px solid #084298; + } + .benchmark-unstable { + background-color: #f8d7da; + color: #842029; + padding: 10px; + margin-bottom: 10px; + border-radius: 5px; + border-left: 4px solid #842029; + } + .note-text { + color: #084298; + } + .unstable-warning { + color: #842029; + font-weight: bold; + } + .unstable-text { + color: #842029; + } + .options-container { + margin-bottom: 24px; + background: #e9ecef; + border-radius: 8px; + overflow: hidden; + } + .options-container summary { + padding: 12px 16px; + font-weight: 500; + cursor: pointer; + background: #dee2e6; + user-select: none; + } + .options-container summary:hover { + background: #ced4da; + } + .options-content { + padding: 16px; + display: flex; + flex-wrap: wrap; + gap: 24px; + } + .filter-section { + flex: 1; + min-width: 300px; + } + .filter-section h3 { + margin-top: 0; + margin-bottom: 12px; + font-size: 18px; + font-weight: 500; + text-align: left; + } + #suite-filters { + display: flex; + flex-wrap: wrap; + gap: 8px; + } + .display-options { + display: flex; + flex-direction: column; + gap: 8px; + } + .display-options label { + display: flex; + align-items: center; + cursor: pointer; + } + .display-options input { + margin-right: 8px; + } +
@@ -182,9 +273,6 @@

Benchmark Results

-
- -
+ Director's commentary + + +
+
+ +
Historical Results
diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js index 2bd52a70b07c8..ed7e361e14275 100644 --- a/devops/scripts/benchmarks/html/scripts.js +++ b/devops/scripts/benchmarks/html/scripts.js @@ -12,6 +12,10 @@ let timeseriesData, barChartsData, allRunNames; // DOM Elements let runSelect, selectedRunsDiv, suiteFiltersContainer; +// Add this at the top of the file with the other variable declarations +let showNotes = true; +let showUnstable = false; + // Run selector functions function updateSelectedRuns(forceUpdate = true) { selectedRunsDiv.innerHTML = ''; @@ -85,7 +89,8 @@ function createChart(data, containerId, type) { title: { display: true, text: data.unit - } + }, + grace: '20%', } } }; @@ -178,7 +183,7 @@ function drawCharts(filteredTimeseriesData, filteredBarChartsData) { // Create timeseries charts filteredTimeseriesData.forEach((data, index) => { const containerId = `timeseries-${index}`; - const container = createChartContainer(data, containerId); + const container = createChartContainer(data, containerId, 'benchmark'); document.querySelector('.timeseries .charts').appendChild(container); createChart(data, containerId, 'time'); }); @@ -186,7 +191,7 @@ function drawCharts(filteredTimeseriesData, filteredBarChartsData) { // Create bar charts filteredBarChartsData.forEach((data, index) => { const containerId = `barchart-${index}`; - const container = createChartContainer(data, containerId); + const container = createChartContainer(data, containerId, 'group'); document.querySelector('.bar-charts .charts').appendChild(container); createChart(data, containerId, 'bar'); }); @@ -195,11 +200,41 @@ function drawCharts(filteredTimeseriesData, filteredBarChartsData) { filterCharts(); } -function createChartContainer(data, canvasId) { +function createChartContainer(data, canvasId, type) { const container = document.createElement('div'); container.className = 'chart-container'; container.setAttribute('data-label', data.label); container.setAttribute('data-suite', data.suite); + + // Check if this benchmark is marked as unstable + const metadata = metadataForLabel(data.label, type); + if (metadata && metadata.unstable) { + container.setAttribute('data-unstable', 'true'); + + // Add unstable warning + const unstableWarning = document.createElement('div'); + unstableWarning.className = 'benchmark-unstable'; + unstableWarning.textContent = metadata.unstable; + unstableWarning.style.display = showUnstable ? 'block' : 'none'; + container.appendChild(unstableWarning); + } + + // Add notes if present + if (metadata && metadata.notes) { + const noteElement = document.createElement('div'); + noteElement.className = 'benchmark-note'; + noteElement.textContent = metadata.notes; + noteElement.style.display = showNotes ? 
'block' : 'none'; + container.appendChild(noteElement); + } + + // Add description if present in metadata, but only for groups + if (metadata && metadata.description && metadata.type === "group") { + const descElement = document.createElement('div'); + descElement.className = 'benchmark-description'; + descElement.textContent = metadata.description; + container.appendChild(descElement); + } const canvas = document.createElement('canvas'); canvas.id = canvasId; @@ -221,11 +256,10 @@ function createChartContainer(data, canvasId) { summary.appendChild(downloadButton); details.appendChild(summary); - latestRunsLookup = createLatestRunsLookup(benchmarkRuns); - // Create and append extra info const extraInfo = document.createElement('div'); extraInfo.className = 'extra-info'; + latestRunsLookup = createLatestRunsLookup(benchmarkRuns); extraInfo.innerHTML = generateExtraInfo(latestRunsLookup, data); details.appendChild(extraInfo); @@ -234,6 +268,16 @@ function createChartContainer(data, canvasId) { return container; } +function metadataForLabel(label, type) { + for (const [key, metadata] of Object.entries(benchmarkMetadata)) { + if (metadata.type === type && label.startsWith(key)) { + return metadata; + } + } + + return null; +} + // Pre-compute a lookup for the latest run per label function createLatestRunsLookup(benchmarkRuns) { const latestRunsMap = new Map(); @@ -259,17 +303,31 @@ function generateExtraInfo(latestRunsLookup, data) { const labels = data.datasets ? data.datasets.map(dataset => dataset.label) : [data.label]; return labels.map(label => { + const metadata = metadataForLabel(label); const latestRun = latestRunsLookup.get(label); - - if (latestRun) { - return `
- ${label}: ${formatCommand(latestRun.result)}
- Description: ${latestRun.result.description} -
`; + + let html = '
'; + + if (metadata) { + html += `${label}: ${formatCommand(latestRun.result)}
`; + + if (metadata.description) { + html += `Description: ${metadata.description}`; + } + + if (metadata.notes) { + html += `
Notes: ${metadata.notes}`; + } + + if (metadata.unstable) { + html += `
⚠️ Unstable: ${metadata.unstable}`; + } + } else { + html += `${label}: No data available`; } - return `
- ${label}: No data available -
`; + + html += '
'; + return html; }).join(''); } @@ -331,6 +389,10 @@ function updateURL() { url.searchParams.delete('runs'); } + // Add toggle states to URL + url.searchParams.set('notes', showNotes); + url.searchParams.set('unstable', showUnstable); + history.replaceState(null, '', url); } @@ -342,7 +404,19 @@ function filterCharts() { document.querySelectorAll('.chart-container').forEach(container => { const label = container.getAttribute('data-label'); const suite = container.getAttribute('data-suite'); - container.style.display = (regex.test(label) && activeSuites.includes(suite)) ? '' : 'none'; + const isUnstable = container.getAttribute('data-unstable') === 'true'; + + // Hide unstable benchmarks if showUnstable is false + const shouldShow = regex.test(label) && + activeSuites.includes(suite) && + (showUnstable || !isUnstable); + + container.style.display = shouldShow ? '' : 'none'; + }); + + // Update notes visibility + document.querySelectorAll('.benchmark-note').forEach(note => { + note.style.display = showNotes ? 'block' : 'none'; }); updateURL(); @@ -395,13 +469,20 @@ function processBarChartsData(benchmarkRuns) { if (!result.explicit_group) return; if (!groupedResults[result.explicit_group]) { + // Look up group metadata + const groupMetadata = metadataForLabel(result.explicit_group); + groupedResults[result.explicit_group] = { label: result.explicit_group, suite: result.suite, unit: result.unit, lower_is_better: result.lower_is_better, labels: [], - datasets: [] + datasets: [], + // Add metadata if available + description: groupMetadata?.description || null, + notes: groupMetadata?.notes || null, + unstable: groupMetadata?.unstable || null }; } @@ -466,6 +547,43 @@ function setupSuiteFilters() { }); } +function setupToggles() { + const notesToggle = document.getElementById('show-notes'); + const unstableToggle = document.getElementById('show-unstable'); + + notesToggle.addEventListener('change', function() { + showNotes = this.checked; + // Update all note elements visibility + document.querySelectorAll('.benchmark-note').forEach(note => { + note.style.display = showNotes ? 'block' : 'none'; + }); + filterCharts(); + }); + + unstableToggle.addEventListener('change', function() { + showUnstable = this.checked; + // Update all unstable warning elements visibility + document.querySelectorAll('.benchmark-unstable').forEach(warning => { + warning.style.display = showUnstable ? 
'block' : 'none'; + }); + filterCharts(); + }); + + // Initialize from URL params if present + const notesParam = getQueryParam('notes'); + const unstableParam = getQueryParam('unstable'); + + if (notesParam !== null) { + showNotes = notesParam === 'true'; + notesToggle.checked = showNotes; + } + + if (unstableParam !== null) { + showUnstable = unstableParam === 'true'; + unstableToggle.checked = showUnstable; + } +} + function initializeCharts() { // Process raw data timeseriesData = processTimeseriesData(benchmarkRuns); @@ -502,6 +620,7 @@ function initializeCharts() { // Setup UI components setupRunSelector(); setupSuiteFilters(); + setupToggles(); // Apply URL parameters const regexParam = getQueryParam('regex'); @@ -542,7 +661,8 @@ function loadData() { fetch(remoteDataUrl) .then(response => response.json()) .then(data => { - benchmarkRuns = data; + benchmarkRuns = data.runs || data; + benchmarkMetadata = data.metadata || benchmarkMetadata || {}; initializeCharts(); }) .catch(error => { @@ -553,7 +673,7 @@ function loadData() { loadingIndicator.style.display = 'none'; // Hide loading indicator }); } else { - // Use local data + // Use local data (benchmarkRuns and benchmarkMetadata should be defined in data.js) initializeCharts(); loadingIndicator.style.display = 'none'; // Hide loading indicator } diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 1d7304ea5e212..8db0549a861a4 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -137,6 +137,18 @@ def process_results( return valid_results, processed +def collect_metadata(suites): + metadata = {} + + for s in suites: + metadata.update(s.additionalMetadata()) + suite_benchmarks = s.benchmarks() + for benchmark in suite_benchmarks: + metadata[benchmark.name()] = benchmark.get_metadata() + + return metadata + + def main(directory, additional_env_vars, save_name, compare_names, filter): prepare_workdir(directory, INTERNAL_WORKDIR_VERSION) @@ -160,6 +172,13 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): else [] ) + # Collect metadata from all benchmarks without setting them up + metadata = collect_metadata(suites) + + # If dry run, we're done + if options.dry_run: + suites = [] + benchmarks = [] failures = {} @@ -290,7 +309,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): html_path = options.output_directory if options.output_directory is None: html_path = os.path.join(os.path.dirname(__file__), "html") - generate_html(history.runs, compare_names, html_path) + generate_html(history.runs, compare_names, html_path, metadata) def validate_and_parse_env_args(env_args): diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py index 49b4d1d84a214..b71f87371b383 100644 --- a/devops/scripts/benchmarks/output_html.py +++ b/devops/scripts/benchmarks/output_html.py @@ -6,10 +6,17 @@ import json import os from options import options +from utils.result import BenchmarkMetadata -def generate_html(benchmark_runs: list, compare_names: list[str], html_path: str): +def generate_html( + benchmark_runs: list, + compare_names: list[str], + html_path: str, + metadata: dict[str, BenchmarkMetadata], +): benchmark_runs.sort(key=lambda run: run.date, reverse=True) + serializable_metadata = {k: v.__dict__ for k, v in metadata.items()} if options.output_html == "local": data_path = os.path.join(html_path, "data.js") @@ -26,6 +33,11 @@ def generate_html(benchmark_runs: list, compare_names: 
list[str], html_path: str f.write("\n];\n\n") # terminates benchmarkRuns + f.write("benchmarkMetadata = ") + json.dump(serializable_metadata, f) + + f.write(";\n\n") # terminates benchmarkMetadata + f.write("defaultCompareNames = ") json.dump(compare_names, f) f.write(";\n") # terminates defaultCompareNames @@ -34,12 +46,8 @@ def generate_html(benchmark_runs: list, compare_names: list[str], html_path: str else: data_path = os.path.join(html_path, "data.json") with open(data_path, "w") as f: - f.write("[\n") - for i, run in enumerate(benchmark_runs): - if i > 0: - f.write(",\n") - f.write(run.to_json()) - f.write("\n]\n") + json_data = {"runs": benchmark_runs, "metadata": serializable_metadata} + json.dump(json_data, f, indent=2) print( f"Upload {data_path} to a location set in config.js remoteDataUrl argument." diff --git a/devops/scripts/benchmarks/utils/result.py b/devops/scripts/benchmarks/utils/result.py index 7d82d9e488edf..11d837068b887 100644 --- a/devops/scripts/benchmarks/utils/result.py +++ b/devops/scripts/benchmarks/utils/result.py @@ -42,3 +42,12 @@ class BenchmarkRun: default=None, metadata=config(encoder=datetime.isoformat, decoder=datetime.fromisoformat), ) + + +@dataclass_json +@dataclass +class BenchmarkMetadata: + type: str = 'benchmark' # or 'group' + description: Optional[str] = None + notes: Optional[str] = None + unstable: Optional[str] = None From 30cd308f4faec6e884234fec382c0580cec8f9ca Mon Sep 17 00:00:00 2001 From: pbalcer Date: Fri, 14 Mar 2025 12:23:37 +0100 Subject: [PATCH 042/114] apply formatting --- devops/scripts/benchmarks/benches/base.py | 2 +- devops/scripts/benchmarks/benches/compute.py | 11 +++--- devops/scripts/benchmarks/benches/test.py | 8 ++-- devops/scripts/benchmarks/html/scripts.js | 40 ++++++++++---------- devops/scripts/benchmarks/utils/result.py | 2 +- 5 files changed, 32 insertions(+), 31 deletions(-) diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py index 1135a267864a9..8403097eca168 100644 --- a/devops/scripts/benchmarks/benches/base.py +++ b/devops/scripts/benchmarks/benches/base.py @@ -110,7 +110,7 @@ def unstable(self) -> str: def get_metadata(self) -> BenchmarkMetadata: return BenchmarkMetadata( - type='benchmark', + type="benchmark", description=self.description(), notes=self.notes(), unstable=self.unstable(), diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index 67ec0bf2087ff..9386f4d2b1b35 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -56,21 +56,20 @@ def setup(self): def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: return { - "SubmitKernel" : BenchmarkMetadata( + "SubmitKernel": BenchmarkMetadata( type="group", description="Measures CPU time overhead of submitting kernels through different APIs.", notes="Each layer builds on top of the previous layer, adding functionality and overhead. " - "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API. " - "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance." - "Work is ongoing to reduce the overhead of the SYCL API", + "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API. " + "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance." 
+ "Work is ongoing to reduce the overhead of the SYCL API", ), - "SinKernelGraph" : BenchmarkMetadata( + "SinKernelGraph": BenchmarkMetadata( type="group", unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.", ), } - def benchmarks(self) -> list[Benchmark]: if options.sycl is None: return [] diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py index e7451e24f25cf..3802597f5c48a 100644 --- a/devops/scripts/benchmarks/benches/test.py +++ b/devops/scripts/benchmarks/benches/test.py @@ -39,18 +39,20 @@ def benchmarks(self) -> list[Benchmark]: value = base_value * value_multiplier diff = base_diff * value_multiplier - result.append(TestBench(self, name, value, diff, group, notes, unstable)) + result.append( + TestBench(self, name, value, diff, group, notes, unstable) + ) return result def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: return { - "Foo Group" : BenchmarkMetadata( + "Foo Group": BenchmarkMetadata( type="group", description="This is a test benchmark for Foo Group.", notes="This is a test note for Foo Group.", ), - "Bar Group" : BenchmarkMetadata( + "Bar Group": BenchmarkMetadata( type="group", description="This is a test benchmark for Bar Group.", unstable="This is an unstable note for Bar Group.", diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js index ed7e361e14275..4136bb647b079 100644 --- a/devops/scripts/benchmarks/html/scripts.js +++ b/devops/scripts/benchmarks/html/scripts.js @@ -205,12 +205,12 @@ function createChartContainer(data, canvasId, type) { container.className = 'chart-container'; container.setAttribute('data-label', data.label); container.setAttribute('data-suite', data.suite); - + // Check if this benchmark is marked as unstable const metadata = metadataForLabel(data.label, type); if (metadata && metadata.unstable) { container.setAttribute('data-unstable', 'true'); - + // Add unstable warning const unstableWarning = document.createElement('div'); unstableWarning.className = 'benchmark-unstable'; @@ -218,7 +218,7 @@ function createChartContainer(data, canvasId, type) { unstableWarning.style.display = showUnstable ? 'block' : 'none'; container.appendChild(unstableWarning); } - + // Add notes if present if (metadata && metadata.notes) { const noteElement = document.createElement('div'); @@ -227,7 +227,7 @@ function createChartContainer(data, canvasId, type) { noteElement.style.display = showNotes ? 'block' : 'none'; container.appendChild(noteElement); } - + // Add description if present in metadata, but only for groups if (metadata && metadata.description && metadata.type === "group") { const descElement = document.createElement('div'); @@ -274,7 +274,7 @@ function metadataForLabel(label, type) { return metadata; } } - + return null; } @@ -305,27 +305,27 @@ function generateExtraInfo(latestRunsLookup, data) { return labels.map(label => { const metadata = metadataForLabel(label); const latestRun = latestRunsLookup.get(label); - + let html = '
'; - + if (metadata) { html += `${label}: ${formatCommand(latestRun.result)}
`; - + if (metadata.description) { html += `Description: ${metadata.description}`; } - + if (metadata.notes) { html += `
Notes: ${metadata.notes}`; } - + if (metadata.unstable) { html += `
⚠️ Unstable: ${metadata.unstable}`; } } else { html += `${label}: No data available`; } - + html += '
'; return html; }).join(''); @@ -407,9 +407,9 @@ function filterCharts() { const isUnstable = container.getAttribute('data-unstable') === 'true'; // Hide unstable benchmarks if showUnstable is false - const shouldShow = regex.test(label) && - activeSuites.includes(suite) && - (showUnstable || !isUnstable); + const shouldShow = regex.test(label) && + activeSuites.includes(suite) && + (showUnstable || !isUnstable); container.style.display = shouldShow ? '' : 'none'; }); @@ -471,7 +471,7 @@ function processBarChartsData(benchmarkRuns) { if (!groupedResults[result.explicit_group]) { // Look up group metadata const groupMetadata = metadataForLabel(result.explicit_group); - + groupedResults[result.explicit_group] = { label: result.explicit_group, suite: result.suite, @@ -550,7 +550,7 @@ function setupSuiteFilters() { function setupToggles() { const notesToggle = document.getElementById('show-notes'); const unstableToggle = document.getElementById('show-unstable'); - + notesToggle.addEventListener('change', function() { showNotes = this.checked; // Update all note elements visibility @@ -559,7 +559,7 @@ function setupToggles() { }); filterCharts(); }); - + unstableToggle.addEventListener('change', function() { showUnstable = this.checked; // Update all unstable warning elements visibility @@ -568,16 +568,16 @@ function setupToggles() { }); filterCharts(); }); - + // Initialize from URL params if present const notesParam = getQueryParam('notes'); const unstableParam = getQueryParam('unstable'); - + if (notesParam !== null) { showNotes = notesParam === 'true'; notesToggle.checked = showNotes; } - + if (unstableParam !== null) { showUnstable = unstableParam === 'true'; unstableToggle.checked = showUnstable; diff --git a/devops/scripts/benchmarks/utils/result.py b/devops/scripts/benchmarks/utils/result.py index 11d837068b887..b29d973602a35 100644 --- a/devops/scripts/benchmarks/utils/result.py +++ b/devops/scripts/benchmarks/utils/result.py @@ -47,7 +47,7 @@ class BenchmarkRun: @dataclass_json @dataclass class BenchmarkMetadata: - type: str = 'benchmark' # or 'group' + type: str = "benchmark" # or 'group' description: Optional[str] = None notes: Optional[str] = None unstable: Optional[str] = None From 5e0539a50fd9835b99391a6bc91e833604cf40ea Mon Sep 17 00:00:00 2001 From: pbalcer Date: Fri, 14 Mar 2025 12:33:26 +0100 Subject: [PATCH 043/114] fix multiple descriptions/notes --- devops/scripts/benchmarks/benches/compute.py | 8 ++++---- devops/scripts/benchmarks/benches/test.py | 3 ++- devops/scripts/benchmarks/html/index.html | 2 ++ 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index 9386f4d2b1b35..f69df1966d690 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -59,10 +59,10 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: "SubmitKernel": BenchmarkMetadata( type="group", description="Measures CPU time overhead of submitting kernels through different APIs.", - notes="Each layer builds on top of the previous layer, adding functionality and overhead. " - "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API. " - "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance." 
- "Work is ongoing to reduce the overhead of the SYCL API", + notes="Each layer builds on top of the previous layer, adding functionality and overhead.\n" + "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n" + "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n" + "Work is ongoing to reduce the overhead of the SYCL API\n", ), "SinKernelGraph": BenchmarkMetadata( type="group", diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py index 3802597f5c48a..0e4ee55286fb0 100644 --- a/devops/scripts/benchmarks/benches/test.py +++ b/devops/scripts/benchmarks/benches/test.py @@ -50,7 +50,8 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: "Foo Group": BenchmarkMetadata( type="group", description="This is a test benchmark for Foo Group.", - notes="This is a test note for Foo Group.", + notes="This is a test note for Foo Group.\n" + "Look, multiple lines!", ), "Bar Group": BenchmarkMetadata( type="group", diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html index c40174b7f35a0..446b103029c80 100644 --- a/devops/scripts/benchmarks/html/index.html +++ b/devops/scripts/benchmarks/html/index.html @@ -192,6 +192,7 @@ margin-bottom: 10px; border-radius: 5px; border-left: 4px solid #084298; + white-space: pre-line; } .benchmark-unstable { background-color: #f8d7da; @@ -200,6 +201,7 @@ margin-bottom: 10px; border-radius: 5px; border-left: 4px solid #842029; + white-space: pre-line; } .note-text { color: #084298; From 137407a3e41f8764e51a42e88d16c7f6b6abcb79 Mon Sep 17 00:00:00 2001 From: pbalcer Date: Fri, 14 Mar 2025 12:38:20 +0100 Subject: [PATCH 044/114] fix benchmark descriptions --- devops/scripts/benchmarks/html/index.html | 10 ++++++++++ devops/scripts/benchmarks/html/scripts.js | 6 +++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html index 446b103029c80..32a00ab67bb47 100644 --- a/devops/scripts/benchmarks/html/index.html +++ b/devops/scripts/benchmarks/html/index.html @@ -264,6 +264,16 @@ .display-options input { margin-right: 8px; } + .benchmark-description { + background-color: #f2f2f2; + color: #333; + padding: 10px; + margin-bottom: 10px; + border-radius: 5px; + border-left: 4px solid #6c757d; + white-space: pre-line; + font-style: italic; + } diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js index 4136bb647b079..47d23ff8f6e9d 100644 --- a/devops/scripts/benchmarks/html/scripts.js +++ b/devops/scripts/benchmarks/html/scripts.js @@ -260,7 +260,7 @@ function createChartContainer(data, canvasId, type) { const extraInfo = document.createElement('div'); extraInfo.className = 'extra-info'; latestRunsLookup = createLatestRunsLookup(benchmarkRuns); - extraInfo.innerHTML = generateExtraInfo(latestRunsLookup, data); + extraInfo.innerHTML = generateExtraInfo(latestRunsLookup, data, 'benchmark'); details.appendChild(extraInfo); container.appendChild(details); @@ -299,11 +299,11 @@ function createLatestRunsLookup(benchmarkRuns) { return latestRunsMap; } -function generateExtraInfo(latestRunsLookup, data) { +function generateExtraInfo(latestRunsLookup, data, type) { const labels = data.datasets ? 
data.datasets.map(dataset => dataset.label) : [data.label]; return labels.map(label => { - const metadata = metadataForLabel(label); + const metadata = metadataForLabel(label, type); const latestRun = latestRunsLookup.get(label); let html = '
'; From e0f5ca61518604940f08ad0eb7f21ed5b42aa945 Mon Sep 17 00:00:00 2001 From: pbalcer Date: Fri, 14 Mar 2025 12:46:57 +0100 Subject: [PATCH 045/114] fix remote html output --- devops/scripts/benchmarks/benches/test.py | 3 +- devops/scripts/benchmarks/output_html.py | 36 +++++++++++------------ 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py index 0e4ee55286fb0..4862bc64ecbaf 100644 --- a/devops/scripts/benchmarks/benches/test.py +++ b/devops/scripts/benchmarks/benches/test.py @@ -50,8 +50,7 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: "Foo Group": BenchmarkMetadata( type="group", description="This is a test benchmark for Foo Group.", - notes="This is a test note for Foo Group.\n" - "Look, multiple lines!", + notes="This is a test note for Foo Group.\n" "Look, multiple lines!", ), "Bar Group": BenchmarkMetadata( type="group", diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py index b71f87371b383..40a3f914e5115 100644 --- a/devops/scripts/benchmarks/output_html.py +++ b/devops/scripts/benchmarks/output_html.py @@ -18,36 +18,36 @@ def generate_html( benchmark_runs.sort(key=lambda run: run.date, reverse=True) serializable_metadata = {k: v.__dict__ for k, v in metadata.items()} + serializable_runs = [json.loads(run.to_json()) for run in benchmark_runs] + + data = { + "runs": serializable_runs, + "metadata": serializable_metadata, + "defaultCompareNames": compare_names, + } + if options.output_html == "local": data_path = os.path.join(html_path, "data.js") - # Write data to js file - # We can't store this as a standalone json file because it needs to be inline in the html with open(data_path, "w") as f: - f.write("benchmarkRuns = [\n") - # it might be tempting to just to create a list and convert - # that to a json, but that leads to json being serialized twice. - for i, run in enumerate(benchmark_runs): - if i > 0: - f.write(",\n") - f.write(run.to_json()) - - f.write("\n];\n\n") # terminates benchmarkRuns + # For local format, we need to write JavaScript variable assignments + f.write("benchmarkRuns = ") + json.dump(data["runs"], f, indent=2) + f.write(";\n\n") f.write("benchmarkMetadata = ") - json.dump(serializable_metadata, f) - - f.write(";\n\n") # terminates benchmarkMetadata + json.dump(data["metadata"], f, indent=2) + f.write(";\n\n") f.write("defaultCompareNames = ") - json.dump(compare_names, f) - f.write(";\n") # terminates defaultCompareNames + json.dump(data["defaultCompareNames"], f, indent=2) + f.write(";\n") print(f"See {os.getcwd()}/html/index.html for the results.") else: + # For remote format, we write a single JSON file data_path = os.path.join(html_path, "data.json") with open(data_path, "w") as f: - json_data = {"runs": benchmark_runs, "metadata": serializable_metadata} - json.dump(json_data, f, indent=2) + json.dump(data, f, indent=2) print( f"Upload {data_path} to a location set in config.js remoteDataUrl argument." 
From 1041db695a7da031879bea08f2b2b0b0c9e76151 Mon Sep 17 00:00:00 2001 From: pbalcer Date: Fri, 14 Mar 2025 12:55:39 +0100 Subject: [PATCH 046/114] fix metadata collection with dry run --- devops/scripts/benchmarks/main.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 8db0549a861a4..e701b9eac70a2 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -159,18 +159,14 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): options.extra_ld_libraries.extend(cr.ld_libraries()) options.extra_env_vars.update(cr.env_vars()) - suites = ( - [ - ComputeBench(directory), - VelocityBench(directory), - SyclBench(directory), - LlamaCppBench(directory), - UMFSuite(directory), - TestSuite(), - ] - if not options.dry_run - else [] - ) + suites = [ + ComputeBench(directory), + VelocityBench(directory), + SyclBench(directory), + LlamaCppBench(directory), + UMFSuite(directory), + TestSuite(), + ] # Collect metadata from all benchmarks without setting them up metadata = collect_metadata(suites) From fae04f46984b39925c1724b0e3c7125490d4ab7b Mon Sep 17 00:00:00 2001 From: pbalcer Date: Fri, 14 Mar 2025 13:30:46 +0100 Subject: [PATCH 047/114] cleanup compute bench, fix readme, use newer sycl-bench --- devops/scripts/benchmarks/README.md | 8 +- devops/scripts/benchmarks/benches/base.py | 13 +- devops/scripts/benchmarks/benches/compute.py | 201 ++++++++---------- .../scripts/benchmarks/benches/syclbench.py | 4 +- devops/scripts/benchmarks/main.py | 2 +- 5 files changed, 99 insertions(+), 129 deletions(-) diff --git a/devops/scripts/benchmarks/README.md b/devops/scripts/benchmarks/README.md index 004fe14eca35b..fcadded3cad51 100644 --- a/devops/scripts/benchmarks/README.md +++ b/devops/scripts/benchmarks/README.md @@ -6,6 +6,8 @@ Scripts for running performance tests on SYCL and Unified Runtime. - [Velocity Bench](https://github.com/oneapi-src/Velocity-Bench) - [Compute Benchmarks](https://github.com/intel/compute-benchmarks/) +- [LlamaCpp Benchmarks](https://github.com/ggerganov/llama.cpp) +- [SYCL-Bench](https://github.com/unisa-hpc/sycl-bench) ## Running @@ -27,8 +29,6 @@ You can also include additional benchmark parameters, such as environment variab Once all the required information is entered, click the "Run workflow" button to initiate a new workflow run. This will execute the benchmarks and then post the results as a comment on the specified Pull Request. -By default, all benchmark runs are compared against `baseline`, which is a well-established set of the latest data. - You must be a member of the `oneapi-src` organization to access these features. ## Comparing results @@ -37,8 +37,8 @@ By default, the benchmark results are not stored. To store them, use the option You can compare benchmark results using `--compare` option. The comparison will be presented in a markdown output file (see below). If you want to calculate the relative performance of the new results against the previously saved data, use `--compare ` (i.e. `--compare baseline`). In case of comparing only stored data without generating new results, use `--dry-run --compare --compare --relative-perf `, where `name1` indicates the baseline for the relative performance calculation and `--dry-run` prevents the script for running benchmarks. 
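For example (paths and run names here are illustrative, and the exact flag combination is an assumption based on `main.py` and the CI workflows in this series), a save-then-compare sequence might look like:

```bash
# Run the suites against a SYCL build and store the results under a name
# (illustrative paths and names)
./devops/scripts/benchmarks/main.py ~/llvm_bench_workdir \
    --sycl ~/llvm/build \
    --save my_changes

# Compare already-saved results without re-running the benchmarks
./devops/scripts/benchmarks/main.py ~/llvm_bench_workdir \
    --dry-run \
    --compare Baseline_L0 --compare my_changes \
    --relative-perf Baseline_L0 \
    --output-markdown full
```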
Listing more than two `--compare` options results in displaying only execution time, without statistical analysis. -Baseline, as well as baseline-v2 (for the level-zero adapter v2) is updated automatically during a nightly job. The results -are stored [here](https://oneapi-src.github.io/unified-runtime/benchmark_results.html). +Baseline_L0, as well as Baseline_L0v2 (for the level-zero adapter v2) is updated automatically during a nightly job. The results +are stored [here](https://oneapi-src.github.io/unified-runtime/performance/). ## Output formats You can display the results in the form of a HTML file by using `--ouptut-html` and a markdown file by using `--output-markdown`. Due to character limits for posting PR comments, the final content of the markdown file might be reduced. In order to obtain the full markdown output, use `--output-markdown full`. diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py index 8403097eca168..1bc99b11518e3 100644 --- a/devops/scripts/benchmarks/benches/base.py +++ b/devops/scripts/benchmarks/benches/base.py @@ -75,12 +75,6 @@ def download( self.data_path = self.create_data_path(name, skip_data_dir) return download(self.data_path, url, file, untar, unzip, checksum) - def name(self): - raise NotImplementedError() - - def description(self): - return "No description provided." - def lower_is_better(self): return True @@ -99,8 +93,11 @@ def stddev_threshold(self): def get_suite_name(self) -> str: return self.suite.name() - def result_names(self) -> list[str]: - return [self.name()] + def name(self): + raise NotImplementedError() + + def description(self): + return "No description provided." def notes(self) -> str: return None diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index f69df1966d690..c26f645635d27 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -13,6 +13,20 @@ from enum import Enum +class RUNTIMES(Enum): + SYCL = "sycl" + LEVEL_ZERO = "l0" + UR = "ur" + + +def runtime_to_name(runtime: RUNTIMES) -> str: + return { + RUNTIMES.SYCL: "SYCL", + RUNTIMES.LEVEL_ZERO: "Level Zero", + RUNTIMES.UR: "Unified Runtime", + }[runtime] + + class ComputeBench(Suite): def __init__(self, directory): self.directory = directory @@ -70,6 +84,16 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: ), } + def enabled_runtimes(self, supported_runtimes=None): + # all runtimes in the RUNTIMES enum + runtimes = supported_runtimes or list(RUNTIMES) + + # Filter out UR if not available + if options.ur is None: + runtimes = [r for r in runtimes if r != RUNTIMES.UR] + + return runtimes + def benchmarks(self) -> list[Benchmark]: if options.sycl is None: return [] @@ -77,11 +101,46 @@ def benchmarks(self) -> list[Benchmark]: if options.ur_adapter == "cuda": return [] - benches = [ - SubmitKernelL0(self, 0), - SubmitKernelL0(self, 1), - SubmitKernelSYCL(self, 0), - SubmitKernelSYCL(self, 1), + benches = [] + + # Add SubmitKernel benchmarks using loops + for runtime in self.enabled_runtimes(): + for in_order_queue in [0, 1]: + for measure_completion in [0, 1]: + benches.append( + SubmitKernel(self, runtime, in_order_queue, measure_completion) + ) + + # Add SinKernelGraph benchmarks + for runtime in self.enabled_runtimes(): + for with_graphs in [0, 1]: + for num_kernels in [5, 100]: + benches.append( + GraphApiSinKernelGraph(self, runtime, with_graphs, num_kernels) + ) + + # Add ULLS benchmarks + for runtime in 
self.enabled_runtimes([RUNTIMES.SYCL, RUNTIMES.LEVEL_ZERO]): + benches.append(UllsEmptyKernel(self, runtime, 1000, 256)) + benches.append(UllsKernelSwitch(self, runtime, 8, 200, 0, 0, 1, 1)) + + # Add GraphApiSubmitGraph benchmarks + for runtime in self.enabled_runtimes([RUNTIMES.SYCL]): + for in_order_queue in [0, 1]: + for num_kernels in [4, 10, 32]: + for measure_completion_time in [0, 1]: + benches.append( + GraphApiSubmitGraph( + self, + runtime, + in_order_queue, + num_kernels, + measure_completion_time, + ) + ) + + # Add other benchmarks + benches += [ QueueInOrderMemcpy(self, 0, "Device", "Device", 1024), QueueInOrderMemcpy(self, 0, "Host", "Device", 1024), QueueMemcpy(self, "Device", "Device", 1024), @@ -89,45 +148,14 @@ def benchmarks(self) -> list[Benchmark]: ExecImmediateCopyQueue(self, 0, 1, "Device", "Device", 1024), ExecImmediateCopyQueue(self, 1, 1, "Device", "Host", 1024), VectorSum(self), - GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 5), - GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 1, 5), - GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 100), - GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 1, 100), - GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 0, 5), - GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 5), - GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 0, 100), - GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 100), - UllsEmptyKernel(self, RUNTIMES.SYCL, 1000, 256), - UllsEmptyKernel(self, RUNTIMES.LEVEL_ZERO, 1000, 256), - UllsKernelSwitch(self, RUNTIMES.SYCL, 8, 200, 0, 0, 1, 1), - UllsKernelSwitch(self, RUNTIMES.LEVEL_ZERO, 8, 200, 0, 0, 1, 1), ] - for in_order_queue in [0, 1]: - for num_kernels in [4, 32]: - for measure_completion_time in [0, 1]: - benches.append( - GraphApiSubmitGraph( - self, - RUNTIMES.SYCL, - in_order_queue, - num_kernels, - measure_completion_time, - ) - ) - + # Add UR-specific benchmarks if options.ur is not None: benches += [ - SubmitKernelUR(self, 0, 0), - SubmitKernelUR(self, 1, 0), - SubmitKernelUR(self, 1, 1), MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1), MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1), MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0), - GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 5), - GraphApiSinKernelGraph(self, RUNTIMES.UR, 1, 5), - GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 100), - GraphApiSinKernelGraph(self, RUNTIMES.UR, 1, 100), ] return benches @@ -228,98 +256,49 @@ def teardown(self): return -class SubmitKernelSYCL(ComputeBenchmark): - def __init__(self, bench, ioq): +class SubmitKernel(ComputeBenchmark): + def __init__(self, bench, runtime: RUNTIMES, ioq, measure_completion=0): self.ioq = ioq - super().__init__(bench, "api_overhead_benchmark_sycl", "SubmitKernel") + self.runtime = runtime + self.measure_completion = measure_completion + super().__init__( + bench, f"api_overhead_benchmark_{runtime.value}", "SubmitKernel" + ) def name(self): order = "in order" if self.ioq else "out of order" - return f"api_overhead_benchmark_sycl SubmitKernel {order}" + completion_str = " with measure completion" if self.measure_completion else "" + return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}" def explicit_group(self): - return "SubmitKernel" - - def bin_args(self) -> list[str]: - return [ - f"--Ioq={self.ioq}", - "--DiscardEvents=0", - "--MeasureCompletion=0", - "--iterations=100000", - "--Profiling=0", - "--NumKernels=10", - "--KernelExecTime=1", - ] - - def description(self) -> str: - order = "in-order" if self.ioq else "out-of-order" return ( - f"Measures 
CPU time overhead of submitting {order} kernels through SYCL API." - "Uses 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time." - ) - - -class SubmitKernelUR(ComputeBenchmark): - def __init__(self, bench, ioq, measureCompletion): - self.ioq = ioq - self.measureCompletion = measureCompletion - super().__init__(bench, "api_overhead_benchmark_ur", "SubmitKernel") - - def name(self): - order = "in order" if self.ioq else "out of order" - return f"api_overhead_benchmark_ur SubmitKernel {order}" + ( - " with measure completion" if self.measureCompletion else "" + "SubmitKernel" + if self.measure_completion == 0 + else "SubmitKernel With Completion" ) - def explicit_group(self): - return "SubmitKernel" - def description(self) -> str: order = "in-order" if self.ioq else "out-of-order" - completion = "including" if self.measureCompletion else "excluding" - return ( - f"Measures CPU time overhead of submitting {order} kernels through Unified Runtime API, " - f"{completion} kernel completion time. Uses 10 simple kernels with minimal execution time " - f"to isolate API overhead." - ) + runtime_name = runtime_to_name(self.runtime) - def bin_args(self) -> list[str]: - return [ - f"--Ioq={self.ioq}", - "--DiscardEvents=0", - f"--MeasureCompletion={self.measureCompletion}", - "--iterations=100000", - "--Profiling=0", - "--NumKernels=10", - "--KernelExecTime=1", - ] - - -class SubmitKernelL0(ComputeBenchmark): - def __init__(self, bench, ioq): - self.ioq = ioq - super().__init__(bench, "api_overhead_benchmark_l0", "SubmitKernel") - - def name(self): - order = "in order" if self.ioq else "out of order" - return f"api_overhead_benchmark_l0 SubmitKernel {order}" + completion_desc = "" + if self.runtime == RUNTIMES.UR: + completion_desc = f", {'including' if self.measure_completion else 'excluding'} kernel completion time" - def explicit_group(self): - return "SubmitKernel" + l0_specific = "" + if self.runtime == RUNTIMES.LEVEL_ZERO: + l0_specific = " Uses immediate command lists" - def description(self) -> str: - order = "in-order" if self.ioq else "out-of-order" return ( - f"Measures CPU time overhead of submitting {order} kernels through Level Zero API. " - f"Uses immediate command lists with 10 minimal kernels to isolate submission overhead " - f"from execution time." + f"Measures CPU time overhead of submitting {order} kernels through {runtime_name} API{completion_desc}. " + f"Runs 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time. 
{l0_specific}" ) def bin_args(self) -> list[str]: return [ f"--Ioq={self.ioq}", "--DiscardEvents=0", - "--MeasureCompletion=0", + f"--MeasureCompletion={self.measure_completion}", "--iterations=100000", "--Profiling=0", "--NumKernels=10", @@ -521,12 +500,6 @@ def bin_args(self) -> list[str]: ] -class RUNTIMES(Enum): - SYCL = "sycl" - LEVEL_ZERO = "l0" - UR = "ur" - - class GraphApiSinKernelGraph(ComputeBenchmark): def __init__(self, bench, runtime: RUNTIMES, withGraphs, numKernels): self.withGraphs = withGraphs diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py index cc2db0a2fcf7c..14c0104d0a08c 100644 --- a/devops/scripts/benchmarks/benches/syclbench.py +++ b/devops/scripts/benchmarks/benches/syclbench.py @@ -31,8 +31,8 @@ def setup(self): repo_path = git_clone( self.directory, "sycl-bench-repo", - "https://github.com/mateuszpn/sycl-bench.git", - "1e6ab2cfd004a72c5336c26945965017e06eab71", + "https://github.com/unisa-hpc/sycl-bench.git", + "31fc70be6266193c4ba60eb1fe3ce26edee4ca5b", ) configure_command = [ diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index e701b9eac70a2..d05575a5a06ca 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -333,7 +333,7 @@ def validate_and_parse_env_args(env_args): parser.add_argument( "--adapter", type=str, - help="Options to build the Unified Runtime as part of the benchmark", + help="Unified Runtime adapter to use.", default="level_zero", ) parser.add_argument( From cfa4a9cbc5166db535b3754fa7023d01b2589594 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 14 Mar 2025 08:12:22 -0700 Subject: [PATCH 048/114] [CI] configure upload results --- .github/workflows/benchmark.yml | 2 ++ .github/workflows/sycl-linux-run-tests.yml | 6 ++++++ devops/actions/run-tests/benchmark_v2/action.yml | 5 ++++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index f044cbb066757..ca0364f94fde5 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -95,6 +95,7 @@ jobs: target_devices: ${{ matrix.backend }} reset_intel_gpu: ${{ matrix.reset_intel_gpu }} tests_selector: benchmark_v2 + benchmark_upload_results: ${{ inputs.upload_results }} repo_ref: ${{ matrix.ref }} devops_ref: ${{ github.ref }} sycl_toolchain_artifact: sycl_linux_default @@ -122,4 +123,5 @@ jobs: target_devices: ${{ matrix.backend }} reset_intel_gpu: ${{ matrix.reset_intel_gpu }} tests_selector: benchmark_v2 + benchmark_upload_results: ${{ inputs.upload_results }} repo_ref: ${{ github.ref }} diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index f5b243cb7fc05..cc0b5685afec2 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -114,6 +114,11 @@ on: default: '' required: False + benchmark_upload_results: + type: string + required: False + default: 'false' + workflow_dispatch: inputs: runner: @@ -337,6 +342,7 @@ jobs: uses: ./devops/actions/run-tests/benchmark_v2 with: target_devices: ${{ inputs.target_devices }} + upload_results: ${{ inputs.benchmark_upload_results }} env: RUNNER_TAG: ${{ inputs.runner }} GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} \ No newline at end of file diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index 375bc20faf857..e75f4b309499d 
100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -16,6 +16,9 @@ inputs: target_devices: type: string required: True + upload_results: + type: string + required: True runs: using: "composite" @@ -99,7 +102,7 @@ runs: echo "-----" ls - name: Push compute-benchmarks results - if: always() + if: inputs.upload_results == 'true' && always() shell: bash run: | # TODO redo configuration From ca963e6b9aaa91921e41ef2501891a0bec684ac2 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 14 Mar 2025 09:02:05 -0700 Subject: [PATCH 049/114] [CI] Change config to update during workflow run instead --- .github/workflows/sycl-docs.yml | 5 +++++ devops/scripts/benchmarks/html/config.js | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/sycl-docs.yml b/.github/workflows/sycl-docs.yml index a45c56bdd869c..6b748ec9c7ebb 100644 --- a/.github/workflows/sycl-docs.yml +++ b/.github/workflows/sycl-docs.yml @@ -51,6 +51,11 @@ jobs: mv $GITHUB_WORKSPACE/build/tools/clang/docs/html/* clang/ cp -r $GITHUB_WORKSPACE/repo/devops/scripts/benchmarks/html benchmarks touch .nojekyll + # Update benchmarking dashboard configuration + cat << 'EOF' > benchmarks/config.js + remoteDataUrl = 'https://raw.githubusercontent.com/intel/llvm-ci-perf-results/refs/heads/unify-ci/UR_DNP_INTEL_06_03/data.json'; + defaultCompareNames = ["Baseline_PVC_L0"]; + EOF # Upload the generated docs as an artifact and deploy to GitHub Pages. - name: Upload artifact uses: actions/upload-pages-artifact@v3 diff --git a/devops/scripts/benchmarks/html/config.js b/devops/scripts/benchmarks/html/config.js index 0a8551c5de152..3e67ae1dce8e5 100644 --- a/devops/scripts/benchmarks/html/config.js +++ b/devops/scripts/benchmarks/html/config.js @@ -1,2 +1,2 @@ -remoteDataUrl = 'https://raw.githubusercontent.com/intel/llvm-ci-perf-results/refs/heads/unify-ci/UR_DNP_INTEL_06_03/data.json'; +//remoteDataUrl = 'https://example.com/data.json'; //defaultCompareNames = ['baseline']; From 45a02e15ccb3cc01f408c41b3aa27c678c9a30c9 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 14 Mar 2025 09:28:09 -0700 Subject: [PATCH 050/114] [CI] Change save name depending on build --- .github/workflows/benchmark.yml | 1 + .github/workflows/sycl-linux-run-tests.yml | 7 +++- .../actions/run-tests/benchmark_v2/action.yml | 32 +++++++++---------- 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index ca0364f94fde5..ff2fddb2ae88d 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -96,6 +96,7 @@ jobs: reset_intel_gpu: ${{ matrix.reset_intel_gpu }} tests_selector: benchmark_v2 benchmark_upload_results: ${{ inputs.upload_results }} + benchmark_build_hash: ${{ inputs.commit_hash }} repo_ref: ${{ matrix.ref }} devops_ref: ${{ github.ref }} sycl_toolchain_artifact: sycl_linux_default diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index cc0b5685afec2..09821955a5b58 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -116,8 +116,12 @@ on: benchmark_upload_results: type: string - required: False default: 'false' + required: False + benchmark_build_hash: + type: string + default: '' + required: False workflow_dispatch: inputs: @@ -343,6 +347,7 @@ jobs: with: target_devices: ${{ inputs.target_devices }} upload_results: ${{ inputs.benchmark_upload_results }} + build_hash: 
${{ inputs.benchmark_build_hash }} env: RUNNER_TAG: ${{ inputs.runner }} GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} \ No newline at end of file diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index e75f4b309499d..bab571ec16ff2 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -19,6 +19,10 @@ inputs: upload_results: type: string required: True + build_hash: + type: string + required: False + default: '' runs: using: "composite" @@ -81,6 +85,8 @@ runs: run: | git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results - name: Run compute-benchmarks + env: + BUILD_HASH: ${{ inputs.build_hash }} shell: bash run: | # TODO generate summary + display helpful message here @@ -91,16 +97,22 @@ runs: pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt echo "-----" mkdir -p "./llvm-ci-perf-results/$RUNNER_NAME" + + # TODO accomodate for different GPUs and backends + SAVE_NAME="Baseline_PVC_L0" + if [ -n "$BUILD_HASH" ]; then + SAVE_NAME="Commit_PVC_$BUILD_HASH" + fi + taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \ "$(realpath ./llvm_test_workdir)" \ --sycl "$(realpath ./toolchain)" \ - --save baseline \ + --save "$SAVE_NAME" \ --output-html remote \ --results-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ --output-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ --preset Minimal echo "-----" - ls - name: Push compute-benchmarks results if: inputs.upload_results == 'true' && always() shell: bash @@ -120,18 +132,4 @@ runs: git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" unify-ci fi -# - name: Find benchmark result artifact here -# if: always() -# shell: bash -# run: | -# cat << EOF -# # -# # Artifact link for benchmark results here: -# # -# EOF -# - name: Archive compute-benchmark results -# if: always() -# uses: actions/upload-artifact@v4 -# with: -# name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }}) -# path: ./artifact + From 98f9d388393ec858c92dc72da7d0420362763562 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 14 Mar 2025 09:33:32 -0700 Subject: [PATCH 051/114] bump to 2024-2025 --- devops/scripts/benchmarks/benches/base.py | 2 +- devops/scripts/benchmarks/benches/llamacpp.py | 2 +- devops/scripts/benchmarks/benches/syclbench.py | 2 +- devops/scripts/benchmarks/benches/test.py | 2 +- devops/scripts/benchmarks/benches/umf.py | 2 +- devops/scripts/benchmarks/benches/velocity.py | 2 +- devops/scripts/benchmarks/history.py | 2 +- devops/scripts/benchmarks/html/index.html | 2 +- devops/scripts/benchmarks/html/scripts.js | 2 +- devops/scripts/benchmarks/main.py | 2 +- devops/scripts/benchmarks/output_html.py | 2 +- devops/scripts/benchmarks/presets.py | 2 +- devops/scripts/benchmarks/utils/compute_runtime.py | 2 +- devops/scripts/benchmarks/utils/oneapi.py | 2 +- devops/scripts/benchmarks/utils/result.py | 2 +- devops/scripts/benchmarks/utils/utils.py | 2 +- 16 files changed, 16 insertions(+), 16 deletions(-) diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py index 1bc99b11518e3..3ca6e3a7b7d3b 100644 --- a/devops/scripts/benchmarks/benches/base.py +++ b/devops/scripts/benchmarks/benches/base.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 
Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py index c12f811942849..38633912b001a 100644 --- a/devops/scripts/benchmarks/benches/llamacpp.py +++ b/devops/scripts/benchmarks/benches/llamacpp.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py index 14c0104d0a08c..b846b0853ce66 100644 --- a/devops/scripts/benchmarks/benches/syclbench.py +++ b/devops/scripts/benchmarks/benches/syclbench.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py index 4862bc64ecbaf..7afdd803b5cc3 100644 --- a/devops/scripts/benchmarks/benches/test.py +++ b/devops/scripts/benchmarks/benches/test.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/devops/scripts/benchmarks/benches/umf.py b/devops/scripts/benchmarks/benches/umf.py index 1f736e7755f92..e465d5e9e01c9 100644 --- a/devops/scripts/benchmarks/benches/umf.py +++ b/devops/scripts/benchmarks/benches/umf.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py index 652a831d0222e..916a321b143cf 100644 --- a/devops/scripts/benchmarks/benches/velocity.py +++ b/devops/scripts/benchmarks/benches/velocity.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index f05e0192d26ee..0b80c54ad7393 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html index 32a00ab67bb47..dc79c2a4781b6 100644 --- a/devops/scripts/benchmarks/html/index.html +++ b/devops/scripts/benchmarks/html/index.html @@ -1,5 +1,5 @@ +
+
 [index.html hunk: page markup not preserved in this copy; the surviving text is the "Historical Results" and "Comparisons" headings]
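To make the tag scheme added by these patches concrete, here is a minimal sketch (the benchmark class is hypothetical; the `BenchmarkMetadata` fields and the example tag names are taken from the surrounding diffs) of how a benchmark could report tags that end up in the dashboard's tag filters:

```python
from utils.result import BenchmarkMetadata

class MyKernelBench:  # hypothetical benchmark, for illustration only
    def get_metadata(self) -> BenchmarkMetadata:
        # Tag names should match entries in the benchmark_tags registry in
        # benches/base.py (e.g. 'micro', 'submit', 'sycl'); the dashboard
        # renders them as filter checkboxes with tooltips from benchmarkTags.
        return BenchmarkMetadata(
            type="benchmark",
            description="Illustrative single-kernel submission latency test.",
            notes="Multi-line notes render with white-space: pre-line.\n"
            "Second line of notes.",
            unstable=None,
            tags=["micro", "submit", "sycl"],
        )
```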
diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js index a3ef986efdf14..547bcc77bcf31 100644 --- a/devops/scripts/benchmarks/html/scripts.js +++ b/devops/scripts/benchmarks/html/scripts.js @@ -8,9 +8,10 @@ let activeRuns = new Set(defaultCompareNames); let chartInstances = new Map(); let suiteNames = new Set(); let timeseriesData, barChartsData, allRunNames; +let activeTags = new Set(); // DOM Elements -let runSelect, selectedRunsDiv, suiteFiltersContainer; +let runSelect, selectedRunsDiv, suiteFiltersContainer, tagFiltersContainer; // Run selector functions function updateSelectedRuns(forceUpdate = true) { @@ -218,6 +219,14 @@ function createChartContainer(data, canvasId, type) { container.appendChild(unstableWarning); } + // Add description if present in metadata (moved outside of details) + if (metadata && metadata.description) { + const descElement = document.createElement('div'); + descElement.className = 'benchmark-description'; + descElement.textContent = metadata.description; + container.appendChild(descElement); + } + // Add notes if present if (metadata && metadata.notes) { const noteElement = document.createElement('div'); @@ -227,12 +236,29 @@ function createChartContainer(data, canvasId, type) { container.appendChild(noteElement); } - // Add description if present in metadata, but only for groups - if (metadata && metadata.description && metadata.type === "group") { - const descElement = document.createElement('div'); - descElement.className = 'benchmark-description'; - descElement.textContent = metadata.description; - container.appendChild(descElement); + // Add tags if present + if (metadata && metadata.tags) { + container.setAttribute('data-tags', metadata.tags.join(',')); + + // Add tags display + const tagsContainer = document.createElement('div'); + tagsContainer.className = 'benchmark-tags'; + + metadata.tags.forEach(tag => { + const tagElement = document.createElement('span'); + tagElement.className = 'tag'; + tagElement.textContent = tag; + tagElement.setAttribute('data-tag', tag); + + // Add tooltip with tag description + if (benchmarkTags[tag]) { + tagElement.setAttribute('title', benchmarkTags[tag].description); + } + + tagsContainer.appendChild(tagElement); + }); + + container.appendChild(tagsContainer); } const canvas = document.createElement('canvas'); @@ -358,6 +384,7 @@ function updateURL() { const regex = document.getElementById('bench-filter').value; const activeSuites = getActiveSuites(); const activeRunsList = Array.from(activeRuns); + const activeTagsList = Array.from(activeTags); if (regex) { url.searchParams.set('regex', regex); @@ -371,6 +398,13 @@ function updateURL() { url.searchParams.delete('suites'); } + // Add tags to URL + if (activeTagsList.length > 0) { + url.searchParams.set('tags', activeTagsList.join(',')); + } else { + url.searchParams.delete('tags'); + } + // Handle the runs parameter if (activeRunsList.length > 0) { // Check if the active runs are the same as default runs @@ -404,11 +438,18 @@ function filterCharts() { const label = container.getAttribute('data-label'); const suite = container.getAttribute('data-suite'); const isUnstable = container.getAttribute('data-unstable') === 'true'; + const tags = container.getAttribute('data-tags') ? 
+ container.getAttribute('data-tags').split(',') : []; + + // Check if benchmark has all active tags (if any are selected) + const hasAllActiveTags = activeTags.size === 0 || + Array.from(activeTags).every(tag => tags.includes(tag)); // Hide unstable benchmarks if showUnstable is false const shouldShow = regex.test(label) && activeSuites.includes(suite) && - (isUnstableEnabled() || !isUnstable); + (isUnstableEnabled() || !isUnstable) && + hasAllActiveTags; container.style.display = shouldShow ? '' : 'none'; }); @@ -585,6 +626,77 @@ function setupToggles() { } } +function setupTagFilters() { + tagFiltersContainer = document.getElementById('tag-filters'); + + // Get all unique tags from benchmark metadata + const allTags = new Set(); + + for (const [key, metadata] of Object.entries(benchmarkMetadata)) { + if (metadata.tags) { + metadata.tags.forEach(tag => allTags.add(tag)); + } + } + + // Sort tags alphabetically + const sortedTags = Array.from(allTags).sort(); + + // Create tag filter elements + sortedTags.forEach(tag => { + const tagContainer = document.createElement('div'); + tagContainer.className = 'tag-filter'; + + const checkbox = document.createElement('input'); + checkbox.type = 'checkbox'; + checkbox.id = `tag-${tag}`; + checkbox.className = 'tag-checkbox'; + checkbox.dataset.tag = tag; + + const label = document.createElement('label'); + label.htmlFor = `tag-${tag}`; + label.textContent = tag; + + // Add info icon with tooltip if tag description exists + if (benchmarkTags[tag]) { + const infoIcon = document.createElement('span'); + infoIcon.className = 'tag-info'; + infoIcon.textContent = 'ⓘ'; + infoIcon.title = benchmarkTags[tag].description; + label.appendChild(infoIcon); + } + + checkbox.addEventListener('change', function() { + if (this.checked) { + activeTags.add(tag); + } else { + activeTags.delete(tag); + } + filterCharts(); + }); + + tagContainer.appendChild(checkbox); + tagContainer.appendChild(label); + tagFiltersContainer.appendChild(tagContainer); + }); +} + +function toggleAllTags(select) { + const checkboxes = document.querySelectorAll('.tag-checkbox'); + + checkboxes.forEach(checkbox => { + checkbox.checked = select; + const tag = checkbox.dataset.tag; + + if (select) { + activeTags.add(tag); + } else { + activeTags.delete(tag); + } + }); + + filterCharts(); +} + function initializeCharts() { // Process raw data timeseriesData = processTimeseriesData(benchmarkRuns); @@ -621,11 +733,13 @@ function initializeCharts() { // Setup UI components setupRunSelector(); setupSuiteFilters(); + setupTagFilters(); setupToggles(); // Apply URL parameters const regexParam = getQueryParam('regex'); const suitesParam = getQueryParam('suites'); + const tagsParam = getQueryParam('tags'); if (regexParam) { document.getElementById('bench-filter').value = regexParam; @@ -638,6 +752,18 @@ function initializeCharts() { }); } + // Apply tag filters from URL + if (tagsParam) { + const tags = tagsParam.split(','); + tags.forEach(tag => { + const checkbox = document.querySelector(`.tag-checkbox[data-tag="${tag}"]`); + if (checkbox) { + checkbox.checked = true; + activeTags.add(tag); + } + }); + } + // Setup event listeners document.querySelectorAll('.suite-checkbox').forEach(checkbox => { checkbox.addEventListener('change', filterCharts); @@ -651,6 +777,7 @@ function initializeCharts() { // Make functions available globally for onclick handlers window.addSelectedRun = addSelectedRun; window.removeRun = removeRun; +window.toggleAllTags = toggleAllTags; // Load data based on configuration 
function loadData() { diff --git a/devops/scripts/benchmarks/html/styles.css b/devops/scripts/benchmarks/html/styles.css new file mode 100644 index 0000000000000..9a3c5fe69b287 --- /dev/null +++ b/devops/scripts/benchmarks/html/styles.css @@ -0,0 +1,373 @@ +body { + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; + margin: 0; + padding: 16px; + background: #f8f9fa; +} +.container { + max-width: 1100px; + margin: 0 auto; +} +h1, h2 { + color: #212529; + text-align: center; + margin-bottom: 24px; + font-weight: 500; +} +.chart-container { + background: white; + border-radius: 8px; + padding: 24px; + margin-bottom: 24px; + box-shadow: 0 1px 3px rgba(0,0,0,0.1); +} +@media (max-width: 768px) { + body { + padding: 12px; + } + .chart-container { + padding: 16px; + border-radius: 6px; + } + h1 { + font-size: 24px; + margin-bottom: 16px; + } +} +.filter-container { + text-align: center; + margin-bottom: 24px; +} +.filter-container input { + padding: 8px; + font-size: 16px; + border: 1px solid #ccc; + border-radius: 4px; + width: 400px; + max-width: 100%; +} +.suite-filter-container { + text-align: center; + margin-bottom: 24px; + padding: 16px; + background: #e9ecef; + border-radius: 8px; +} +.suite-checkbox { + margin: 0 8px; +} +details { + margin-bottom: 24px; +} +summary { + display: flex; + justify-content: space-between; + align-items: center; + font-size: 16px; + font-weight: 500; + cursor: pointer; + padding: 12px 16px; + background: #dee2e6; + border-radius: 8px; + user-select: none; +} +summary:hover { + background: #ced4da; +} +summary::marker { + display: none; +} +summary::-webkit-details-marker { + display: none; +} +summary::after { + content: "▼"; + font-size: 12px; + margin-left: 8px; + transition: transform 0.3s; +} +details[open] summary::after { + transform: rotate(180deg); +} +.extra-info { + padding: 8px; + background: #f8f9fa; + border-radius: 8px; + margin-top: 8px; +} +.run-selector { + text-align: center; + margin-bottom: 24px; + padding: 16px; + background: #e9ecef; + border-radius: 8px; +} +.run-selector select { + width: 300px; + padding: 8px; + margin-right: 8px; +} +.run-selector button { + padding: 8px 16px; + background: #0068B5; + color: white; + border: none; + border-radius: 4px; + cursor: pointer; +} +.run-selector button:hover { + background: #00C7FD; +} +.selected-runs { + margin-top: 12px; +} +.selected-run { + display: inline-block; + padding: 4px 8px; + margin: 4px; + background: #e2e6ea; + border-radius: 4px; +} +.selected-run button { + margin-left: 8px; + padding: 0 4px; + background: none; + border: none; + color: #dc3545; + cursor: pointer; +} +.download-button { + background: none; + border: none; + color: #0068B5; + cursor: pointer; + font-size: 16px; + padding: 4px; + margin-left: 8px; +} +.download-button:hover { + color: #00C7FD; +} +.loading-indicator { + text-align: center; + font-size: 18px; + color: #0068B5; + margin-bottom: 20px; +} +.extra-info-entry { + border: 1px solid #ddd; + padding: 10px; + margin-bottom: 10px; + background-color: #f9f9f9; + border-radius: 5px; +} +.extra-info-entry strong { + display: block; + margin-bottom: 5px; +} +.extra-info-entry em { + color: #555; +} +.display-options-container { + text-align: center; + margin-bottom: 24px; + padding: 16px; + background: #e9ecef; + border-radius: 8px; +} +.display-options-container label { + margin: 0 12px; + cursor: pointer; +} +.display-options-container input { + margin-right: 8px; +} +.benchmark-note { + background-color: #cfe2ff; + color: 
#084298; + padding: 10px; + margin-bottom: 10px; + border-radius: 5px; + border-left: 4px solid #084298; + white-space: pre-line; +} +.benchmark-unstable { + background-color: #f8d7da; + color: #842029; + padding: 10px; + margin-bottom: 10px; + border-radius: 5px; + border-left: 4px solid #842029; + white-space: pre-line; +} +.note-text { + color: #084298; +} +.unstable-warning { + color: #842029; + font-weight: bold; +} +.unstable-text { + color: #842029; +} +.options-container { + margin-bottom: 24px; + background: #e9ecef; + border-radius: 8px; + overflow: hidden; +} +.options-container summary { + padding: 12px 16px; + font-weight: 500; + cursor: pointer; + background: #dee2e6; + user-select: none; +} +.options-container summary:hover { + background: #ced4da; +} +.options-content { + padding: 16px; + display: flex; + flex-wrap: wrap; + gap: 24px; +} +.filter-section { + flex: 1; + min-width: 300px; +} +.filter-section h3 { + margin-top: 0; + margin-bottom: 12px; + font-size: 18px; + font-weight: 500; + text-align: left; +} +#suite-filters { + display: flex; + flex-wrap: wrap; + gap: 8px; +} +.display-options { + display: flex; + flex-direction: column; + gap: 8px; +} +.display-options label { + display: flex; + align-items: center; + cursor: pointer; +} +.display-options input { + margin-right: 8px; +} +.benchmark-description { + background-color: #f2f2f2; + color: #333; + padding: 10px; + margin-bottom: 10px; + border-radius: 5px; + border-left: 4px solid #6c757d; + white-space: pre-line; + font-style: italic; +} +/* Tag styles */ +.benchmark-tags { + display: flex; + flex-wrap: wrap; + gap: 4px; + margin-bottom: 10px; +} + +.tag { + display: inline-block; + background-color: #e2e6ea; + color: #495057; + padding: 2px 8px; + border-radius: 12px; + font-size: 12px; + cursor: default; +} + +.tag-filter { + display: inline-flex; + align-items: center; + margin: 4px; +} + +.tag-filter label { + margin-left: 4px; + cursor: pointer; + display: flex; + align-items: center; +} + +.tag-info { + color: #0068B5; + margin-left: 4px; + cursor: help; + font-size: 12px; +} + +#tag-filters { + display: flex; + flex-wrap: wrap; + max-height: 200px; + overflow-y: auto; + border: 1px solid #dee2e6; + border-radius: 4px; + padding: 8px; + background-color: #f8f9fa; +} + +.tag-filter-actions { + margin-bottom: 8px; + display: flex; + gap: 8px; +} + +.tag-filter-actions button { + padding: 4px 8px; + background: #e2e6ea; + border: none; + border-radius: 4px; + cursor: pointer; +} + +.tag-filter-actions button:hover { + background: #ced4da; +} + +#active-tags { + display: none; + flex-wrap: wrap; + gap: 8px; + margin-top: 12px; + padding: 8px; + background-color: #f8f9fa; + border-radius: 4px; +} + +.active-tag { + display: flex; + align-items: center; + background-color: #0068B5; + color: white; + padding: 4px 8px; + border-radius: 12px; + font-size: 14px; +} + +.remove-tag { + background: none; + border: none; + color: white; + margin-left: 4px; + cursor: pointer; + font-size: 16px; + padding: 0 4px; +} + +.remove-tag:hover { + color: #f8d7da; +} diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py index e69dfeb153b49..429b24eb632c8 100644 --- a/devops/scripts/benchmarks/output_html.py +++ b/devops/scripts/benchmarks/output_html.py @@ -6,7 +6,8 @@ import json import os from options import options -from utils.result import BenchmarkMetadata +from utils.result import BenchmarkMetadata, BenchmarkOutput +from benches.base import benchmark_tags, benchmark_tags_dict 
def generate_html( @@ -16,30 +17,33 @@ def generate_html( metadata: dict[str, BenchmarkMetadata], ): benchmark_runs.sort(key=lambda run: run.date, reverse=True) - serializable_metadata = {k: v.__dict__ for k, v in metadata.items()} - - serializable_runs = [json.loads(run.to_json()) for run in benchmark_runs] - - data = { - "runs": serializable_runs, - "metadata": serializable_metadata, - "defaultCompareNames": compare_names, - } + + # Create the comprehensive output object + output = BenchmarkOutput( + runs=benchmark_runs, + metadata=metadata, + tags=benchmark_tags_dict, + default_compare_names=compare_names + ) if options.output_html == "local": data_path = os.path.join(html_path, "data.js") with open(data_path, "w") as f: # For local format, we need to write JavaScript variable assignments f.write("benchmarkRuns = ") - json.dump(data["runs"], f, indent=2) + json.dump(json.loads(output.to_json())["runs"], f, indent=2) f.write(";\n\n") f.write("benchmarkMetadata = ") - json.dump(data["metadata"], f, indent=2) + json.dump(json.loads(output.to_json())["metadata"], f, indent=2) + f.write(";\n\n") + + f.write("benchmarkTags = ") + json.dump(json.loads(output.to_json())["tags"], f, indent=2) f.write(";\n\n") f.write("defaultCompareNames = ") - json.dump(data["defaultCompareNames"], f, indent=2) + json.dump(output.default_compare_names, f, indent=2) f.write(";\n") print(f"See {os.getcwd()}/html/index.html for the results.") @@ -47,7 +51,7 @@ def generate_html( # For remote format, we write a single JSON file data_path = os.path.join(html_path, "data.json") with open(data_path, "w") as f: - json.dump(data, f, indent=2) + json.dump(json.loads(output.to_json()), f, indent=2) print( f"Upload {data_path} to a location set in config.js remoteDataUrl argument." diff --git a/devops/scripts/benchmarks/utils/result.py b/devops/scripts/benchmarks/utils/result.py index 0d450ad7b9154..82fc7ca1fddc2 100644 --- a/devops/scripts/benchmarks/utils/result.py +++ b/devops/scripts/benchmarks/utils/result.py @@ -4,7 +4,7 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception from dataclasses import dataclass, field -from typing import Optional +from typing import Optional, Dict, List, Any from dataclasses_json import config, dataclass_json from datetime import datetime @@ -57,4 +57,13 @@ class BenchmarkMetadata: description: Optional[str] = None notes: Optional[str] = None unstable: Optional[str] = None - tags: list[BenchmarkTag] = field(default_factory=list) + tags: list[str] = field(default_factory=list) # Changed to list of tag names + + +@dataclass_json +@dataclass +class BenchmarkOutput: + runs: list[BenchmarkRun] + metadata: Dict[str, BenchmarkMetadata] + tags: Dict[str, BenchmarkTag] + default_compare_names: List[str] = field(default_factory=list) From 3662b430fa20585aebeec6a256433160b7e8764d Mon Sep 17 00:00:00 2001 From: pbalcer Date: Thu, 20 Mar 2025 13:01:40 +0100 Subject: [PATCH 066/114] tiny tweaks for benchmark tags --- devops/scripts/benchmarks/benches/base.py | 2 +- devops/scripts/benchmarks/benches/compute.py | 4 ++-- devops/scripts/benchmarks/html/scripts.js | 20 +++++++++----------- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py index 16ff5605b08df..209dc993ae53c 100644 --- a/devops/scripts/benchmarks/benches/base.py +++ b/devops/scripts/benchmarks/benches/base.py @@ -110,7 +110,7 @@ def name(self): raise NotImplementedError() def description(self): - return "No description provided." 
+ return "" def notes(self) -> str: return None diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index 00db6bdd224d1..bff535dd2c335 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -84,12 +84,12 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n" "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n" "Work is ongoing to reduce the overhead of the SYCL API\n", - tags=['submit', 'micro', 'sycl', 'ur', 'l0'] + tags=['submit', 'micro', 'sycl', 'ur', 'L0'] ), "SinKernelGraph": BenchmarkMetadata( type="group", unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.", - tags=['submit', 'micro', 'sycl', 'ur', 'L0'] + tags=['submit', 'memory', 'proxy', 'sycl', 'ur', 'L0', 'graph'] ), "SubmitGraph": BenchmarkMetadata( type="group", diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js index 547bcc77bcf31..fbfb496533194 100644 --- a/devops/scripts/benchmarks/html/scripts.js +++ b/devops/scripts/benchmarks/html/scripts.js @@ -628,21 +628,19 @@ function setupToggles() { function setupTagFilters() { tagFiltersContainer = document.getElementById('tag-filters'); + + const allTags = []; - // Get all unique tags from benchmark metadata - const allTags = new Set(); - - for (const [key, metadata] of Object.entries(benchmarkMetadata)) { - if (metadata.tags) { - metadata.tags.forEach(tag => allTags.add(tag)); + if (benchmarkTags) { + for (const tag in benchmarkTags) { + if (!allTags.includes(tag)) { + allTags.push(tag); + } } } - - // Sort tags alphabetically - const sortedTags = Array.from(allTags).sort(); - + // Create tag filter elements - sortedTags.forEach(tag => { + allTags.forEach(tag => { const tagContainer = document.createElement('div'); tagContainer.className = 'tag-filter'; From 75dd2294adb0682dcab400ce66897ee2d404bbc6 Mon Sep 17 00:00:00 2001 From: pbalcer Date: Thu, 20 Mar 2025 13:23:26 +0100 Subject: [PATCH 067/114] better and more tags --- devops/scripts/benchmarks/benches/base.py | 31 +++++++++++------- devops/scripts/benchmarks/benches/compute.py | 32 +++++++++---------- devops/scripts/benchmarks/benches/llamacpp.py | 2 +- .../scripts/benchmarks/benches/syclbench.py | 11 ++++++- devops/scripts/benchmarks/benches/umf.py | 2 +- devops/scripts/benchmarks/benches/velocity.py | 29 ++++++++++++++++- 6 files changed, 75 insertions(+), 32 deletions(-) diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py index 209dc993ae53c..901235f6e1455 100644 --- a/devops/scripts/benchmarks/benches/base.py +++ b/devops/scripts/benchmarks/benches/base.py @@ -11,18 +11,25 @@ from options import options from utils.utils import download, run -benchmark_tags = [BenchmarkTag('sycl', 'Benchmark uses SYCL RT'), - BenchmarkTag('ur', 'Benchmark uses Unified Runtime'), - BenchmarkTag('L0', 'Benchmark uses L0 directly'), - BenchmarkTag('umf', 'Benchmark uses UMF directly'), - BenchmarkTag('micro', 'Microbenchmark focusing on a specific niche'), - BenchmarkTag('application', 'Real application-based performance test'), - BenchmarkTag('proxy', 'Benchmark that tries to implement a real application use-case'), - BenchmarkTag('submit', 'Benchmark tests the kernel submit path'), - BenchmarkTag('math', 
'Benchmark tests math compute performance'), - BenchmarkTag('memory', 'Benchmark tests memory transfer performance'), - BenchmarkTag('allocation', 'Benchmark tests memory allocation performance'), - BenchmarkTag('graph', 'Benchmark tests graph performance'),] +benchmark_tags = [ + BenchmarkTag('SYCL', 'Benchmark uses SYCL runtime'), + BenchmarkTag('UR', 'Benchmark uses Unified Runtime API'), + BenchmarkTag('L0', 'Benchmark uses Level Zero API directly'), + BenchmarkTag('UMF', 'Benchmark uses Unified Memory Framework directly'), + BenchmarkTag('micro', 'Microbenchmark focusing on a specific functionality'), + BenchmarkTag('application', 'Real application-based performance test'), + BenchmarkTag('proxy', 'Benchmark that simulates real application use-cases'), + BenchmarkTag('submit', 'Tests kernel submission performance'), + BenchmarkTag('math', 'Tests math computation performance'), + BenchmarkTag('memory', 'Tests memory transfer or bandwidth performance'), + BenchmarkTag('allocation', 'Tests memory allocation performance'), + BenchmarkTag('graph', 'Tests graph-based execution performance'), + BenchmarkTag('latency', 'Measures operation latency'), + BenchmarkTag('throughput', 'Measures operation throughput'), + BenchmarkTag('inference', 'Tests ML/AI inference performance'), + BenchmarkTag('image', 'Image processing benchmark'), + BenchmarkTag('simulation', 'Physics or scientific simulation benchmark'), +] benchmark_tags_dict = {tag.name: tag for tag in benchmark_tags} diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index bff535dd2c335..e1f24e0178789 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -28,9 +28,9 @@ def runtime_to_name(runtime: RUNTIMES) -> str: def runtime_to_tag_name(runtime: RUNTIMES) -> str: return { - RUNTIMES.SYCL: "sycl", + RUNTIMES.SYCL: "SYCL", RUNTIMES.LEVEL_ZERO: "L0", - RUNTIMES.UR: "ur", + RUNTIMES.UR: "UR", }[runtime] @@ -84,16 +84,16 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n" "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n" "Work is ongoing to reduce the overhead of the SYCL API\n", - tags=['submit', 'micro', 'sycl', 'ur', 'L0'] + tags=['submit', 'micro', 'SYCL', 'UR', 'L0'] ), "SinKernelGraph": BenchmarkMetadata( type="group", unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.", - tags=['submit', 'memory', 'proxy', 'sycl', 'ur', 'L0', 'graph'] + tags=['submit', 'memory', 'proxy', 'SYCL', 'UR', 'L0', 'graph'] ), "SubmitGraph": BenchmarkMetadata( type="group", - tags=['submit', 'micro', 'sycl', 'ur', 'L0', 'graph'] + tags=['submit', 'micro', 'SYCL', 'UR', 'L0', 'graph'] ), } @@ -279,7 +279,7 @@ def __init__(self, bench, runtime: RUNTIMES, ioq, measure_completion=0): ) def get_tags(self): - return ['submit', runtime_to_tag_name(self.runtime), 'micro'] + return ['submit', 'latency', runtime_to_tag_name(self.runtime), 'micro'] def name(self): order = "in order" if self.ioq else "out of order" @@ -344,7 +344,7 @@ def description(self) -> str: ) def get_tags(self): - return ['memory', 'sycl', 'micro'] + return ['memory', 'submit', 'latency', 'SYCL', 'micro'] def bin_args(self) -> list[str]: return [ @@ -377,7 +377,7 @@ def description(self) -> str: ) def get_tags(self): - return ['memory', 'sycl', 'micro'] + 
return ['memory', 'latency', 'SYCL', 'micro'] def bin_args(self) -> list[str]: return [ @@ -407,7 +407,7 @@ def description(self) -> str: ) def get_tags(self): - return ['memory', 'sycl', 'micro'] + return ['memory', 'latency', 'SYCL', 'micro'] def bin_args(self) -> list[str]: return [ @@ -439,7 +439,7 @@ def lower_is_better(self): return False def get_tags(self): - return ['memory', 'sycl', 'micro'] + return ['memory', 'throughput', 'SYCL', 'micro'] def bin_args(self) -> list[str]: return [ @@ -468,7 +468,7 @@ def description(self) -> str: ) def get_tags(self): - return ['math', 'sycl', 'micro'] + return ['math', 'throughput', 'SYCL', 'micro'] def bin_args(self) -> list[str]: return [ @@ -517,7 +517,7 @@ def description(self) -> str: ) def get_tags(self): - return ['memory', 'ur', 'micro'] + return ['memory', 'latency', 'UR', 'micro'] def bin_args(self) -> list[str]: return [ @@ -560,7 +560,7 @@ def unstable(self) -> str: return "This benchmark combines both eager and graph execution, and may not be representative of real use cases." def get_tags(self): - return ['graph', runtime_to_tag_name(self.runtime), 'proxy', 'submit', 'memory'] + return ['graph', runtime_to_tag_name(self.runtime), 'proxy', 'submit', 'memory', 'latency'] def bin_args(self) -> list[str]: return [ @@ -595,7 +595,7 @@ def name(self): return f"graph_api_benchmark_{self.runtime.value} SubmitGraph numKernels:{self.numKernels} ioq {self.inOrderQueue} measureCompletion {self.measureCompletionTime}" def get_tags(self): - return ['graph', runtime_to_tag_name(self.runtime), 'micro', 'submit'] + return ['graph', runtime_to_tag_name(self.runtime), 'micro', 'submit', 'latency'] def bin_args(self) -> list[str]: return [ @@ -625,7 +625,7 @@ def name(self): return f"ulls_benchmark_{self.runtime.value} EmptyKernel wgc:{self.wgc}, wgs:{self.wgs}" def get_tags(self): - return [runtime_to_tag_name(self.runtime), 'micro'] + return [runtime_to_tag_name(self.runtime), 'micro', 'latency', 'submit'] def bin_args(self) -> list[str]: return [ @@ -666,7 +666,7 @@ def name(self): return f"ulls_benchmark_{self.runtime.value} KernelSwitch count {self.count} kernelTime {self.kernelTime}" def get_tags(self): - return [runtime_to_tag_name(self.runtime), 'micro'] + return [runtime_to_tag_name(self.runtime), 'micro', 'latency', 'submit'] def bin_args(self) -> list[str]: return [ diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py index f0b5694b52dc8..cf203bca17f4f 100644 --- a/devops/scripts/benchmarks/benches/llamacpp.py +++ b/devops/scripts/benchmarks/benches/llamacpp.py @@ -102,7 +102,7 @@ def description(self) -> str: ) def get_tags(self): - return ['sycl', 'application'] + return ['SYCL', 'application', 'inference', 'throughput'] def lower_is_better(self): return False diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py index d9d435baa064e..50f35182eaddc 100644 --- a/devops/scripts/benchmarks/benches/syclbench.py +++ b/devops/scripts/benchmarks/benches/syclbench.py @@ -113,7 +113,16 @@ def extra_env_vars(self) -> dict: return {} def get_tags(self): - return ['sycl', 'micro'] + base_tags = ['SYCL', 'micro'] + if "Memory" in self.bench_name or "mem" in self.bench_name.lower(): + base_tags.append('memory') + if "Reduction" in self.bench_name: + base_tags.append('math') + if "Bandwidth" in self.bench_name: + base_tags.append('throughput') + if "Latency" in self.bench_name: + base_tags.append('latency') + return base_tags def setup(self): 
self.benchmark_bin = os.path.join( diff --git a/devops/scripts/benchmarks/benches/umf.py b/devops/scripts/benchmarks/benches/umf.py index ea2ecfd175a85..60964fcf93298 100644 --- a/devops/scripts/benchmarks/benches/umf.py +++ b/devops/scripts/benchmarks/benches/umf.py @@ -75,7 +75,7 @@ def setup(self): self.benchmark_bin = os.path.join(options.umf, "benchmark", self.bench_name) def get_tags(self): - return ['umf', 'allocation'] + return ['UMF', 'allocation', 'latency', 'micro'] def run(self, env_vars) -> list[Result]: command = [ diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py index 6ff3178202481..623079067b91d 100644 --- a/devops/scripts/benchmarks/benches/velocity.py +++ b/devops/scripts/benchmarks/benches/velocity.py @@ -119,7 +119,7 @@ def description(self) -> str: return "" def get_tags(self): - return ['sycl', 'application'] + return ['SYCL', 'application'] def run(self, env_vars) -> list[Result]: env_vars.update(self.extra_env_vars()) @@ -175,6 +175,9 @@ def parse_output(self, stdout: str) -> float: "{self.__class__.__name__}: Failed to parse keys per second from benchmark output." ) + def get_tags(self): + return ['SYCL', 'application', 'throughput'] + class Bitcracker(VelocityBase): def __init__(self, vb: VelocityBench): @@ -213,6 +216,9 @@ def parse_output(self, stdout: str) -> float: "{self.__class__.__name__}: Failed to parse benchmark output." ) + def get_tags(self): + return ['SYCL', 'application', 'throughput'] + class SobelFilter(VelocityBase): def __init__(self, vb: VelocityBench): @@ -259,6 +265,9 @@ def parse_output(self, stdout: str) -> float: "{self.__class__.__name__}: Failed to parse benchmark output." ) + def get_tags(self): + return ['SYCL', 'application', 'image', 'throughput'] + class QuickSilver(VelocityBase): def __init__(self, vb: VelocityBench): @@ -306,6 +315,9 @@ def parse_output(self, stdout: str) -> float: "{self.__class__.__name__}: Failed to parse benchmark output." 
) + def get_tags(self): + return ['SYCL', 'application', 'simulation', 'throughput'] + class Easywave(VelocityBase): def __init__(self, vb: VelocityBench): @@ -370,6 +382,9 @@ def parse_output(self, stdout: str) -> float: os.path.join(options.benchmark_cwd, "easywave.log") ) + def get_tags(self): + return ['SYCL', 'application', 'simulation'] + class CudaSift(VelocityBase): def __init__(self, vb: VelocityBench): @@ -398,6 +413,9 @@ def parse_output(self, stdout: str) -> float: else: raise ValueError("Failed to parse benchmark output.") + def get_tags(self): + return ['SYCL', 'application', 'image'] + class DLCifar(VelocityBase): def __init__(self, vb: VelocityBench): @@ -449,6 +467,9 @@ def parse_output(self, stdout: str) -> float: else: raise ValueError("Failed to parse benchmark output.") + def get_tags(self): + return ['SYCL', 'application', 'inference', 'image'] + class DLMnist(VelocityBase): def __init__(self, vb: VelocityBench): @@ -534,6 +555,9 @@ def parse_output(self, stdout: str) -> float: else: raise ValueError("Failed to parse benchmark output.") + def get_tags(self): + return ['SYCL', 'application', 'inference', 'image'] + class SVM(VelocityBase): def __init__(self, vb: VelocityBench): @@ -576,3 +600,6 @@ def parse_output(self, stdout: str) -> float: return float(match.group(1)) else: raise ValueError("Failed to parse benchmark output.") + + def get_tags(self): + return ['SYCL', 'application', 'inference'] From cec8f05d40a00981e04c97ecb0abb47b4d2fa4de Mon Sep 17 00:00:00 2001 From: pbalcer Date: Thu, 20 Mar 2025 14:31:27 +0100 Subject: [PATCH 068/114] formatting --- devops/scripts/benchmarks/benches/base.py | 38 +++++++-------- devops/scripts/benchmarks/benches/compute.py | 43 +++++++++++------ devops/scripts/benchmarks/benches/llamacpp.py | 2 +- .../scripts/benchmarks/benches/syclbench.py | 10 ++-- devops/scripts/benchmarks/benches/umf.py | 2 +- devops/scripts/benchmarks/benches/velocity.py | 20 ++++---- devops/scripts/benchmarks/html/index.html | 19 ++++---- devops/scripts/benchmarks/html/styles.css | 46 ++++++------------- devops/scripts/benchmarks/options.py | 1 + devops/scripts/benchmarks/output_html.py | 6 +-- .../benchmarks/utils/compute_runtime.py | 5 +- 11 files changed, 96 insertions(+), 96 deletions(-) diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py index 901235f6e1455..4c2973d250e3d 100644 --- a/devops/scripts/benchmarks/benches/base.py +++ b/devops/scripts/benchmarks/benches/base.py @@ -12,27 +12,28 @@ from utils.utils import download, run benchmark_tags = [ - BenchmarkTag('SYCL', 'Benchmark uses SYCL runtime'), - BenchmarkTag('UR', 'Benchmark uses Unified Runtime API'), - BenchmarkTag('L0', 'Benchmark uses Level Zero API directly'), - BenchmarkTag('UMF', 'Benchmark uses Unified Memory Framework directly'), - BenchmarkTag('micro', 'Microbenchmark focusing on a specific functionality'), - BenchmarkTag('application', 'Real application-based performance test'), - BenchmarkTag('proxy', 'Benchmark that simulates real application use-cases'), - BenchmarkTag('submit', 'Tests kernel submission performance'), - BenchmarkTag('math', 'Tests math computation performance'), - BenchmarkTag('memory', 'Tests memory transfer or bandwidth performance'), - BenchmarkTag('allocation', 'Tests memory allocation performance'), - BenchmarkTag('graph', 'Tests graph-based execution performance'), - BenchmarkTag('latency', 'Measures operation latency'), - BenchmarkTag('throughput', 'Measures operation throughput'), - 
BenchmarkTag('inference', 'Tests ML/AI inference performance'), - BenchmarkTag('image', 'Image processing benchmark'), - BenchmarkTag('simulation', 'Physics or scientific simulation benchmark'), + BenchmarkTag("SYCL", "Benchmark uses SYCL runtime"), + BenchmarkTag("UR", "Benchmark uses Unified Runtime API"), + BenchmarkTag("L0", "Benchmark uses Level Zero API directly"), + BenchmarkTag("UMF", "Benchmark uses Unified Memory Framework directly"), + BenchmarkTag("micro", "Microbenchmark focusing on a specific functionality"), + BenchmarkTag("application", "Real application-based performance test"), + BenchmarkTag("proxy", "Benchmark that simulates real application use-cases"), + BenchmarkTag("submit", "Tests kernel submission performance"), + BenchmarkTag("math", "Tests math computation performance"), + BenchmarkTag("memory", "Tests memory transfer or bandwidth performance"), + BenchmarkTag("allocation", "Tests memory allocation performance"), + BenchmarkTag("graph", "Tests graph-based execution performance"), + BenchmarkTag("latency", "Measures operation latency"), + BenchmarkTag("throughput", "Measures operation throughput"), + BenchmarkTag("inference", "Tests ML/AI inference performance"), + BenchmarkTag("image", "Image processing benchmark"), + BenchmarkTag("simulation", "Physics or scientific simulation benchmark"), ] benchmark_tags_dict = {tag.name: tag for tag in benchmark_tags} + class Benchmark: def __init__(self, directory, suite): self.directory = directory @@ -134,9 +135,10 @@ def get_metadata(self) -> BenchmarkMetadata: description=self.description(), notes=self.notes(), unstable=self.unstable(), - tags=self.get_tags() + tags=self.get_tags(), ) + class Suite: def benchmarks(self) -> list[Benchmark]: raise NotImplementedError() diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index e1f24e0178789..cd4ab7cd9b26c 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -26,6 +26,7 @@ def runtime_to_name(runtime: RUNTIMES) -> str: RUNTIMES.UR: "Unified Runtime", }[runtime] + def runtime_to_tag_name(runtime: RUNTIMES) -> str: return { RUNTIMES.SYCL: "SYCL", @@ -84,16 +85,15 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n" "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n" "Work is ongoing to reduce the overhead of the SYCL API\n", - tags=['submit', 'micro', 'SYCL', 'UR', 'L0'] + tags=["submit", "micro", "SYCL", "UR", "L0"], ), "SinKernelGraph": BenchmarkMetadata( type="group", unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.", - tags=['submit', 'memory', 'proxy', 'SYCL', 'UR', 'L0', 'graph'] + tags=["submit", "memory", "proxy", "SYCL", "UR", "L0", "graph"], ), "SubmitGraph": BenchmarkMetadata( - type="group", - tags=['submit', 'micro', 'SYCL', 'UR', 'L0', 'graph'] + type="group", tags=["submit", "micro", "SYCL", "UR", "L0", "graph"] ), } @@ -279,7 +279,7 @@ def __init__(self, bench, runtime: RUNTIMES, ioq, measure_completion=0): ) def get_tags(self): - return ['submit', 'latency', runtime_to_tag_name(self.runtime), 'micro'] + return ["submit", "latency", runtime_to_tag_name(self.runtime), "micro"] def name(self): order = "in order" if self.ioq else "out of order" @@ -344,7 +344,7 @@ def description(self) -> str: ) def get_tags(self): - return 
['memory', 'submit', 'latency', 'SYCL', 'micro'] + return ["memory", "submit", "latency", "SYCL", "micro"] def bin_args(self) -> list[str]: return [ @@ -377,7 +377,7 @@ def description(self) -> str: ) def get_tags(self): - return ['memory', 'latency', 'SYCL', 'micro'] + return ["memory", "latency", "SYCL", "micro"] def bin_args(self) -> list[str]: return [ @@ -407,7 +407,7 @@ def description(self) -> str: ) def get_tags(self): - return ['memory', 'latency', 'SYCL', 'micro'] + return ["memory", "latency", "SYCL", "micro"] def bin_args(self) -> list[str]: return [ @@ -439,7 +439,7 @@ def lower_is_better(self): return False def get_tags(self): - return ['memory', 'throughput', 'SYCL', 'micro'] + return ["memory", "throughput", "SYCL", "micro"] def bin_args(self) -> list[str]: return [ @@ -468,7 +468,7 @@ def description(self) -> str: ) def get_tags(self): - return ['math', 'throughput', 'SYCL', 'micro'] + return ["math", "throughput", "SYCL", "micro"] def bin_args(self) -> list[str]: return [ @@ -517,7 +517,7 @@ def description(self) -> str: ) def get_tags(self): - return ['memory', 'latency', 'UR', 'micro'] + return ["memory", "latency", "UR", "micro"] def bin_args(self) -> list[str]: return [ @@ -560,7 +560,14 @@ def unstable(self) -> str: return "This benchmark combines both eager and graph execution, and may not be representative of real use cases." def get_tags(self): - return ['graph', runtime_to_tag_name(self.runtime), 'proxy', 'submit', 'memory', 'latency'] + return [ + "graph", + runtime_to_tag_name(self.runtime), + "proxy", + "submit", + "memory", + "latency", + ] def bin_args(self) -> list[str]: return [ @@ -595,7 +602,13 @@ def name(self): return f"graph_api_benchmark_{self.runtime.value} SubmitGraph numKernels:{self.numKernels} ioq {self.inOrderQueue} measureCompletion {self.measureCompletionTime}" def get_tags(self): - return ['graph', runtime_to_tag_name(self.runtime), 'micro', 'submit', 'latency'] + return [ + "graph", + runtime_to_tag_name(self.runtime), + "micro", + "submit", + "latency", + ] def bin_args(self) -> list[str]: return [ @@ -625,7 +638,7 @@ def name(self): return f"ulls_benchmark_{self.runtime.value} EmptyKernel wgc:{self.wgc}, wgs:{self.wgs}" def get_tags(self): - return [runtime_to_tag_name(self.runtime), 'micro', 'latency', 'submit'] + return [runtime_to_tag_name(self.runtime), "micro", "latency", "submit"] def bin_args(self) -> list[str]: return [ @@ -666,7 +679,7 @@ def name(self): return f"ulls_benchmark_{self.runtime.value} KernelSwitch count {self.count} kernelTime {self.kernelTime}" def get_tags(self): - return [runtime_to_tag_name(self.runtime), 'micro', 'latency', 'submit'] + return [runtime_to_tag_name(self.runtime), "micro", "latency", "submit"] def bin_args(self) -> list[str]: return [ diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py index cf203bca17f4f..19af2498a0a63 100644 --- a/devops/scripts/benchmarks/benches/llamacpp.py +++ b/devops/scripts/benchmarks/benches/llamacpp.py @@ -102,7 +102,7 @@ def description(self) -> str: ) def get_tags(self): - return ['SYCL', 'application', 'inference', 'throughput'] + return ["SYCL", "application", "inference", "throughput"] def lower_is_better(self): return False diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py index 50f35182eaddc..f1e366aa5bc4b 100644 --- a/devops/scripts/benchmarks/benches/syclbench.py +++ b/devops/scripts/benchmarks/benches/syclbench.py @@ -113,15 +113,15 @@ def 
extra_env_vars(self) -> dict: return {} def get_tags(self): - base_tags = ['SYCL', 'micro'] + base_tags = ["SYCL", "micro"] if "Memory" in self.bench_name or "mem" in self.bench_name.lower(): - base_tags.append('memory') + base_tags.append("memory") if "Reduction" in self.bench_name: - base_tags.append('math') + base_tags.append("math") if "Bandwidth" in self.bench_name: - base_tags.append('throughput') + base_tags.append("throughput") if "Latency" in self.bench_name: - base_tags.append('latency') + base_tags.append("latency") return base_tags def setup(self): diff --git a/devops/scripts/benchmarks/benches/umf.py b/devops/scripts/benchmarks/benches/umf.py index 60964fcf93298..f0b92777dd2f8 100644 --- a/devops/scripts/benchmarks/benches/umf.py +++ b/devops/scripts/benchmarks/benches/umf.py @@ -75,7 +75,7 @@ def setup(self): self.benchmark_bin = os.path.join(options.umf, "benchmark", self.bench_name) def get_tags(self): - return ['UMF', 'allocation', 'latency', 'micro'] + return ["UMF", "allocation", "latency", "micro"] def run(self, env_vars) -> list[Result]: command = [ diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py index 623079067b91d..0e1f20999c731 100644 --- a/devops/scripts/benchmarks/benches/velocity.py +++ b/devops/scripts/benchmarks/benches/velocity.py @@ -119,7 +119,7 @@ def description(self) -> str: return "" def get_tags(self): - return ['SYCL', 'application'] + return ["SYCL", "application"] def run(self, env_vars) -> list[Result]: env_vars.update(self.extra_env_vars()) @@ -176,7 +176,7 @@ def parse_output(self, stdout: str) -> float: ) def get_tags(self): - return ['SYCL', 'application', 'throughput'] + return ["SYCL", "application", "throughput"] class Bitcracker(VelocityBase): @@ -217,7 +217,7 @@ def parse_output(self, stdout: str) -> float: ) def get_tags(self): - return ['SYCL', 'application', 'throughput'] + return ["SYCL", "application", "throughput"] class SobelFilter(VelocityBase): @@ -266,7 +266,7 @@ def parse_output(self, stdout: str) -> float: ) def get_tags(self): - return ['SYCL', 'application', 'image', 'throughput'] + return ["SYCL", "application", "image", "throughput"] class QuickSilver(VelocityBase): @@ -316,7 +316,7 @@ def parse_output(self, stdout: str) -> float: ) def get_tags(self): - return ['SYCL', 'application', 'simulation', 'throughput'] + return ["SYCL", "application", "simulation", "throughput"] class Easywave(VelocityBase): @@ -383,7 +383,7 @@ def parse_output(self, stdout: str) -> float: ) def get_tags(self): - return ['SYCL', 'application', 'simulation'] + return ["SYCL", "application", "simulation"] class CudaSift(VelocityBase): @@ -414,7 +414,7 @@ def parse_output(self, stdout: str) -> float: raise ValueError("Failed to parse benchmark output.") def get_tags(self): - return ['SYCL', 'application', 'image'] + return ["SYCL", "application", "image"] class DLCifar(VelocityBase): @@ -468,7 +468,7 @@ def parse_output(self, stdout: str) -> float: raise ValueError("Failed to parse benchmark output.") def get_tags(self): - return ['SYCL', 'application', 'inference', 'image'] + return ["SYCL", "application", "inference", "image"] class DLMnist(VelocityBase): @@ -556,7 +556,7 @@ def parse_output(self, stdout: str) -> float: raise ValueError("Failed to parse benchmark output.") def get_tags(self): - return ['SYCL', 'application', 'inference', 'image'] + return ["SYCL", "application", "inference", "image"] class SVM(VelocityBase): @@ -602,4 +602,4 @@ def parse_output(self, stdout: str) -> float: 
raise ValueError("Failed to parse benchmark output.") def get_tags(self): - return ['SYCL', 'application', 'inference'] + return ["SYCL", "application", "inference"] diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html index 41fe6996ed432..ba8e77c6aff9e 100644 --- a/devops/scripts/benchmarks/html/index.html +++ b/devops/scripts/benchmarks/html/index.html @@ -36,13 +36,6 @@

[first hunk body unrecoverable: HTML markup was stripped during extraction; surviving text nodes: "Benchmark Results", "Options", "Suites", "Display Options"]
@@ -56,12 +49,16 @@
[second hunk body unrecoverable: HTML markup was stripped during extraction; surviving text nodes: "Display Options", "Tags", "Suites", "Tags"]

diff --git a/devops/scripts/benchmarks/html/styles.css b/devops/scripts/benchmarks/html/styles.css index 9a3c5fe69b287..3e9c3bd22fc37 100644 --- a/devops/scripts/benchmarks/html/styles.css +++ b/devops/scripts/benchmarks/html/styles.css @@ -242,11 +242,18 @@ details[open] summary::after { font-size: 18px; font-weight: 500; text-align: left; + display: flex; + align-items: center; } #suite-filters { display: flex; flex-wrap: wrap; - gap: 8px; + max-height: 200px; + overflow-y: auto; + border: 1px solid #dee2e6; + border-radius: 4px; + padding: 8px; + background-color: #f8f9fa; } .display-options { display: flex; @@ -286,7 +293,7 @@ details[open] summary::after { padding: 2px 8px; border-radius: 12px; font-size: 12px; - cursor: default; + cursor: help; } .tag-filter { @@ -320,44 +327,21 @@ details[open] summary::after { background-color: #f8f9fa; } -.tag-filter-actions { - margin-bottom: 8px; - display: flex; - gap: 8px; -} - -.tag-filter-actions button { - padding: 4px 8px; +.tag-action-button { + padding: 2px 8px; background: #e2e6ea; border: none; border-radius: 4px; cursor: pointer; + font-size: 12px; + margin-left: 8px; + vertical-align: middle; } -.tag-filter-actions button:hover { +.tag-action-button:hover { background: #ced4da; } -#active-tags { - display: none; - flex-wrap: wrap; - gap: 8px; - margin-top: 12px; - padding: 8px; - background-color: #f8f9fa; - border-radius: 4px; -} - -.active-tag { - display: flex; - align-items: center; - background-color: #0068B5; - color: white; - padding: 4px 8px; - border-radius: 12px; - font-size: 14px; -} - .remove-tag { background: none; border: none; diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index 267c7f8142c2f..c852e50c71372 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -47,4 +47,5 @@ class Options: custom_results_dir = None build_jobs: int = multiprocessing.cpu_count() + options = Options() diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py index 429b24eb632c8..319e796a3831d 100644 --- a/devops/scripts/benchmarks/output_html.py +++ b/devops/scripts/benchmarks/output_html.py @@ -17,13 +17,13 @@ def generate_html( metadata: dict[str, BenchmarkMetadata], ): benchmark_runs.sort(key=lambda run: run.date, reverse=True) - + # Create the comprehensive output object output = BenchmarkOutput( runs=benchmark_runs, metadata=metadata, tags=benchmark_tags_dict, - default_compare_names=compare_names + default_compare_names=compare_names, ) if options.output_html == "local": @@ -37,7 +37,7 @@ def generate_html( f.write("benchmarkMetadata = ") json.dump(json.loads(output.to_json())["metadata"], f, indent=2) f.write(";\n\n") - + f.write("benchmarkTags = ") json.dump(json.loads(output.to_json())["tags"], f, indent=2) f.write(";\n\n") diff --git a/devops/scripts/benchmarks/utils/compute_runtime.py b/devops/scripts/benchmarks/utils/compute_runtime.py index 85271726e715c..e617168f37a76 100644 --- a/devops/scripts/benchmarks/utils/compute_runtime.py +++ b/devops/scripts/benchmarks/utils/compute_runtime.py @@ -143,7 +143,10 @@ def build_igc(self, repo, commit): run(configure_command) # set timeout to 2h. IGC takes A LONG time to build if building from scratch. - run(f"cmake --build {self.igc_build} -j {options.build_jobs}", timeout=60 * 60 * 2) + run( + f"cmake --build {self.igc_build} -j {options.build_jobs}", + timeout=60 * 60 * 2, + ) # cmake --install doesn't work... 
run("make install", cwd=self.igc_build) return self.igc_install From a0d8370e5011ecb62dc31b7c82542d3e979429d8 Mon Sep 17 00:00:00 2001 From: pbalcer Date: Thu, 20 Mar 2025 15:00:10 +0100 Subject: [PATCH 069/114] fix fetching tags from remote json --- devops/scripts/benchmarks/html/scripts.js | 1 + 1 file changed, 1 insertion(+) diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js index fbfb496533194..e09b420e95f21 100644 --- a/devops/scripts/benchmarks/html/scripts.js +++ b/devops/scripts/benchmarks/html/scripts.js @@ -789,6 +789,7 @@ function loadData() { .then(data => { benchmarkRuns = data.runs || data; benchmarkMetadata = data.metadata || benchmarkMetadata || {}; + benchmarkTags = data.tags || benchmarkTags || {}; initializeCharts(); }) .catch(error => { From c7f8d1084c95af7a8fa2406a666d7b29a9ad6553 Mon Sep 17 00:00:00 2001 From: pbalcer Date: Thu, 20 Mar 2025 16:06:00 +0100 Subject: [PATCH 070/114] fix results /w descriptions and add url/commit of benchmarks --- devops/scripts/benchmarks/benches/compute.py | 13 ++++++++++--- devops/scripts/benchmarks/benches/llamacpp.py | 13 ++++++++++--- devops/scripts/benchmarks/benches/syclbench.py | 12 ++++++++++-- devops/scripts/benchmarks/benches/test.py | 1 - devops/scripts/benchmarks/benches/velocity.py | 13 ++++++++++--- devops/scripts/benchmarks/utils/result.py | 3 ++- 6 files changed, 42 insertions(+), 13 deletions(-) diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index cd4ab7cd9b26c..0646aa500450a 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -42,6 +42,12 @@ def __init__(self, directory): def name(self) -> str: return "Compute Benchmarks" + def git_url(self) -> str: + return "https://github.com/intel/compute-benchmarks.git" + + def git_hash(self) -> str: + return "b5cc46acf61766ab00da04e85bd4da4f7591eb21" + def setup(self): if options.sycl is None: return @@ -49,8 +55,8 @@ def setup(self): repo_path = git_clone( self.directory, "compute-benchmarks-repo", - "https://github.com/intel/compute-benchmarks.git", - "b5cc46acf61766ab00da04e85bd4da4f7591eb21", + self.git_url(), + self.git_hash(), ) build_path = create_build_path(self.directory, "compute-benchmarks-build") @@ -237,7 +243,8 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit=parse_unit_type(unit), - description=self.description(), + git_url=self.git_url(), + git_hash=self.git_hash(), ) ) return ret diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py index 19af2498a0a63..33ffd1f11eabd 100644 --- a/devops/scripts/benchmarks/benches/llamacpp.py +++ b/devops/scripts/benchmarks/benches/llamacpp.py @@ -25,6 +25,12 @@ def __init__(self, directory): def name(self) -> str: return "llama.cpp bench" + def git_url(self) -> str: + return "https://github.com/ggerganov/llama.cpp" + + def git_hash(self) -> str: + return "1ee9eea094fe5846c7d8d770aa7caa749d246b23" + def setup(self): if options.sycl is None: return @@ -32,8 +38,8 @@ def setup(self): repo_path = git_clone( self.directory, "llamacpp-repo", - "https://github.com/ggerganov/llama.cpp", - "1ee9eea094fe5846c7d8d770aa7caa749d246b23", + self.git_url(), + self.git_hash(), ) self.models_dir = os.path.join(self.directory, "models") @@ -142,7 +148,8 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, 
unit="token/s", - description=self.description(), + git_url=self.git_url(), + git_hash=self.git_hash(), ) ) return results diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py index f1e366aa5bc4b..0d924f7427ef0 100644 --- a/devops/scripts/benchmarks/benches/syclbench.py +++ b/devops/scripts/benchmarks/benches/syclbench.py @@ -23,6 +23,12 @@ def __init__(self, directory): def name(self) -> str: return "SYCL-Bench" + def git_url(self) -> str: + return "https://github.com/unisa-hpc/sycl-bench.git" + + def git_hash(self) -> str: + return "31fc70be6266193c4ba60eb1fe3ce26edee4ca5b" + def setup(self): if options.sycl is None: return @@ -31,8 +37,8 @@ def setup(self): repo_path = git_clone( self.directory, "sycl-bench-repo", - "https://github.com/unisa-hpc/sycl-bench.git", - "31fc70be6266193c4ba60eb1fe3ce26edee4ca5b", + self.git_url(), + self.git_hash(), ) configure_command = [ @@ -159,6 +165,8 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=row, unit="ms", + git_url=self.git_url(), + git_hash=self.git_hash(), ) ) diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py index 7afdd803b5cc3..ad1e8c9e57735 100644 --- a/devops/scripts/benchmarks/benches/test.py +++ b/devops/scripts/benchmarks/benches/test.py @@ -99,7 +99,6 @@ def run(self, env_vars) -> list[Result]: env={"A": "B"}, stdout="no output", unit="ms", - description=self.description(), ) ] diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py index 0e1f20999c731..4db6a87a97325 100644 --- a/devops/scripts/benchmarks/benches/velocity.py +++ b/devops/scripts/benchmarks/benches/velocity.py @@ -26,6 +26,12 @@ def __init__(self, directory): def name(self) -> str: return "Velocity Bench" + def git_url(self) -> str: + return "https://github.com/oneapi-src/Velocity-Bench/" + + def git_hash(self) -> str: + return "b22215c16f789100449c34bf4eaa3fb178983d69" + def setup(self): if options.sycl is None: return @@ -33,8 +39,8 @@ def setup(self): self.repo_path = git_clone( self.directory, "velocity-bench-repo", - "https://github.com/oneapi-src/Velocity-Bench/", - "b22215c16f789100449c34bf4eaa3fb178983d69", + self.git_url(), + self.git_hash(), ) def benchmarks(self) -> list[Benchmark]: @@ -139,7 +145,8 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit=self.unit, - description=self.description(), + git_url=self.git_url(), + git_hash=self.git_hash(), ) ] diff --git a/devops/scripts/benchmarks/utils/result.py b/devops/scripts/benchmarks/utils/result.py index 82fc7ca1fddc2..b9ebfdcb60952 100644 --- a/devops/scripts/benchmarks/utils/result.py +++ b/devops/scripts/benchmarks/utils/result.py @@ -27,7 +27,8 @@ class Result: name: str = "" lower_is_better: bool = True suite: str = "Unknown" - + git_url: str = "" + git_hash: str = "" @dataclass_json @dataclass From 1dad51339a1f6684aa82c2023c7718bbf74c0be0 Mon Sep 17 00:00:00 2001 From: pbalcer Date: Thu, 20 Mar 2025 16:37:23 +0100 Subject: [PATCH 071/114] fix git repo/hash for benchmarks --- devops/scripts/benchmarks/benches/compute.py | 4 ++-- devops/scripts/benchmarks/benches/llamacpp.py | 4 ++-- devops/scripts/benchmarks/benches/syclbench.py | 4 ++-- devops/scripts/benchmarks/benches/velocity.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/devops/scripts/benchmarks/benches/compute.py 
b/devops/scripts/benchmarks/benches/compute.py index 0646aa500450a..d83a0d081af57 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -243,8 +243,8 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit=parse_unit_type(unit), - git_url=self.git_url(), - git_hash=self.git_hash(), + git_url=self.bench.git_url(), + git_hash=self.bench.git_hash(), ) ) return ret diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py index 33ffd1f11eabd..86d41ed525292 100644 --- a/devops/scripts/benchmarks/benches/llamacpp.py +++ b/devops/scripts/benchmarks/benches/llamacpp.py @@ -148,8 +148,8 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit="token/s", - git_url=self.git_url(), - git_hash=self.git_hash(), + git_url=self.bench.git_url(), + git_hash=self.bench.git_hash(), ) ) return results diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py index 0d924f7427ef0..9854c92d338fc 100644 --- a/devops/scripts/benchmarks/benches/syclbench.py +++ b/devops/scripts/benchmarks/benches/syclbench.py @@ -165,8 +165,8 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=row, unit="ms", - git_url=self.git_url(), - git_hash=self.git_hash(), + git_url=self.bench.git_url(), + git_hash=self.bench.git_hash(), ) ) diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py index 4db6a87a97325..493298dea8b10 100644 --- a/devops/scripts/benchmarks/benches/velocity.py +++ b/devops/scripts/benchmarks/benches/velocity.py @@ -145,8 +145,8 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit=self.unit, - git_url=self.git_url(), - git_hash=self.git_hash(), + git_url=self.vb.git_url(), + git_hash=self.vb.git_hash(), ) ] From 9f1df9a63675eae64875355fb2fb774d653d40da Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 20 Mar 2025 12:33:30 -0700 Subject: [PATCH 072/114] [test] bump threshold to 0.01 to trigger failrues --- devops/scripts/benchmarks/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index 14717eb6db595..c1ba090523eb6 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -45,7 +45,7 @@ class Options: preset: str = "Full" custom_results_dir = None - regression_threshold: float = 0.05 + regression_threshold: float = 0.01 timestamp_override: str = None From be7271cff23e13c3583a9ac365f4dde5f3eb3842 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 24 Mar 2025 12:50:02 -0700 Subject: [PATCH 073/114] Rename ambiguous 'benchmarks.yml' to a better name --- .../workflows/{benchmark.yml => sycl-ur-perf-benchmarking.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{benchmark.yml => sycl-ur-perf-benchmarking.yml} (100%) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/sycl-ur-perf-benchmarking.yml similarity index 100% rename from .github/workflows/benchmark.yml rename to .github/workflows/sycl-ur-perf-benchmarking.yml From c55313bcc3530cd3f41a84505885e102f3ce2fed Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 26 Mar 2025 12:32:19 -0700 Subject: [PATCH 074/114] Remove sycl-benchmark-aggregate instrumentation --- .../workflows/sycl-benchmark-aggregate.yml | 83 ++++++++++--------- 1 file changed, 44 insertions(+), 39 deletions(-) diff --git 
a/.github/workflows/sycl-benchmark-aggregate.yml b/.github/workflows/sycl-benchmark-aggregate.yml index b6f391f3e1e6d..87f7ef718160a 100644 --- a/.github/workflows/sycl-benchmark-aggregate.yml +++ b/.github/workflows/sycl-benchmark-aggregate.yml @@ -1,47 +1,52 @@ -name: Test benchmark.yml using sycl-benchmark-aggregate +name: Aggregate compute-benchmark averages from historical data + +# The benchmarking workflow in sycl-linux-run-tests.yml passes or fails based on +# how the benchmark results compare to a historical average: This historical +# average is calculated in this workflow, which aggregates historical data and +# produces measures of central tendency (median in this case) used for this +# purpose. on: workflow_dispatch: inputs: - commit_hash: - description: Specific commit hash to build SYCL from - type: string - required: false - upload_results: - description: 'Save and upload results' - type: choice - options: - - false - - true - default: true - runner: - type: choice - options: - - '["PVC_PERF"]' - backend: - description: Backend to use - type: choice - options: - - 'level_zero:gpu' - # TODO L0 V2 support - reset_intel_gpu: - description: Reset Intel GPUs - type: choice - options: - - false - - true - default: true + lookback_days: + description: | + Number of days from today to look back in historical results for: + This sets the age limit of data used in average calculation: Any + benchmark results created before `lookback_days` from today is + excluded from being aggregated in the historical average. + type: number + required: true + workflow_call: + inputs: + lookback_days: + type: number + required: true + secrets: + LLVM_SYCL_BENCHMARK_TOKEN: + description: | + Github token used by the faceless account to push newly calculated + medians. 
+ required: true + -permissions: read-all +permissions: + contents: read jobs: aggregate: - name: Test benchmark.yml - uses: ./.github/workflows/benchmark.yml - secrets: inherit - with: - commit_hash: ${{ inputs.commit_hash }} - upload_results: ${{ inputs.upload_results }} - runner: ${{ inputs.runner }} - backend: ${{ inputs.backend }} - reset_intel_gpu: ${{ inputs.reset_intel_gpu }} \ No newline at end of file + name: Aggregate average (median) value for all metrics + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + sparse-checkout: | + devops/scripts/benchmarking + devops/benchmarking + devops/actions/benchmarking + - name: Aggregate benchmark results and produce historical average + uses: ./devops/actions/benchmarking/aggregate + with: + lookback_days: ${{ inputs.lookback_days }} + env: + GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} From d0d1d3d06728f809c4d39a8adf18bbd65795f8f9 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 28 Mar 2025 12:21:06 -0700 Subject: [PATCH 075/114] Enable build from PR and L0v2 --- .github/workflows/sycl-linux-run-tests.yml | 4 +- .../workflows/sycl-ur-perf-benchmarking.yml | 58 ++++++++++++++----- .../actions/run-tests/benchmark_v2/action.yml | 19 +++--- 3 files changed, 57 insertions(+), 24 deletions(-) diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index 3a93c2aae254c..27645fb559ca6 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -118,7 +118,7 @@ on: type: string default: 'false' required: False - benchmark_build_hash: + benchmark_save_name: type: string default: '' required: False @@ -355,7 +355,7 @@ jobs: with: target_devices: ${{ inputs.target_devices }} upload_results: ${{ inputs.benchmark_upload_results }} - build_hash: ${{ inputs.benchmark_build_hash }} + save_name: ${{ inputs.benchmark_save_name }} env: RUNNER_TAG: ${{ inputs.runner }} GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/sycl-ur-perf-benchmarking.yml b/.github/workflows/sycl-ur-perf-benchmarking.yml index 8e860bce6a384..cf5d9f3fde006 100644 --- a/.github/workflows/sycl-ur-perf-benchmarking.yml +++ b/.github/workflows/sycl-ur-perf-benchmarking.yml @@ -5,8 +5,23 @@ on: - cron: '0 1 * * *' # 2 hrs earlier than sycl-nightly.yml workflow_call: inputs: + pr_no: + type: string + description: | + PR no. to build SYCL from if specified: SYCL will be built from HEAD + of incoming branch used by the specified PR no. + + If both pr_no and commit_hash are empty, the latest SYCL nightly build + will be used. + required: false + default: '' commit_hash: type: string + description: | + Commit hash (within intel/llvm) to build SYCL from if specified. + + If both pr_no and commit_hash are empty, the latest SYCL nightly build + will be used. required: false default: '' upload_results: @@ -25,9 +40,20 @@ on: workflow_dispatch: inputs: + pr_no: + type: string + description: | + PR no. to build SYCL from: + + SYCL will be built from HEAD of incoming branch. + required: false + default: '' commit_hash: - description: Commit hash to build intel/llvm from type: string + description: | + Commit hash (within intel/llvm) to build SYCL from: + + Leave both pr_no and commit_hash empty to use last SYCL nightly build. 
required: false default: '' upload_results: @@ -46,7 +72,8 @@ on: type: choice options: - 'level_zero:gpu' - # TODO L0 V2 support + - 'level_zero_v2:gpu' + # As of #17407, sycl-linux-build now builds v2 by default reset_intel_gpu: description: Reset Intel GPUs type: choice @@ -59,37 +86,37 @@ permissions: read-all jobs: build_sycl: - name: Build SYCL from PR - if: inputs.commit_hash != '' + if: inputs.commit_hash != '' || inputs.pr_no != '' + name: Build SYCL uses: ./.github/workflows/sycl-linux-build.yml with: - build_ref: ${{ inputs.commit_hash }} + build_ref: ${{ inputs.commit_hash != '' && inputs.commit_hash || format('refs/pull/{0}/head', inputs.pr_no) }} build_cache_root: "/__w/" build_artifact_suffix: "default" build_cache_suffix: "default" - # Docker image has last nightly pre-installed and added to the PATH build_image: "ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest" cc: clang cxx: clang++ changes: '[]' run_benchmarks_build: + if: inputs.commit_hash != '' || inputs.pr_no != '' name: Run Benchmarks (on PR Build) needs: [ build_sycl ] - if: inputs.commit_hash != '' strategy: matrix: - # Set default values if not specified: include: - - runner: ${{ inputs.runner || '["PVC_PERF"]' }} + - ref: ${{ inputs.commit_hash != '' && inputs.commit_hash || format('refs/pull/{0}/head', inputs.pr_no) }} + save_name: ${{ inputs.commit_hash != '' && format('Commit{0}', inputs.commit_hash) || format('PR{0}', inputs.pr_no) }} + # Set default values if not specified: + runner: ${{ inputs.runner || '["PVC_PERF"]' }} backend: ${{ inputs.backend || 'level_zero:gpu' }} reset_intel_gpu: ${{ inputs.reset_intel_gpu || 'true' }} - ref: ${{ inputs.commit_hash }} uses: ./.github/workflows/sycl-linux-run-tests.yml secrets: inherit with: # TODO support other benchmarks - name: Run compute-benchmarks (${{ matrix.runner }}, ${{ matrix.backend }}) + name: Run compute-benchmarks (${{ matrix.save_name }}, ${{ matrix.runner }}, ${{ matrix.backend }}) runner: ${{ matrix.runner }} image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN @@ -97,7 +124,7 @@ jobs: reset_intel_gpu: ${{ matrix.reset_intel_gpu }} tests_selector: benchmark_v2 benchmark_upload_results: ${{ inputs.upload_results }} - benchmark_build_hash: ${{ inputs.commit_hash }} + benchmark_save_name: ${{ matrix.save_name }} repo_ref: ${{ matrix.ref }} devops_ref: ${{ github.ref }} sycl_toolchain_artifact: sycl_linux_default @@ -106,7 +133,7 @@ jobs: run_benchmarks_nightly: name: Run Benchmarks (on Nightly Build) - if: inputs.commit_hash == '' + if: inputs.commit_hash == '' && inputs.pr_no == '' strategy: matrix: # Set default values if not specified: @@ -118,12 +145,13 @@ jobs: secrets: inherit with: # TODO support other benchmarks - name: Run compute-benchmarks (${{ matrix.runner }}, ${{ matrix.backend }}) + name: Run compute-benchmarks (Nightly, ${{ matrix.runner }}, ${{ matrix.backend }}) runner: ${{ matrix.runner }} image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN target_devices: ${{ matrix.backend }} reset_intel_gpu: ${{ matrix.reset_intel_gpu }} tests_selector: benchmark_v2 + benchmark_save_name: Baseline benchmark_upload_results: ${{ inputs.upload_results }} - repo_ref: ${{ github.ref }} + repo_ref: ${{ github.ref }} # TODO figure out nightly commit hash diff --git a/devops/actions/run-tests/benchmark_v2/action.yml 
b/devops/actions/run-tests/benchmark_v2/action.yml index fa03ec9a060c5..d540ef58d29e1 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -19,9 +19,9 @@ inputs: upload_results: type: string required: True - build_hash: + save_name: type: string - required: False + required: True default: '' runs: @@ -86,7 +86,9 @@ runs: git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results - name: Run compute-benchmarks env: - BUILD_HASH: ${{ inputs.build_hash }} + # Need to append "__" to save name in order to follow + # conventions: + SAVE_PREFIX: ${{ inputs.save_name }} shell: bash run: | # TODO generate summary + display helpful message here @@ -98,12 +100,15 @@ runs: echo "-----" mkdir -p "./llvm-ci-perf-results/$RUNNER_NAME" + case "$ONEAPI_DEVICE_SELECTOR" in + level_zero:*) SAVE_SUFFIX="L0" ;; + level_zero_v2:*) SAVE_SUFFIX="L0v2" ;; + opencl:*) SAVE_SUFFIX="OCL" ;; + *) SAVE_SUFFIX="${ONEAPI_DEVICE_SELECTOR%%:*}";; + esac # TODO accomodate for different GPUs and backends - SAVE_NAME="Baseline_PVC_L0" + SAVE_NAME="${SAVE_PREFIX}_PVC_${SAVE_SUFFIX}" SAVE_TIMESTAMP="$(date +'%Y%m%d_%H%M%S')" - if [ -n "$BUILD_HASH" ]; then - SAVE_NAME="Commit_PVC_$BUILD_HASH" - fi taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \ "$(realpath ./llvm_test_workdir)" \ From 4c515586d22fde76eeb8b341e3ad2e15eb3a6e83 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 28 Mar 2025 14:07:15 -0700 Subject: [PATCH 076/114] Introduce presets --- .github/workflows/sycl-linux-run-tests.yml | 5 +++ .../workflows/sycl-ur-perf-benchmarking.yml | 19 ++++++++++ .../actions/run-tests/benchmark_v2/action.yml | 11 +++++- devops/scripts/benchmarks/presets.py | 37 +++++++++++++++++++ 4 files changed, 70 insertions(+), 2 deletions(-) diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index 27645fb559ca6..944eb97ffe381 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -122,6 +122,10 @@ on: type: string default: '' required: False + benchmark_preset: + type: string + default: 'Minimal' + required: False workflow_dispatch: inputs: @@ -356,6 +360,7 @@ jobs: target_devices: ${{ inputs.target_devices }} upload_results: ${{ inputs.benchmark_upload_results }} save_name: ${{ inputs.benchmark_save_name }} + preset: ${{ inputs.benchmark_preset }} env: RUNNER_TAG: ${{ inputs.runner }} GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/sycl-ur-perf-benchmarking.yml b/.github/workflows/sycl-ur-perf-benchmarking.yml index cf5d9f3fde006..e2cdcb58396c2 100644 --- a/.github/workflows/sycl-ur-perf-benchmarking.yml +++ b/.github/workflows/sycl-ur-perf-benchmarking.yml @@ -5,6 +5,12 @@ on: - cron: '0 1 * * *' # 2 hrs earlier than sycl-nightly.yml workflow_call: inputs: + preset: + type: string + description: | + Benchmark presets to run: See /devops/scripts/benchmarks/presets.py + required: false + default: 'Minimal' # Only compute-benchmarks pr_no: type: string description: | @@ -40,6 +46,17 @@ on: workflow_dispatch: inputs: + preset: + type: choice + description: | + Benchmark presets to run, See /devops/scripts/benchmarks/presets.py. Hint: Minimal is compute-benchmarks only. 
+ options: + - Full + - SYCL + - Minimal + - Normal + - Test + default: 'Minimal' # Only compute-benchmarks pr_no: type: string description: | @@ -125,6 +142,7 @@ jobs: tests_selector: benchmark_v2 benchmark_upload_results: ${{ inputs.upload_results }} benchmark_save_name: ${{ matrix.save_name }} + benchmark_preset: ${{ inputs.preset }} repo_ref: ${{ matrix.ref }} devops_ref: ${{ github.ref }} sycl_toolchain_artifact: sycl_linux_default @@ -154,4 +172,5 @@ jobs: tests_selector: benchmark_v2 benchmark_save_name: Baseline benchmark_upload_results: ${{ inputs.upload_results }} + benchmark_preset: ${{ inputs.preset }} repo_ref: ${{ github.ref }} # TODO figure out nightly commit hash diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index d540ef58d29e1..0339c4337a759 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -22,7 +22,9 @@ inputs: save_name: type: string required: True - default: '' + preset: + type: string + required: True runs: using: "composite" @@ -32,6 +34,7 @@ runs: env: TARGET_DEVICE: ${{ inputs.target_devices }} RUNNER_NAME: ${{ runner.name }} + PRESET: ${{ inputs.preset }} run: | case "$RUNNER_TAG" in '["PVC_PERF"]' ) ;; @@ -61,6 +64,10 @@ runs: esac echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV + # Make sure specified preset is a known value and is not malicious + python3 ./devops/scripts/benchmarks/preset.py "$PRESET" + [ "$?" -ne 0 ] && exit 1 # Stop workflow if invalid preset + echo "PRESET=$PRESET" >> $GITHUB_ENV - name: Compute CPU core range to run benchmarks on shell: bash run: | @@ -117,7 +124,7 @@ runs: --output-html remote \ --results-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ --output-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ - --preset Minimal \ + --preset "$PRESET" \ --timestamp-override "$SAVE_TIMESTAMP" echo "-----" python3 ./devops/scripts/benchmarks/compare.py to_hist \ diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py index 3f191766deb8c..42a49b732ff3c 100644 --- a/devops/scripts/benchmarks/presets.py +++ b/devops/scripts/benchmarks/presets.py @@ -3,6 +3,8 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +import argparse + presets: dict[str, list[str]] = { "Full": [ "Compute Benchmarks", @@ -36,3 +38,38 @@ def enabled_suites(preset: str) -> list[str]: return presets[preset] except KeyError: raise ValueError(f"Preset '{preset}' not found.") + + +def main(): + parser = argparse.ArgumentParser(description="Benchmark Preset Utilities") + subparsers = parser.add_subparsers(dest="command", required=True) + + query_parser = subparsers.add_parser( + "query", + help="Query benchmarks ran by a preset (as defined in presets.py)" + ) + validate_parser.add_argument( + "preset_to_query", + type=str, + help="preset name to query" + ) + validate_parser.add_argument( + "-q", "--quiet", + action="store_true", + help="Disable stdout messages: Useful if you want to check if a preset exists within a shell script." 
+ ) + + args = parser.parse_args() + if args.command == 'query': + if args.preset_to_query in presets: + if not args.quiet: + print(f"Benchmark suites to be ran in {args.preset_to_query}:") + for suite in presets[args.preset_to_query]: + print(suite) + exit(0) + else: + if not args.quiet: print(f"Error: No preset named '{args.preset_to_query}'.") + exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file From 63d22353e0c883489713b2fc5db6e0015077b066 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 28 Mar 2025 14:13:08 -0700 Subject: [PATCH 077/114] Fix typo --- devops/actions/run-tests/benchmark_v2/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index 0339c4337a759..4b9c0c2f61fd2 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -65,7 +65,7 @@ runs: echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV # Make sure specified preset is a known value and is not malicious - python3 ./devops/scripts/benchmarks/preset.py "$PRESET" + python3 ./devops/scripts/benchmarks/presets.py "$PRESET" [ "$?" -ne 0 ] && exit 1 # Stop workflow if invalid preset echo "PRESET=$PRESET" >> $GITHUB_ENV - name: Compute CPU core range to run benchmarks on From 23330fc2bc1463ae456a70648101e5647b2ae7bf Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 28 Mar 2025 14:14:15 -0700 Subject: [PATCH 078/114] Fix typo part 2. --- devops/scripts/benchmarks/presets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py index 42a49b732ff3c..167283baeea7c 100644 --- a/devops/scripts/benchmarks/presets.py +++ b/devops/scripts/benchmarks/presets.py @@ -48,12 +48,12 @@ def main(): "query", help="Query benchmarks ran by a preset (as defined in presets.py)" ) - validate_parser.add_argument( + query_parser.add_argument( "preset_to_query", type=str, help="preset name to query" ) - validate_parser.add_argument( + query_parser.add_argument( "-q", "--quiet", action="store_true", help="Disable stdout messages: Useful if you want to check if a preset exists within a shell script." From 0d79d8993c69e2017e7231da2ef44447bea197aa Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 28 Mar 2025 14:15:13 -0700 Subject: [PATCH 079/114] Fix typo pt 3. --- devops/actions/run-tests/benchmark_v2/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index 4b9c0c2f61fd2..59d3c0fa96f6f 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -65,7 +65,7 @@ runs: echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV # Make sure specified preset is a known value and is not malicious - python3 ./devops/scripts/benchmarks/presets.py "$PRESET" + python3 ./devops/scripts/benchmarks/presets.py query "$PRESET" [ "$?" 
-ne 0 ] && exit 1 # Stop workflow if invalid preset echo "PRESET=$PRESET" >> $GITHUB_ENV - name: Compute CPU core range to run benchmarks on From a8048b2597930d96279a1283a791e3d613657827 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Sun, 30 Mar 2025 20:06:34 -0700 Subject: [PATCH 080/114] Reset ur-build-hw.sh --- .github/workflows/ur-build-hw.yml | 2 +- {devops => unified-runtime/.github}/scripts/get_system_info.sh | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename {devops => unified-runtime/.github}/scripts/get_system_info.sh (100%) diff --git a/.github/workflows/ur-build-hw.yml b/.github/workflows/ur-build-hw.yml index eebac4e424a4b..a0f94ab10f538 100644 --- a/.github/workflows/ur-build-hw.yml +++ b/.github/workflows/ur-build-hw.yml @@ -156,4 +156,4 @@ jobs: - name: Get information about platform if: ${{ always() }} - run: ${{github.workspace}}/devops/scripts/get_system_info.sh + run: ${{github.workspace}}/unified-runtime/.github/scripts/get_system_info.sh diff --git a/devops/scripts/get_system_info.sh b/unified-runtime/.github/scripts/get_system_info.sh similarity index 100% rename from devops/scripts/get_system_info.sh rename to unified-runtime/.github/scripts/get_system_info.sh From 29d125c5d2f5e053536ee7ef4df568f9bb7ee170 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Sun, 30 Mar 2025 20:09:54 -0700 Subject: [PATCH 081/114] Add comments explaining executable section in presets.py --- devops/scripts/benchmarks/presets.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py index 167283baeea7c..61ce9f4aebc49 100644 --- a/devops/scripts/benchmarks/presets.py +++ b/devops/scripts/benchmarks/presets.py @@ -40,6 +40,8 @@ def enabled_suites(preset: str) -> list[str]: raise ValueError(f"Preset '{preset}' not found.") +# Utility scripts to validate a given preset, useful for e.g. 
CI: + def main(): parser = argparse.ArgumentParser(description="Benchmark Preset Utilities") subparsers = parser.add_subparsers(dest="command", required=True) @@ -72,4 +74,4 @@ def main(): exit(1) if __name__ == "__main__": - main() \ No newline at end of file + main() From 5a3afcbd19b14a615e514156daa034ac4e414eef Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Sun, 30 Mar 2025 20:16:42 -0700 Subject: [PATCH 082/114] Revert stuff that shouldnt be merged --- .github/workflows/sycl-linux-run-tests.yml | 1 - devops/scripts/benchmarks/requirements.txt | 1 - devops/scripts/benchmarks/utils/validate.py | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index 59da98228bae9..257b1d4723916 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -136,7 +136,6 @@ on: - '["cts-cpu"]' - '["Linux", "build"]' - '["cuda"]' - - '["Linux", "bmg"]' - '["PVC_PERF"]' image: type: choice diff --git a/devops/scripts/benchmarks/requirements.txt b/devops/scripts/benchmarks/requirements.txt index 9f0381ceef6c2..99ba0caab55c2 100644 --- a/devops/scripts/benchmarks/requirements.txt +++ b/devops/scripts/benchmarks/requirements.txt @@ -2,4 +2,3 @@ matplotlib==3.9.2 mpld3==0.5.10 dataclasses-json==0.6.7 PyYAML==6.0.1 -Mako==1.3.9 diff --git a/devops/scripts/benchmarks/utils/validate.py b/devops/scripts/benchmarks/utils/validate.py index 2d01255487a44..3d22c242f8301 100644 --- a/devops/scripts/benchmarks/utils/validate.py +++ b/devops/scripts/benchmarks/utils/validate.py @@ -19,4 +19,4 @@ def timestamp(t: str) -> bool: timestamp_re = re.compile( r"^\d{4}(0[1-9]|1[0-2])([0-2][0-9]|3[01])_([01][0-9]|2[0-3])[0-5][0-9][0-5][0-9]$" ) - return timestamp_re.match(t) is not None \ No newline at end of file + return timestamp_re.match(t) is not None From b6d42d41298d92c9f7f6003adfd243cfa574d18f Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Sun, 30 Mar 2025 20:22:16 -0700 Subject: [PATCH 083/114] Finally no more reset_intel_gpu --- .github/workflows/sycl-ur-perf-benchmarking.yml | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/.github/workflows/sycl-ur-perf-benchmarking.yml b/.github/workflows/sycl-ur-perf-benchmarking.yml index e2cdcb58396c2..2713d60f0a2b9 100644 --- a/.github/workflows/sycl-ur-perf-benchmarking.yml +++ b/.github/workflows/sycl-ur-perf-benchmarking.yml @@ -39,10 +39,6 @@ on: backend: type: string required: true - reset_intel_gpu: - type: string # true/false: workflow_dispatch does not support booleans - required: true - default: true workflow_dispatch: inputs: @@ -91,13 +87,6 @@ on: - 'level_zero:gpu' - 'level_zero_v2:gpu' # As of #17407, sycl-linux-build now builds v2 by default - reset_intel_gpu: - description: Reset Intel GPUs - type: choice - options: - - false - - true - default: true permissions: read-all @@ -128,17 +117,14 @@ jobs: # Set default values if not specified: runner: ${{ inputs.runner || '["PVC_PERF"]' }} backend: ${{ inputs.backend || 'level_zero:gpu' }} - reset_intel_gpu: ${{ inputs.reset_intel_gpu || 'true' }} uses: ./.github/workflows/sycl-linux-run-tests.yml secrets: inherit with: - # TODO support other benchmarks name: Run compute-benchmarks (${{ matrix.save_name }}, ${{ matrix.runner }}, ${{ matrix.backend }}) runner: ${{ matrix.runner }} image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN 
target_devices: ${{ matrix.backend }} - reset_intel_gpu: ${{ matrix.reset_intel_gpu }} tests_selector: benchmark_v2 benchmark_upload_results: ${{ inputs.upload_results }} benchmark_save_name: ${{ matrix.save_name }} @@ -158,7 +144,6 @@ jobs: include: - runner: ${{ inputs.runner || '["PVC_PERF"]' }} backend: ${{ inputs.backend || 'level_zero:gpu' }} - reset_intel_gpu: ${{ inputs.reset_intel_gpu || 'true' }} uses: ./.github/workflows/sycl-linux-run-tests.yml secrets: inherit with: @@ -168,7 +153,6 @@ jobs: image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN target_devices: ${{ matrix.backend }} - reset_intel_gpu: ${{ matrix.reset_intel_gpu }} tests_selector: benchmark_v2 benchmark_save_name: Baseline benchmark_upload_results: ${{ inputs.upload_results }} From 8b3b79cae30920595d1e410d72034c8b57dd17b9 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 31 Mar 2025 10:26:41 -0700 Subject: [PATCH 084/114] Remove streaming median --- devops/scripts/benchmarks/utils/aggregate.py | 52 -------------------- 1 file changed, 52 deletions(-) diff --git a/devops/scripts/benchmarks/utils/aggregate.py b/devops/scripts/benchmarks/utils/aggregate.py index a6db4d36334c4..36ee7cbecaae6 100644 --- a/devops/scripts/benchmarks/utils/aggregate.py +++ b/devops/scripts/benchmarks/utils/aggregate.py @@ -1,4 +1,3 @@ -import heapq import statistics from abc import ABC, abstractmethod @@ -52,54 +51,3 @@ def add(self, n: float): def get_avg(self) -> float: return statistics.median(self.elements) - - -class StreamingMedian(Aggregator): - """ - Calculate medians incrementally using heaps: Theoretically the fastest way - to calculate a median from a stream of elements, but realistically is only - faster when dealing with huge numbers of samples that would be generated by - i.e. enabling this workflow in precommit and using longer periods of time. - """ - - def __init__(self, starting_elements: list = []): - # Gist: we keep a minheap and a maxheap, and store the median as the top - # of the minheap. When a new element comes it gets put into the heap - # based on if the element is bigger than the current median. Then, the - # heaps are heapified and the median is repopulated by heapify. - self.minheap_larger = [] - self.maxheap_smaller = [] - - map(lambda n: self.add(n), starting_elements) - - @staticmethod - def get_type() -> str: - return "median" - - # Note: numbers on maxheap should be negative, as heapq - # is minheap by default - - def add(self, n: float): - if len(self.maxheap_smaller) == 0 or -self.maxheap_smaller[0] >= n: - heapq.heappush(self.maxheap_smaller, -n) - else: - heapq.heappush(self.minheap_larger, n) - - # Ensure minheap has more elements than maxheap - if len(self.maxheap_smaller) > len(self.minheap_larger) + 1: - heapq.heappush(self.minheap_larger, -heapq.heappop(self.maxheap_smaller)) - elif len(self.maxheap_smaller) < len(self.minheap_larger): - heapq.heappush(self.maxheap_smaller, -heapq.heappop(self.minheap_larger)) - - def get_avg(self) -> float: - if len(self.maxheap_smaller) == len(self.minheap_larger): - # Equal number of elements smaller and larger than "median": - # thus, there are two median values. The median would then become - # the average of both median values. 
- return (-self.maxheap_smaller[0] + self.minheap_larger[0]) / 2.0 - else: - # Otherwise, median is always in minheap, as minheap is always - # bigger - return -self.maxheap_smaller[0] - - From 3a070d588f062e68edf9bb1c0d022b67cc268554 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 31 Mar 2025 10:52:34 -0700 Subject: [PATCH 085/114] Add missing newlines --- .github/workflows/sycl-linux-run-tests.yml | 2 +- devops/scripts/benchmarks/compare.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index 257b1d4723916..f73c731eec506 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -335,4 +335,4 @@ jobs: preset: ${{ inputs.benchmark_preset }} env: RUNNER_TAG: ${{ inputs.runner }} - GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} \ No newline at end of file + GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} diff --git a/devops/scripts/benchmarks/compare.py b/devops/scripts/benchmarks/compare.py index d538577b0ce35..082648a1005ad 100644 --- a/devops/scripts/benchmarks/compare.py +++ b/devops/scripts/benchmarks/compare.py @@ -295,4 +295,4 @@ def print_regression(entry: dict): exit(1) # Exit 1 to trigger github test failure else: print("Unsupported operation: exiting.") - exit(1) \ No newline at end of file + exit(1) From 186b36e1d14bd28dcb4b34cc408e7399173091a3 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 31 Mar 2025 11:54:53 -0700 Subject: [PATCH 086/114] Allegedly, runner name is already baked into github_env --- devops/actions/run-tests/benchmark_v2/action.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index 59d3c0fa96f6f..ba5d74e45f88a 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -33,7 +33,6 @@ runs: shell: bash env: TARGET_DEVICE: ${{ inputs.target_devices }} - RUNNER_NAME: ${{ runner.name }} PRESET: ${{ inputs.preset }} run: | case "$RUNNER_TAG" in @@ -51,7 +50,6 @@ runs: echo "Bad runner name, please ensure runner name is [a-zA-Z0-9_-]." 
exit 1 fi - echo "RUNNER_NAME=$RUNNER_NAME" >> $GITHUB_ENV # input.target_devices is not directly used, as this allows code injection case "$TARGET_DEVICE" in From de280a532de18a8b6804046673d622e1a6e07f43 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 31 Mar 2025 20:33:19 -0700 Subject: [PATCH 087/114] Modify save directory structure, amend hostname behavior for github runners --- .../actions/run-tests/benchmark_v2/action.yml | 5 +- devops/scripts/benchmarks/compare.py | 83 ++++++++++++++----- devops/scripts/benchmarks/history.py | 19 ++++- devops/scripts/benchmarks/utils/validate.py | 10 +-- 4 files changed, 88 insertions(+), 29 deletions(-) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index ba5d74e45f88a..7507ea09492db 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -103,7 +103,6 @@ runs: echo "-----" pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt echo "-----" - mkdir -p "./llvm-ci-perf-results/$RUNNER_NAME" case "$ONEAPI_DEVICE_SELECTOR" in level_zero:*) SAVE_SUFFIX="L0" ;; @@ -120,8 +119,8 @@ runs: --sycl "$(realpath ./toolchain)" \ --save "$SAVE_NAME" \ --output-html remote \ - --results-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ - --output-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ + --results-dir "./llvm-ci-perf-results/" \ + --output-dir "./llvm-ci-perf-results/" \ --preset "$PRESET" \ --timestamp-override "$SAVE_TIMESTAMP" echo "-----" diff --git a/devops/scripts/benchmarks/compare.py b/devops/scripts/benchmarks/compare.py index 082648a1005ad..e4de190b76d03 100644 --- a/devops/scripts/benchmarks/compare.py +++ b/devops/scripts/benchmarks/compare.py @@ -39,8 +39,8 @@ class Compare: """Class containing logic for comparisons between results""" @staticmethod def get_hist_avg( - result_name: str, result_dir: str, cutoff: str, aggregator=SimpleMedian, - exclude: list[str] = [] + result_name: str, result_dir: str, hostname: str, cutoff: str, + aggregator: Aggregator = SimpleMedian, exclude: list[str] = [] ) -> dict[str, BenchmarkHistoricAverage]: """ Create a historic average for results named result_name in result_dir @@ -51,6 +51,7 @@ def get_hist_avg( result_dir (str): Path to folder containing benchmark results cutoff (str): Timestamp in YYYYMMDD_HHMMSS of oldest results used in average calcultaion + hostname (str): Hostname of machine on which results ran on aggregator (Aggregator): The aggregator to use for calculating the historic average exclude (list[str]): List of filenames (only the stem) to exclude @@ -60,6 +61,9 @@ def get_hist_avg( A dictionary mapping benchmark names to BenchmarkHistoricAverage objects """ + if not Validate.timestamp(cutoff): + raise ValueError("Provided cutoff time is not a proper timestamp.") + def get_timestamp(f: str) -> str: """Extract timestamp from result filename""" return str(f)[-len("YYYYMMDD_HHMMSS.json") : -len(".json")] @@ -67,7 +71,11 @@ def get_timestamp(f: str) -> str: def get_result_paths() -> list[str]: """ Get a list of all results matching result_name in result_dir that is - newer than the timestamp specified by cutoff + newer than the timestamp specified by cutoff based off of filename. + + This function assumes filenames of benchmark result files are + accurate; files returned by this function will be checked a second + time once their contents are actually loaded. 
""" cache_dir = Path(f"{result_dir}") @@ -84,6 +92,23 @@ def get_result_paths() -> list[str]: cache_dir.glob(f"{result_name}_*_*.json") ) ) + + def check_benchmark_result(result: BenchmarkRun) -> bool: + """ + Returns True if result file: + - Was ran on the target machine/hostname specified + - Sanity check: ensure metadata are all expected values: + - Date is truly before cutoff timestamp + - Name truly matches up with specified result_name + """ + if result.hostname != hostname: + return False + if result.name != result_name: + print(f"Warning: Result file {result_path} does not match specified result name {result.name}.") + return False + if result.date < datetime.strptime(cutoff, "%Y%m%d_%H%M%S"): + return False + return True # key: name of the benchmark test result # value: { command_args: set[str], aggregate: Aggregator } @@ -95,9 +120,13 @@ def get_result_paths() -> list[str]: for result_path in get_result_paths(): with result_path.open('r') as result_f: result = BenchmarkRun.from_json(json.load(result_f)) - - if result.name != result_name: - print(f"Warning: Result file {result_path} has mismatching name {result.name}. Skipping file.") + + # Perform another check on result file here, as get_result_paths() + # only filters out result files via filename, which: + # - does not contain enough information to filter out results, i.e. + # no hostname information. + # - information in filename may be mismatched from metadata. + if not check_benchmark_result(result): continue for test_run in result.results: @@ -139,26 +168,25 @@ def reset_aggregate() -> dict: def to_hist_avg( - hist_avg: dict[str, BenchmarkHistoricAverage], compare_file: str + hist_avg: dict[str, BenchmarkHistoricAverage], target: BenchmarkRun ) -> tuple: """ - Compare results in compare_file to a pre-existing map of historic - averages + Compare results in target to a pre-existing map of historic average. + + Caution: Ensure the generated hist_avg is for results running on the + same host as target.hostname. Args: hist_avg (dict): A historic average map generated from get_hist_avg - compare_file (str): Full filepath of result to compare against + target (BenchmarkRun): results to compare against hist_avg Returns: A tuple returning (list of improved tests, list of regressed tests). 
""" - with open(compare_file, 'r') as compare_f: - compare_result = BenchmarkRun.from_json(json.load(compare_f)) - improvement = [] regression = [] - for test in compare_result.results: + for test in target.results: if test.name not in hist_avg: continue if hist_avg[test.name].command_args != set(test.command[1:]): @@ -186,10 +214,9 @@ def perf_diff_entry() -> dict: return improvement, regression - def to_hist( - avg_type: str, result_name: str, compare_file: str, result_dir: str, cutoff: str, - + avg_type: str, result_name: str, compare_file: str, result_dir: str, + cutoff: str, ) -> tuple: """ Pregenerate a historic average from results named result_name in @@ -213,17 +240,33 @@ def to_hist( """ if avg_type != "median": - print("Only median is currently supported: refusing to continue.") + print("Only median is currently supported: Refusing to continue.") + exit(1) + + try: + with open(compare_file, 'r') as compare_f: + compare_result = BenchmarkRun.from_json(json.load(compare_f)) + except: + print(f"Unable to open {compare_file}.") + exit(1) + + # Sanity checks: + if compare_result.hostname == "Unknown": + print("Hostname for results in {compare_file} unknown, unable to build a historic average: Refusing to continue.") + exit(1) + if not Validate.timestamp(cutoff): + print("Invalid timestamp provided, please follow YYYYMMDD_HHMMSS.") exit(1) - # TODO call validator on cutoff timestamp + # Build historic average and compare results against historic average: hist_avg = Compare.get_hist_avg( result_name, result_dir, + compare_result.hostname, cutoff, exclude=[Path(compare_file).stem] ) - return Compare.to_hist_avg(hist_avg, compare_file) + return Compare.to_hist_avg(hist_avg, compare_result) if __name__ == "__main__": diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index 191189fa1c4a9..cd3681b3d21cb 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -80,6 +80,23 @@ def create_run(self, name: str, results: list[Result]) -> BenchmarkRun: except: git_hash = "unknown" github_repo = None + + # Check if RUNNER_NAME environment variable has been declared. + # + # RUNNER_NAME is always present in github runner environments. Because + # github runners obfusicate hostnames, using socket.gethostname() + # produces different hostnames when ran on the same machine multiple + # times. Thus, we rely on the RUNNER_NAME variable when running on + # github runners. + hostname = os.getenv("RUNNER_NAME") + if hostname is None: + hostname = socket.gethostname() + else if not Validate.runner_name(hostname): + # However, nothing stops github runner env variables (including + # RUNNER_NAME) from being modified by external actors. Ensure + # RUNNER_NAME contains nothing malicious: + # TODO is this overkill? 
+ raise ValueError("Illegal characters found in specified RUNNER_NAME.") return BenchmarkRun( name=name, @@ -87,7 +104,7 @@ def create_run(self, name: str, results: list[Result]) -> BenchmarkRun: github_repo=github_repo, date=datetime.now(tz=timezone.utc), results=results, - hostname=socket.gethostname(), + hostname=hostname, ) def save(self, save_name, results: list[Result], to_file=True): diff --git a/devops/scripts/benchmarks/utils/validate.py b/devops/scripts/benchmarks/utils/validate.py index 3d22c242f8301..893bba54ef630 100644 --- a/devops/scripts/benchmarks/utils/validate.py +++ b/devops/scripts/benchmarks/utils/validate.py @@ -4,12 +4,12 @@ class Validate: """Static class containing methods for validating various fields""" @staticmethod - def filepath(path: str) -> bool: + def runner_name(runner_name: str) -> bool: """ - Returns True if path is clean (no illegal characters), otherwise False. + Returns True if runner_name is clean (no illegal characters). """ - filepath_re = re.compile(r"[a-zA-Z0-9\/\._\-]+") - return filepath_re.match(path) is not None + runner_name_re = re.compile(r"[a-zA-Z0-9_]+") + return runner_name_re.match(runner_name) is not None @staticmethod def timestamp(t: str) -> bool: @@ -19,4 +19,4 @@ def timestamp(t: str) -> bool: timestamp_re = re.compile( r"^\d{4}(0[1-9]|1[0-2])([0-2][0-9]|3[01])_([01][0-9]|2[0-3])[0-5][0-9][0-5][0-9]$" ) - return timestamp_re.match(t) is not None + return timestamp_re.match(t) is not None \ No newline at end of file From 4f5ce719a9472c94522c5d4b0a76c5c379df3ab4 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 31 Mar 2025 20:36:10 -0700 Subject: [PATCH 088/114] typo fix --- devops/scripts/benchmarks/history.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index cd3681b3d21cb..5ec99c18aed8a 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -91,7 +91,7 @@ def create_run(self, name: str, results: list[Result]) -> BenchmarkRun: hostname = os.getenv("RUNNER_NAME") if hostname is None: hostname = socket.gethostname() - else if not Validate.runner_name(hostname): + elif not Validate.runner_name(hostname): # However, nothing stops github runner env variables (including # RUNNER_NAME) from being modified by external actors. 
Ensure # RUNNER_NAME contains nothing malicious: From 9bd519fee870535bc320ba58ab899685133955e7 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Tue, 1 Apr 2025 13:16:08 -0700 Subject: [PATCH 089/114] Ensure timezones are UTC --- devops/scripts/benchmarks/compare.py | 19 +++++++++++++++---- devops/scripts/benchmarks/history.py | 2 +- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/devops/scripts/benchmarks/compare.py b/devops/scripts/benchmarks/compare.py index e4de190b76d03..59ba2b9b69c96 100644 --- a/devops/scripts/benchmarks/compare.py +++ b/devops/scripts/benchmarks/compare.py @@ -1,4 +1,4 @@ -from utils.aggregate import SimpleMedian +from utils.aggregate import Aggregator, SimpleMedian from utils.validate import Validate from utils.result import Result, BenchmarkRun from options import options @@ -7,6 +7,7 @@ import sys import json import argparse +from datetime import datetime, timezone from pathlib import Path from dataclasses import dataclass, asdict @@ -106,7 +107,7 @@ def check_benchmark_result(result: BenchmarkRun) -> bool: if result.name != result_name: print(f"Warning: Result file {result_path} does not match specified result name {result.name}.") return False - if result.date < datetime.strptime(cutoff, "%Y%m%d_%H%M%S"): + if result.date < datetime.strptime(cutoff, "%Y%m%d_%H%M%S").replace(tzinfo=timezone.utc): return False return True @@ -183,6 +184,14 @@ def to_hist_avg( Returns: A tuple returning (list of improved tests, list of regressed tests). """ + def halfway_round(value: int, n: int): + """ + Python's default round() does banker's rounding, which doesn't + make much sense here. This rounds 0.5 to 1, and -0.5 to -1 + """ + if value == 0: return 0 + return int(value * 10**n + 0.5 * (value / abs(value))) / 10**n + improvement = [] regression = [] @@ -206,9 +215,11 @@ def perf_diff_entry() -> dict: res["avg_type"] = hist_avg[test.name].average_type return res - if delta > options.regression_threshold: + # Round to 2 decimal places: not going to fail a test on 0.001% over + # regression threshold + if halfway_round(delta, 2) > options.regression_threshold: improvement.append(perf_diff_entry()) - elif delta < -options.regression_threshold: + elif halfway_round(delta, 2) < -options.regression_threshold: regression.append(perf_diff_entry()) return improvement, regression diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index 5ec99c18aed8a..46d1d0e1d1212 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -120,7 +120,7 @@ def save(self, save_name, results: list[Result], to_file=True): # Use formatted timestamp for the filename timestamp = ( - datetime.now().strftime("%Y%m%d_%H%M%S") + datetime.now(tz=timezone.utc).strftime("%Y%m%d_%H%M%S") if options.timestamp_override is None else options.timestamp_override ) From 3726a7dd5ff98920b2799b421eed4348b6493cb7 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Tue, 1 Apr 2025 13:16:20 -0700 Subject: [PATCH 090/114] Clarify options --- devops/scripts/benchmarks/main.py | 10 +++++----- devops/scripts/benchmarks/options.py | 13 ++++++++++++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 0484fb7c9654c..3ba6190843688 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -259,8 +259,8 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): chart_data = {this_name: results} results_dir = directory 
- if options.custom_results_dir: - results_dir = Path(options.custom_results_dir) + if options.results_directory_override: + results_dir = Path(options.results_directory_override) history = BenchmarkHistory(results_dir) # limit how many files we load. # should this be configurable? @@ -480,7 +480,7 @@ def validate_and_parse_env_args(env_args): "--results-dir", type=str, help="Specify a custom directory to load/store (historical) results from", - default=options.custom_results_dir, + default=options.results_directory_override, ) parser.add_argument( "--build-jobs", @@ -526,7 +526,7 @@ def validate_and_parse_env_args(env_args): options.cudnn_directory = args.cudnn_directory options.cublas_directory = args.cublas_directory options.preset = args.preset - options.custom_results_dir = args.results_dir + options.results_directory_override = args.results_dir options.build_jobs = args.build_jobs options.hip_arch = args.hip_arch @@ -546,7 +546,7 @@ def validate_and_parse_env_args(env_args): if args.results_dir is not None: if not os.path.isdir(args.results_dir): parser.error("Specified --results-dir is not a valid path") - options.custom_results_dir = os.path.abspath(args.results_dir) + options.results_directory_override = os.path.abspath(args.results_dir) benchmark_filter = re.compile(args.filter) if args.filter else None diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index 74a4487807d07..e513767e05747 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -44,11 +44,22 @@ class Options: build_igc: bool = False current_run_name: str = "This PR" preset: str = "Full" - custom_results_dir = None build_jobs: int = multiprocessing.cpu_count() + # Options applicable to CI only: regression_threshold: float = 0.05 + # In CI, it may be necessary to e.g. compare or redo benchmark runs. + # A timestamp is generated at the beginning of the CI run and used through + # the entire CI process, instead of scripts generating their own timestamps + # every time a script runs (default behavior). timestamp_override: str = None + # By default, the directory to fetch results from is the benchmark working + # directory specified in the CLI args, hence a default value of "None" as + # the value is decided via runtime. + # + # However, sometimes you may want to fetch results from a different + # directory, i.e. in CI when you clone the results directory elsewhere. 
+ results_directory_override: str = None options = Options() From 60d80a99cd2c58573e35654f95ba8e952d093d74 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 2 Apr 2025 10:57:55 -0700 Subject: [PATCH 091/114] enforce UTC time in benchmark action --- devops/actions/run-tests/benchmark_v2/action.yml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index 7507ea09492db..c68dda5e0d3d0 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -101,6 +101,12 @@ runs: echo "-----" sycl-ls echo "-----" + # Using --break-system-packages because: + # - venv is not installed + # - unable to install anything via pip, as python packages in the docker + # container are managed by apt + # - apt is unable to install anything due to unresolved dpkg dependencies, + # as a result of how the sycl nightly images are created pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt echo "-----" @@ -112,7 +118,7 @@ runs: esac # TODO accomodate for different GPUs and backends SAVE_NAME="${SAVE_PREFIX}_PVC_${SAVE_SUFFIX}" - SAVE_TIMESTAMP="$(date +'%Y%m%d_%H%M%S')" + SAVE_TIMESTAMP="$(date -u +'%Y%m%d_%H%M%S')" # Timestamps are in UTC time taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \ "$(realpath ./llvm_test_workdir)" \ @@ -126,8 +132,8 @@ runs: echo "-----" python3 ./devops/scripts/benchmarks/compare.py to_hist \ --name Baseline_PVC_L0 \ - --compare-file "./llvm-ci-perf-results/$RUNNER_NAME/results/${SAVE_NAME}_${SAVE_TIMESTAMP}.json" \ - --results-dir "./llvm-ci-perf-results/$RUNNER_NAME/results/" + --compare-file "./llvm-ci-perf-results/results/${SAVE_NAME}_${SAVE_TIMESTAMP}.json" \ + --results-dir "./llvm-ci-perf-results/results/" - name: Push compute-benchmarks results if: inputs.upload_results == 'true' && always() From c69e8745bfd41c79fd4af73ebdc6c690159b4e72 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 2 Apr 2025 21:43:38 -0700 Subject: [PATCH 092/114] Properly load repo/commit information in CI --- .../actions/run-tests/benchmark_v2/action.yml | 20 ++++- devops/scripts/benchmarks/history.py | 73 +++++++++++-------- devops/scripts/benchmarks/main.py | 57 +++++++++++---- devops/scripts/benchmarks/options.py | 19 +++-- devops/scripts/benchmarks/utils/validate.py | 60 +++++++++++++-- 5 files changed, 167 insertions(+), 62 deletions(-) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index c68dda5e0d3d0..903cb07256c36 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -110,6 +110,19 @@ runs: pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt echo "-----" + # clang builds have git repo / commit hashes in their --version output, + # same goes for dpcpp. Obtain git repo / commit hash info this way: + + # First line of --version is formatted 'clang version ... ( )' + # thus we parse for ( ): + sycl_git_info="$(clang++ --version | head -n 1 | grep -oE '\([^ ]+ [a-f0-9]+\)$')" | tr -d '()' + if [ -z "$sycl_git_info" ]; then + echo "Error: Unable to deduce SYCL build source repo/commit: Are you sure dpcpp variable is in PATH?" 
+ exit 1 + fi + sycl_git_repo="$(printf "$sycl_git_info" | cut -d' ' -f1)" + sycl_git_commit="$(printf "$sycl_git_info" | cut -d' ' -f2)" + case "$ONEAPI_DEVICE_SELECTOR" in level_zero:*) SAVE_SUFFIX="L0" ;; level_zero_v2:*) SAVE_SUFFIX="L0v2" ;; @@ -128,10 +141,13 @@ runs: --results-dir "./llvm-ci-perf-results/" \ --output-dir "./llvm-ci-perf-results/" \ --preset "$PRESET" \ - --timestamp-override "$SAVE_TIMESTAMP" + # CI options: + --timestamp-override "$SAVE_TIMESTAMP" \ + --sycl-github-repo "$sycl_git_repo" \ + --sycl-commit "$sycl_git_commit" echo "-----" python3 ./devops/scripts/benchmarks/compare.py to_hist \ - --name Baseline_PVC_L0 \ + --name "$SAVE_NAME" \ --compare-file "./llvm-ci-perf-results/results/${SAVE_NAME}_${SAVE_TIMESTAMP}.json" \ --results-dir "./llvm-ci-perf-results/results/" diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index 46d1d0e1d1212..eaaaa8276a8be 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -58,45 +58,58 @@ def extract_timestamp(file_path: Path) -> str: self.runs = benchmark_runs def create_run(self, name: str, results: list[Result]) -> BenchmarkRun: - try: - script_dir = os.path.dirname(os.path.abspath(__file__)) - result = run("git rev-parse --short HEAD", cwd=script_dir) - git_hash = result.stdout.decode().strip() - - # Get the GitHub repo URL from git remote - remote_result = run("git remote get-url origin", cwd=script_dir) - remote_url = remote_result.stdout.decode().strip() - - # Convert SSH or HTTPS URL to owner/repo format - if remote_url.startswith("git@github.com:"): - # SSH format: git@github.com:owner/repo.git - github_repo = remote_url.split("git@github.com:")[1].rstrip(".git") - elif remote_url.startswith("https://github.com/"): - # HTTPS format: https://github.com/owner/repo.git - github_repo = remote_url.split("https://github.com/")[1].rstrip(".git") - else: + + def git_info_from_path(path: Path) -> (str, str): + """ + Derives git repo, commit information from git repo located in path. + + Returns: + (str, str): git_hash, github_repo + """ + try: + result = run("git rev-parse --short HEAD", cwd=path) + git_hash = result.stdout.decode().strip() + + # Get the GitHub repo URL from git remote + remote_result = run("git remote get-url origin", cwd=path) + remote_url = remote_result.stdout.decode().strip() + + # Convert SSH or HTTPS URL to owner/repo format + if remote_url.startswith("git@github.com:"): + # SSH format: git@github.com:owner/repo.git + github_repo = remote_url.split("git@github.com:")[1].rstrip(".git") + elif remote_url.startswith("https://github.com/"): + # HTTPS format: https://github.com/owner/repo.git + github_repo = remote_url.split("https://github.com/")[1].rstrip(".git") + else: + github_repo = None + + except: + git_hash = "unknown" github_repo = None + + return git_hash, github_repo - except: - git_hash = "unknown" - github_repo = None + if options.sycl_commit is None or options.sycl_github_repo is None: + git_hash, github_repo = git_info_from_path(os.path.dirname(os.path.abspath(__file__))) + else: + git_hash, github_repo = options.sycl_commit, options.sycl_github_repo # Check if RUNNER_NAME environment variable has been declared. # - # RUNNER_NAME is always present in github runner environments. 
Because
-        # github runners obfusicate hostnames, using socket.gethostname()
-        # produces different hostnames when ran on the same machine multiple
-        # times. Thus, we rely on the RUNNER_NAME variable when running on
-        # github runners.
+        # Github runners obfuscate hostnames, thus running socket.gethostname()
+        # twice produces two different hostnames. Since github runners always
+        # define a RUNNER_NAME variable, use RUNNER_NAME instead if it exists:
         hostname = os.getenv("RUNNER_NAME")
         if hostname is None:
             hostname = socket.gethostname()
-        elif not Validate.runner_name(hostname):
-            # However, nothing stops github runner env variables (including
-            # RUNNER_NAME) from being modified by external actors.
options.timestamp_override = args.timestamp_override if args.results_dir is not None: if not os.path.isdir(args.results_dir): parser.error("Specified --results-dir is not a valid path") options.results_directory_override = os.path.abspath(args.results_dir) + if args.sycl_github_repo is not None or args.sycl_commit is not None: + if args.sycl_github_repo is None or args.sycl_commit is None: + parser.error("--sycl-github-repo and --sycl-commit must both be defined together") + options.sycl_github_repo = args.sycl_github_repo + options.sycl_commit = args.sycl_commit benchmark_filter = re.compile(args.filter) if args.filter else None diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index e513767e05747..9063dd23585da 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -46,20 +46,23 @@ class Options: preset: str = "Full" build_jobs: int = multiprocessing.cpu_count() - # Options applicable to CI only: + # Options intended for CI: regression_threshold: float = 0.05 - # In CI, it may be necessary to e.g. compare or redo benchmark runs. - # A timestamp is generated at the beginning of the CI run and used through - # the entire CI process, instead of scripts generating their own timestamps - # every time a script runs (default behavior). + # It's necessary in CI to compare or redo benchmark runs. Instead of + # generating a new timestamp each run by default, specify a single timestamp + # to use across the entire CI run. timestamp_override: str = None - # By default, the directory to fetch results from is the benchmark working - # directory specified in the CLI args, hence a default value of "None" as - # the value is decided via runtime. + # The default directory to fetch results from is args.benchmark_directory, + # hence a default value of "None" as the value is decided during runtime. # # However, sometimes you may want to fetch results from a different # directory, i.e. in CI when you clone the results directory elsewhere. results_directory_override: str = None + # By default, we fetch SYCL commit info from the folder where main.py is + # located. This doesn't work right when CI uses different commits for e.g. + # CI scripts vs SYCl build source. + sycl_github_repo: str = None + sycl_commit: str = None options = Options() diff --git a/devops/scripts/benchmarks/utils/validate.py b/devops/scripts/benchmarks/utils/validate.py index 893bba54ef630..77bd13f4f9971 100644 --- a/devops/scripts/benchmarks/utils/validate.py +++ b/devops/scripts/benchmarks/utils/validate.py @@ -1,22 +1,68 @@ import re +def validate_on_re(val: str, regex: re.Pattern, throw: Exception = None): + """ + Returns True if val is matched by pattern defined by regex, otherwise False. + + If `throw` argument is not None: return val as-is if val matches regex, + otherwise raise error defined by throw. + """ + is_matching: bool = re.compile(regex).match(val) is not None + + if throw is None: return is_matching + elif not is_matching: raise throw + else: return val + + class Validate: """Static class containing methods for validating various fields""" @staticmethod - def runner_name(runner_name: str) -> bool: + def runner_name(runner_name: str, throw: Exception = None): """ Returns True if runner_name is clean (no illegal characters). 
""" - runner_name_re = re.compile(r"[a-zA-Z0-9_]+") - return runner_name_re.match(runner_name) is not None + return validate_on_re(runner_name, r"^[a-zA-Z0-9_]+$", throw=throw) @staticmethod - def timestamp(t: str) -> bool: + def timestamp(t: str, throw: Exception = None): """ Returns True if t is in form YYYYMMDD_HHMMSS, otherwise False. + + If throw argument is specified: return t as-is if t is in aforementioned + format, otherwise raise error defined by throw. + """ + return validate_on_re( + t, + r"^\d{4}(0[1-9]|1[0-2])([0-2][0-9]|3[01])_([01][0-9]|2[0-3])[0-5][0-9][0-5][0-9]$", + throw=throw + ) + + @staticmethod + def github_repo(repo: str, throw: Exception = None): + """ + Returns True if repo is of form / + + If throw argument is specified: return repo as-is if repo is in + aforementioned format, otherwise raise error defined by throw. """ - timestamp_re = re.compile( - r"^\d{4}(0[1-9]|1[0-2])([0-2][0-9]|3[01])_([01][0-9]|2[0-3])[0-5][0-9][0-5][0-9]$" + return validate_on_re( + re.sub(r"^https?://github.com/", "", repo), + r"^[a-zA-Z0-9_-]{1,39}/[a-zA-Z0-9_.-]{1,100}$", + throw=throw ) - return timestamp_re.match(t) is not None \ No newline at end of file + + @staticmethod + def commit_hash(commit: str, throw: Exception = None, trunc: int = 40): + """ + Returns True if commit is a valid git commit hash. + + If throw argument is specified: return commit hash (truncated to trunc + chars long) if commit is a valid commit hash, otherwise raise error + defined by throw. + """ + commit_re = r"^[a-f0-9]{7,40}$" + if throw is None: + return validate_on_re(commit, commit_re) + else: + return validate_on_re(commit, commit_re, throw=throw)[:trunc] From 6224eaa01ce9fc052a142f2ea5d9f7bcb8702c93 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 2 Apr 2025 22:31:31 -0700 Subject: [PATCH 093/114] [test] debug message --- devops/actions/run-tests/benchmark_v2/action.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index 903cb07256c36..1bfa605c98e8b 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -115,6 +115,7 @@ runs: # First line of --version is formatted 'clang version ... ( )' # thus we parse for ( ): + clang++ --version sycl_git_info="$(clang++ --version | head -n 1 | grep -oE '\([^ ]+ [a-f0-9]+\)$')" | tr -d '()' if [ -z "$sycl_git_info" ]; then echo "Error: Unable to deduce SYCL build source repo/commit: Are you sure dpcpp variable is in PATH?" From f0a9a9722cab9f6462deb8db84361d71b68d04ed Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 2 Apr 2025 22:36:52 -0700 Subject: [PATCH 094/114] I forgot a ) --- devops/actions/run-tests/benchmark_v2/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index 1bfa605c98e8b..3964d4f801851 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -116,7 +116,7 @@ runs: # First line of --version is formatted 'clang version ... 
( )' # thus we parse for ( ): clang++ --version - sycl_git_info="$(clang++ --version | head -n 1 | grep -oE '\([^ ]+ [a-f0-9]+\)$')" | tr -d '()' + sycl_git_info="$(clang++ --version | head -n 1 | grep -oE '\([^ ]+ [a-f0-9]+\)$')" | tr -d '()')" if [ -z "$sycl_git_info" ]; then echo "Error: Unable to deduce SYCL build source repo/commit: Are you sure dpcpp variable is in PATH?" exit 1 From b68c11914967063c192705bc495389c0beaba5b2 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 2 Apr 2025 22:39:51 -0700 Subject: [PATCH 095/114] misplaced ) --- devops/actions/run-tests/benchmark_v2/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index 3964d4f801851..f83276c3f2dba 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -116,7 +116,7 @@ runs: # First line of --version is formatted 'clang version ... ( )' # thus we parse for ( ): clang++ --version - sycl_git_info="$(clang++ --version | head -n 1 | grep -oE '\([^ ]+ [a-f0-9]+\)$')" | tr -d '()')" + sycl_git_info="$(clang++ --version | head -n 1 | grep -oE '\([^ ]+ [a-f0-9]+\)$' | tr -d '()')" if [ -z "$sycl_git_info" ]; then echo "Error: Unable to deduce SYCL build source repo/commit: Are you sure dpcpp variable is in PATH?" exit 1 From cc17af9f71f5f6ef2d2c7a35702133aec5b07370 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 2 Apr 2025 22:47:24 -0700 Subject: [PATCH 096/114] revert test --- devops/actions/run-tests/benchmark_v2/action.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index f83276c3f2dba..12052e33a6b76 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -115,7 +115,6 @@ runs: # First line of --version is formatted 'clang version ... ( )' # thus we parse for ( ): - clang++ --version sycl_git_info="$(clang++ --version | head -n 1 | grep -oE '\([^ ]+ [a-f0-9]+\)$' | tr -d '()')" if [ -z "$sycl_git_info" ]; then echo "Error: Unable to deduce SYCL build source repo/commit: Are you sure dpcpp variable is in PATH?" @@ -142,7 +141,6 @@ runs: --results-dir "./llvm-ci-perf-results/" \ --output-dir "./llvm-ci-perf-results/" \ --preset "$PRESET" \ - # CI options: --timestamp-override "$SAVE_TIMESTAMP" \ --sycl-github-repo "$sycl_git_repo" \ --sycl-commit "$sycl_git_commit" From 64832a64d27820c595729f4823d1d4f2f0e54fc4 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 2 Apr 2025 23:04:27 -0700 Subject: [PATCH 097/114] [test] debug statements --- devops/actions/run-tests/benchmark_v2/action.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index 12052e33a6b76..b7c3840948681 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -122,6 +122,7 @@ runs: fi sycl_git_repo="$(printf "$sycl_git_info" | cut -d' ' -f1)" sycl_git_commit="$(printf "$sycl_git_info" | cut -d' ' -f2)" + echo "$sycl_git_repo, $sycl_git_commit" case "$ONEAPI_DEVICE_SELECTOR" in level_zero:*) SAVE_SUFFIX="L0" ;; From ab0700178b7bccdbcd3fae34747b63467e41b11b Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 2 Apr 2025 23:08:41 -0700 Subject: [PATCH 098/114] Whitespace was causing issues? 
--- devops/actions/run-tests/benchmark_v2/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index b7c3840948681..928a19d8626c0 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -143,7 +143,7 @@ runs: --output-dir "./llvm-ci-perf-results/" \ --preset "$PRESET" \ --timestamp-override "$SAVE_TIMESTAMP" \ - --sycl-github-repo "$sycl_git_repo" \ + --sycl-github-repo "$sycl_git_repo" \ --sycl-commit "$sycl_git_commit" echo "-----" python3 ./devops/scripts/benchmarks/compare.py to_hist \ From 2b94436f9d6c1ccc5f872ef2813d6295af938e25 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 3 Apr 2025 14:13:45 -0700 Subject: [PATCH 099/114] rename variables and remove sycl_ prefix --- devops/scripts/benchmarks/compare.py | 7 ++----- devops/scripts/benchmarks/history.py | 4 ++-- devops/scripts/benchmarks/main.py | 26 +++++++++++++------------- devops/scripts/benchmarks/options.py | 4 ++-- 4 files changed, 19 insertions(+), 22 deletions(-) diff --git a/devops/scripts/benchmarks/compare.py b/devops/scripts/benchmarks/compare.py index 59ba2b9b69c96..fbce279a9ea13 100644 --- a/devops/scripts/benchmarks/compare.py +++ b/devops/scripts/benchmarks/compare.py @@ -94,7 +94,7 @@ def get_result_paths() -> list[str]: ) ) - def check_benchmark_result(result: BenchmarkRun) -> bool: + def validate_benchmark_result(result: BenchmarkRun) -> bool: """ Returns True if result file: - Was ran on the target machine/hostname specified @@ -127,16 +127,13 @@ def check_benchmark_result(result: BenchmarkRun) -> bool: # - does not contain enough information to filter out results, i.e. # no hostname information. # - information in filename may be mismatched from metadata. - if not check_benchmark_result(result): + if not validate_benchmark_result(result): continue for test_run in result.results: def reset_aggregate() -> dict: return { "command_args": set(test_run.command[1:]), - # The assumption here is that "value" is median - # TODO standardization should happen here on what "value" - # really is "aggregate": aggregator(starting_elements=[test_run.value]) } diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index eaaaa8276a8be..e28914e74ad13 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -90,10 +90,10 @@ def git_info_from_path(path: Path) -> (str, str): return git_hash, github_repo - if options.sycl_commit is None or options.sycl_github_repo is None: + if options.git_commit_override is None or options.github_repo_override is None: git_hash, github_repo = git_info_from_path(os.path.dirname(os.path.abspath(__file__))) else: - git_hash, github_repo = options.sycl_commit, options.sycl_github_repo + git_hash, github_repo = options.git_commit_override, options.github_repo_override # Check if RUNNER_NAME environment variable has been declared. 
# diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 04d8a37510d9b..2006a9084d932 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -506,22 +506,22 @@ def validate_and_parse_env_args(env_args): default=options.timestamp_override, ) parser.add_argument( - "--sycl-github-repo", + "--github-repo", type=lambda gh_repo: Validate.github_repo( gh_repo, - throw=argparse.ArgumentTypeError("Specified SYCL github repo not in / format.") + throw=argparse.ArgumentTypeError("Specified github repo not in / format.") ), - help="Manually specify SYCL github repo used in metadata", - default=options.sycl_github_repo, + help="Manually specify github repo metadata of component tested (e.g. SYCL, UMF)", + default=options.github_repo_override, ) parser.add_argument( - "--sycl-commit", + "--git-commit", type=lambda commit: Validate.commit_hash( commit, - throw=argparse.ArgumentTypeError("Specified SYCL commit is not a valid commit hash.") + throw=argparse.ArgumentTypeError("Specified commit is not a valid commit hash.") ), - help="Manually specify commit hash used to build SYCL in metadata", - default=options.sycl_commit, + help="Manually specify commit hash metadata of component tested (e.g. SYCL, UMF)", + default=options.git_commit_override, ) args = parser.parse_args() @@ -569,11 +569,11 @@ def validate_and_parse_env_args(env_args): if not os.path.isdir(args.results_dir): parser.error("Specified --results-dir is not a valid path") options.results_directory_override = os.path.abspath(args.results_dir) - if args.sycl_github_repo is not None or args.sycl_commit is not None: - if args.sycl_github_repo is None or args.sycl_commit is None: - parser.error("--sycl-github-repo and --sycl-commit must both be defined together") - options.sycl_github_repo = args.sycl_github_repo - options.sycl_commit = args.sycl_commit + if args.github_repo is not None or args.git_commit is not None: + if args.github_repo is None or args.git_commit is None: + parser.error("--github-repo and --git_commit must both be defined together") + options.github_repo_override = args.github_repo + options.git_commit_override = args.git_commit benchmark_filter = re.compile(args.filter) if args.filter else None diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index 9063dd23585da..b96801de0cb06 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -61,8 +61,8 @@ class Options: # By default, we fetch SYCL commit info from the folder where main.py is # located. This doesn't work right when CI uses different commits for e.g. # CI scripts vs SYCl build source. 
- sycl_github_repo: str = None - sycl_commit: str = None + github_repo_override: str = None + git_commit_override: str = None options = Options() From 63c3092428fb7ce0e2b4c41e312907572a665969 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 3 Apr 2025 14:14:04 -0700 Subject: [PATCH 100/114] Delete text message, fix whitespace --- .github/workflows/sycl-docs.yml | 6 +++--- devops/actions/run-tests/benchmark_v2/action.yml | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/sycl-docs.yml b/.github/workflows/sycl-docs.yml index 6b748ec9c7ebb..7bb6a568892a8 100644 --- a/.github/workflows/sycl-docs.yml +++ b/.github/workflows/sycl-docs.yml @@ -52,10 +52,10 @@ jobs: cp -r $GITHUB_WORKSPACE/repo/devops/scripts/benchmarks/html benchmarks touch .nojekyll # Update benchmarking dashboard configuration - cat << 'EOF' > benchmarks/config.js - remoteDataUrl = 'https://raw.githubusercontent.com/intel/llvm-ci-perf-results/refs/heads/unify-ci/UR_DNP_INTEL_06_03/data.json'; + cat << EOF > benchmarks/config.js + remoteDataUrl = 'https://raw.githubusercontent.com/intel/llvm-ci-perf-results/refs/heads/unify-ci/data.json'; defaultCompareNames = ["Baseline_PVC_L0"]; - EOF + EOF # Upload the generated docs as an artifact and deploy to GitHub Pages. - name: Upload artifact uses: actions/upload-pages-artifact@v3 diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index 928a19d8626c0..e829b7c288bbd 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -122,7 +122,6 @@ runs: fi sycl_git_repo="$(printf "$sycl_git_info" | cut -d' ' -f1)" sycl_git_commit="$(printf "$sycl_git_info" | cut -d' ' -f2)" - echo "$sycl_git_repo, $sycl_git_commit" case "$ONEAPI_DEVICE_SELECTOR" in level_zero:*) SAVE_SUFFIX="L0" ;; @@ -143,8 +142,8 @@ runs: --output-dir "./llvm-ci-perf-results/" \ --preset "$PRESET" \ --timestamp-override "$SAVE_TIMESTAMP" \ - --sycl-github-repo "$sycl_git_repo" \ - --sycl-commit "$sycl_git_commit" + --github-repo "$sycl_git_repo" \ + --git-commit "$sycl_git_commit" echo "-----" python3 ./devops/scripts/benchmarks/compare.py to_hist \ --name "$SAVE_NAME" \ From ca96184b149ed842ca67709e57f7e98627ce9204 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 4 Apr 2025 13:48:06 -0700 Subject: [PATCH 101/114] Set up multiple push attempts in CI --- .../actions/run-tests/benchmark_v2/action.yml | 40 ++++++++++++++++--- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index e829b7c288bbd..734bd45c8b16d 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -157,13 +157,43 @@ runs: cd "./llvm-ci-perf-results" git config user.name "SYCL Benchmarking Bot" git config user.email "sys_sycl_benchmarks@intel.com" - git pull + results_branch="unify-ci" + git add . - # Make sure changes have been made if git diff --quiet && git diff --cached --quiet; then echo "No new results added, skipping push." - else - git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" - git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" unify-ci + exit 0 fi + for attempt in 1 2 3; do + echo "Attempt $attempt to push new results" + git add . 
+ git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" + results_file="$(git diff HEAD~1 --name-only -- results/ | head -n 1)" + + if git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" "$results_branch"; then + echo "Push succeeded" + break + fi + + echo "Push failed, retrying..." + if [ -n "$results_file" ]; then + cached_result="$(mktemp -d)/$(basename $results_file)" + mv "$results_file" "$cached_result" + + git reset --hard "origin/$results_branch" + git pull origin "$results_branch" + + mv "$cached_result" "$results_file" + fi + + echo "Regenerating data.json..." + cd ../ + ./devops/scripts/benchmarks/main.py \ + "$(realpath ./llvm_test_workdir)" \ + --output-html remote \ + --results-dir "./llvm-ci-perf-results/" \ + --output-dir "./llvm-ci-perf-results/" \ + --dry-run + cd - + done From a3d7ff6e0606d2d2be74acd71b2f0d753864f739 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 4 Apr 2025 14:12:41 -0700 Subject: [PATCH 102/114] Apply clang format --- devops/scripts/benchmarks/compare.py | 119 ++++++++++++-------- devops/scripts/benchmarks/history.py | 31 +++-- devops/scripts/benchmarks/main.py | 12 +- devops/scripts/benchmarks/presets.py | 21 ++-- devops/scripts/benchmarks/utils/validate.py | 14 ++- 5 files changed, 119 insertions(+), 78 deletions(-) diff --git a/devops/scripts/benchmarks/compare.py b/devops/scripts/benchmarks/compare.py index fbce279a9ea13..47170841c693e 100644 --- a/devops/scripts/benchmarks/compare.py +++ b/devops/scripts/benchmarks/compare.py @@ -11,9 +11,11 @@ from pathlib import Path from dataclasses import dataclass, asdict + @dataclass class BenchmarkHistoricAverage: """Contains historic average information for 1 benchmark""" + # Name of benchmark as defined in Benchmark class definition name: str @@ -32,27 +34,32 @@ class BenchmarkHistoricAverage: # # This exists to ensure benchmarks called using different arguments are not # compared together. - command_args: set[str] + command_args: set[str] # TODO Ensure ONEAPI_DEVICE_SELECTOR? GPU name itself? 
class Compare: """Class containing logic for comparisons between results""" + @staticmethod def get_hist_avg( - result_name: str, result_dir: str, hostname: str, cutoff: str, - aggregator: Aggregator = SimpleMedian, exclude: list[str] = [] + result_name: str, + result_dir: str, + hostname: str, + cutoff: str, + aggregator: Aggregator = SimpleMedian, + exclude: list[str] = [], ) -> dict[str, BenchmarkHistoricAverage]: """ Create a historic average for results named result_name in result_dir using the specified aggregator Args: - result_name (str): Name of benchmarking result to obtain average for + result_name (str): Name of benchmarking result to obtain average for result_dir (str): Path to folder containing benchmark results cutoff (str): Timestamp in YYYYMMDD_HHMMSS of oldest results used in average calcultaion - hostname (str): Hostname of machine on which results ran on + hostname (str): Hostname of machine on which results ran on aggregator (Aggregator): The aggregator to use for calculating the historic average exclude (list[str]): List of filenames (only the stem) to exclude @@ -90,10 +97,10 @@ def get_result_paths() -> list[str]: # Result file is not excluded and f.stem not in exclude, # Assumes format is _YYYYMMDD_HHMMSS.json - cache_dir.glob(f"{result_name}_*_*.json") + cache_dir.glob(f"{result_name}_*_*.json"), ) ) - + def validate_benchmark_result(result: BenchmarkRun) -> bool: """ Returns True if result file: @@ -105,21 +112,25 @@ def validate_benchmark_result(result: BenchmarkRun) -> bool: if result.hostname != hostname: return False if result.name != result_name: - print(f"Warning: Result file {result_path} does not match specified result name {result.name}.") + print( + f"Warning: Result file {result_path} does not match specified result name {result.name}." + ) return False - if result.date < datetime.strptime(cutoff, "%Y%m%d_%H%M%S").replace(tzinfo=timezone.utc): + if result.date < datetime.strptime(cutoff, "%Y%m%d_%H%M%S").replace( + tzinfo=timezone.utc + ): return False return True # key: name of the benchmark test result # value: { command_args: set[str], aggregate: Aggregator } - # + # # This is then used to build a dict[BenchmarkHistoricAverage] used # to find historic averages. 
average_aggregate: dict[str, dict] = dict() - + for result_path in get_result_paths(): - with result_path.open('r') as result_f: + with result_path.open("r") as result_f: result = BenchmarkRun.from_json(json.load(result_f)) # Perform another check on result file here, as get_result_paths() @@ -131,10 +142,11 @@ def validate_benchmark_result(result: BenchmarkRun) -> bool: continue for test_run in result.results: + def reset_aggregate() -> dict: - return { + return { "command_args": set(test_run.command[1:]), - "aggregate": aggregator(starting_elements=[test_run.value]) + "aggregate": aggregator(starting_elements=[test_run.value]), } # Add every benchmark run to average_aggregate: @@ -142,28 +154,36 @@ def reset_aggregate() -> dict: average_aggregate[test_run.name] = reset_aggregate() else: # Check that we are comparing runs with the same cmd args: - if set(test_run.command[1:]) == average_aggregate[test_run.name]["command_args"]: - average_aggregate[test_run.name]["aggregate"].add(test_run.value) + if ( + set(test_run.command[1:]) + == average_aggregate[test_run.name]["command_args"] + ): + average_aggregate[test_run.name]["aggregate"].add( + test_run.value + ) else: # If the command args used between runs are different, # discard old run data and prefer new command args # # This relies on the fact that paths from get_result_paths() # is sorted from older to newer - print(f"Warning: Command args for {test_run.name} from {result_path} is different from prior runs.") - print("DISCARDING older data and OVERRIDING with data using new arg.") + print( + f"Warning: Command args for {test_run.name} from {result_path} is different from prior runs." + ) + print( + "DISCARDING older data and OVERRIDING with data using new arg." + ) average_aggregate[test_run.name] = reset_aggregate() - + return { name: BenchmarkHistoricAverage( name=name, average_type=stats["aggregate"].get_type(), value=stats["aggregate"].get_avg(), - command_args=stats["command_args"] + command_args=stats["command_args"], ) for name, stats in average_aggregate.items() } - def to_hist_avg( hist_avg: dict[str, BenchmarkHistoricAverage], target: BenchmarkRun @@ -181,12 +201,14 @@ def to_hist_avg( Returns: A tuple returning (list of improved tests, list of regressed tests). """ + def halfway_round(value: int, n: int): """ Python's default round() does banker's rounding, which doesn't make much sense here. 
This rounds 0.5 to 1, and -0.5 to -1 """ - if value == 0: return 0 + if value == 0: + return 0 return int(value * 10**n + 0.5 * (value / abs(value))) / 10**n improvement = [] @@ -198,11 +220,11 @@ def halfway_round(value: int, n: int): if hist_avg[test.name].command_args != set(test.command[1:]): print(f"Warning: skipped {test.name} due to command args mismatch.") continue - + delta = 1 - ( test.value / hist_avg[test.name].value - if test.lower_is_better else - hist_avg[test.name].value / test.value + if test.lower_is_better + else hist_avg[test.name].value / test.value ) def perf_diff_entry() -> dict: @@ -221,9 +243,11 @@ def perf_diff_entry() -> dict: return improvement, regression - def to_hist( - avg_type: str, result_name: str, compare_file: str, result_dir: str, + avg_type: str, + result_name: str, + compare_file: str, + result_dir: str, cutoff: str, ) -> tuple: """ @@ -236,7 +260,7 @@ def to_hist( result_dir (str): Directory to look for results in cutoff (str): Timestamp (in YYYYMMDD_HHMMSS) indicating the oldest result included in the historic average calculation - avg_type (str): Type of "average" (measure of central tendency) to + avg_type (str): Type of "average" (measure of central tendency) to use in historic "average" calculation Returns: @@ -245,14 +269,14 @@ def to_hist( avg_type, and delta field added, indicating the historic average, type of central tendency used for historic average, and the delta from the average for this benchmark run. - """ + """ if avg_type != "median": print("Only median is currently supported: Refusing to continue.") exit(1) try: - with open(compare_file, 'r') as compare_f: + with open(compare_file, "r") as compare_f: compare_result = BenchmarkRun.from_json(json.load(compare_f)) except: print(f"Unable to open {compare_file}.") @@ -260,7 +284,9 @@ def to_hist( # Sanity checks: if compare_result.hostname == "Unknown": - print("Hostname for results in {compare_file} unknown, unable to build a historic average: Refusing to continue.") + print( + "Hostname for results in {compare_file} unknown, unable to build a historic average: Refusing to continue." 
+ ) exit(1) if not Validate.timestamp(cutoff): print("Invalid timestamp provided, please follow YYYYMMDD_HHMMSS.") @@ -272,7 +298,7 @@ def to_hist( result_dir, compare_result.hostname, cutoff, - exclude=[Path(compare_file).stem] + exclude=[Path(compare_file).stem], ) return Compare.to_hist_avg(hist_avg, compare_result) @@ -280,36 +306,35 @@ def to_hist( if __name__ == "__main__": parser = argparse.ArgumentParser(description="Compare benchmark results") subparsers = parser.add_subparsers(dest="operation", required=True) - parser_avg = subparsers.add_parser("to_hist", help="Compare a benchmark result to historic average") + parser_avg = subparsers.add_parser( + "to_hist", help="Compare a benchmark result to historic average" + ) parser_avg.add_argument( "--avg-type", type=str, help="Measure of central tendency to use when computing historic average", - default="median" + default="median", ) parser_avg.add_argument( "--name", type=str, required=True, - help="Save name of the benchmark results to compare to" + help="Save name of the benchmark results to compare to", ) parser_avg.add_argument( "--compare-file", type=str, required=True, - help="Result file to compare against te historic average" + help="Result file to compare against te historic average", ) parser_avg.add_argument( - "--results-dir", - type=str, - required=True, - help="Directory storing results" + "--results-dir", type=str, required=True, help="Directory storing results" ) parser_avg.add_argument( "--cutoff", type=str, help="Timestamp (in YYYYMMDD_HHMMSS) of oldest result to include in historic average calculation", - default="20000101_010101" + default="20000101_010101", ) args = parser.parse_args() @@ -322,11 +347,7 @@ def to_hist( raise ValueError("Timestamp must be provided as YYYYMMDD_HHMMSS.") improvements, regressions = Compare.to_hist( - "median", - args.name, - args.compare_file, - args.results_dir, - args.cutoff + "median", args.name, args.compare_file, args.results_dir, args.cutoff ) def print_regression(entry: dict): @@ -339,10 +360,12 @@ def print_regression(entry: dict): if improvements: print("#\n# Improvements:\n#\n") - for test in improvements: print_regression(test) + for test in improvements: + print_regression(test) if regressions: print("#\n# Regressions:\n#\n") - for test in regressions: print_regression(test) + for test in regressions: + print_regression(test) exit(1) # Exit 1 to trigger github test failure else: print("Unsupported operation: exiting.") diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index fd0b71c04908c..30dc607aa54a6 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -31,7 +31,9 @@ def load_result(self, file_path: Path) -> BenchmarkRun: def load(self, n: int): results_dir = Path(self.dir) / "results" if not results_dir.exists() or not results_dir.is_dir(): - print(f"Warning: {results_dir} is not a valid directory: no historic results loaded.") + print( + f"Warning: {results_dir} is not a valid directory: no historic results loaded." 
+ ) return # Get all JSON files in the results directory @@ -40,8 +42,8 @@ def load(self, n: int): # Extract timestamp and sort files by it def extract_timestamp(file_path: Path) -> str: try: - # Assumes results are stored as _YYYYMMDD_HHMMSS.json - ts = file_path.stem[-len("YYYYMMDD_HHMMSS"):] + # Assumes results are stored as _YYYYMMDD_HHMMSS.json + ts = file_path.stem[-len("YYYYMMDD_HHMMSS") :] return ts if Validate.timestamp(ts) else "" except IndexError: return "" @@ -80,21 +82,28 @@ def git_info_from_path(path: Path) -> (str, str): github_repo = remote_url.split("git@github.com:")[1].rstrip(".git") elif remote_url.startswith("https://github.com/"): # HTTPS format: https://github.com/owner/repo.git - github_repo = remote_url.split("https://github.com/")[1].rstrip(".git") + github_repo = remote_url.split("https://github.com/")[1].rstrip( + ".git" + ) else: github_repo = None except: git_hash = "unknown" github_repo = None - + return git_hash, github_repo if options.git_commit_override is None or options.github_repo_override is None: - git_hash, github_repo = git_info_from_path(os.path.dirname(os.path.abspath(__file__))) + git_hash, github_repo = git_info_from_path( + os.path.dirname(os.path.abspath(__file__)) + ) else: - git_hash, github_repo = options.git_commit_override, options.github_repo_override - + git_hash, github_repo = ( + options.git_commit_override, + options.github_repo_override, + ) + # Check if RUNNER_NAME environment variable has been declared. # # Github runners obfusicate hostnames, thus running socket.gethostname() @@ -108,7 +117,7 @@ def git_info_from_path(path: Path) -> (str, str): # TODO is this overkill? Validate.runner_name( hostname, - throw=ValueError("Illegal characters found in specified RUNNER_NAME.") + throw=ValueError("Illegal characters found in specified RUNNER_NAME."), ) compute_runtime = ( @@ -139,8 +148,8 @@ def save(self, save_name, results: list[Result], to_file=True): # Use formatted timestamp for the filename timestamp = ( datetime.now(tz=timezone.utc).strftime("%Y%m%d_%H%M%S") - if options.timestamp_override is None else - options.timestamp_override + if options.timestamp_override is None + else options.timestamp_override ) file_path = Path(os.path.join(results_dir, f"{save_name}_{timestamp}.json")) with file_path.open("w") as file: diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 2006a9084d932..397632e138978 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -500,7 +500,9 @@ def validate_and_parse_env_args(env_args): "--timestamp-override", type=lambda ts: Validate.timestamp( ts, - throw=argparse.ArgumentTypeError("Specified timestamp not in YYYYMMDD_HHMMSS format.") + throw=argparse.ArgumentTypeError( + "Specified timestamp not in YYYYMMDD_HHMMSS format." + ), ), help="Manually specify timestamp used in metadata", default=options.timestamp_override, @@ -509,7 +511,9 @@ def validate_and_parse_env_args(env_args): "--github-repo", type=lambda gh_repo: Validate.github_repo( gh_repo, - throw=argparse.ArgumentTypeError("Specified github repo not in / format.") + throw=argparse.ArgumentTypeError( + "Specified github repo not in / format." + ), ), help="Manually specify github repo metadata of component tested (e.g. 
SYCL, UMF)", default=options.github_repo_override, @@ -518,7 +522,9 @@ def validate_and_parse_env_args(env_args): "--git-commit", type=lambda commit: Validate.commit_hash( commit, - throw=argparse.ArgumentTypeError("Specified commit is not a valid commit hash.") + throw=argparse.ArgumentTypeError( + "Specified commit is not a valid commit hash." + ), ), help="Manually specify commit hash metadata of component tested (e.g. SYCL, UMF)", default=options.git_commit_override, diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py index 61ce9f4aebc49..fc7e1ffb59f3d 100644 --- a/devops/scripts/benchmarks/presets.py +++ b/devops/scripts/benchmarks/presets.py @@ -42,27 +42,24 @@ def enabled_suites(preset: str) -> list[str]: # Utility scripts to validate a given preset, useful for e.g. CI: + def main(): parser = argparse.ArgumentParser(description="Benchmark Preset Utilities") subparsers = parser.add_subparsers(dest="command", required=True) query_parser = subparsers.add_parser( - "query", - help="Query benchmarks ran by a preset (as defined in presets.py)" - ) - query_parser.add_argument( - "preset_to_query", - type=str, - help="preset name to query" + "query", help="Query benchmarks ran by a preset (as defined in presets.py)" ) + query_parser.add_argument("preset_to_query", type=str, help="preset name to query") query_parser.add_argument( - "-q", "--quiet", + "-q", + "--quiet", action="store_true", - help="Disable stdout messages: Useful if you want to check if a preset exists within a shell script." + help="Disable stdout messages: Useful if you want to check if a preset exists within a shell script.", ) args = parser.parse_args() - if args.command == 'query': + if args.command == "query": if args.preset_to_query in presets: if not args.quiet: print(f"Benchmark suites to be ran in {args.preset_to_query}:") @@ -70,8 +67,10 @@ def main(): print(suite) exit(0) else: - if not args.quiet: print(f"Error: No preset named '{args.preset_to_query}'.") + if not args.quiet: + print(f"Error: No preset named '{args.preset_to_query}'.") exit(1) + if __name__ == "__main__": main() diff --git a/devops/scripts/benchmarks/utils/validate.py b/devops/scripts/benchmarks/utils/validate.py index 77bd13f4f9971..b0a2658865562 100644 --- a/devops/scripts/benchmarks/utils/validate.py +++ b/devops/scripts/benchmarks/utils/validate.py @@ -1,5 +1,6 @@ import re + def validate_on_re(val: str, regex: re.Pattern, throw: Exception = None): """ Returns True if val is matched by pattern defined by regex, otherwise False. 
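For reference, the Validate helpers serve both as plain predicates and, when
given throw=, as raising validators for argparse type= callbacks (as in
main.py above). An illustrative usage sketch; the import path follows the
file's location under utils/, and the timestamp value is an arbitrary
well-formed example:

    from utils.validate import Validate

    Validate.timestamp("20250407_080436")       # True: matches YYYYMMDD_HHMMSS
    Validate.timestamp("2025-04-07 08:04:36")   # False: wrong format
    Validate.github_repo("intel/llvm-ci-perf-results")  # True: owner/repo form
    # With throw=..., a failed match raises instead of returning False:
    Validate.timestamp("bad", throw=ValueError("Use YYYYMMDD_HHMMSS."))
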
@@ -9,9 +10,12 @@ def validate_on_re(val: str, regex: re.Pattern, throw: Exception = None): """ is_matching: bool = re.compile(regex).match(val) is not None - if throw is None: return is_matching - elif not is_matching: raise throw - else: return val + if throw is None: + return is_matching + elif not is_matching: + raise throw + else: + return val class Validate: @@ -35,7 +39,7 @@ def timestamp(t: str, throw: Exception = None): return validate_on_re( t, r"^\d{4}(0[1-9]|1[0-2])([0-2][0-9]|3[01])_([01][0-9]|2[0-3])[0-5][0-9][0-5][0-9]$", - throw=throw + throw=throw, ) @staticmethod @@ -49,7 +53,7 @@ def github_repo(repo: str, throw: Exception = None): return validate_on_re( re.sub(r"^https?://github.com/", "", repo), r"^[a-zA-Z0-9_-]{1,39}/[a-zA-Z0-9_.-]{1,100}$", - throw=throw + throw=throw, ) @staticmethod From ba7df662dfc0856c8bc4d3b9ef68dc5d0b8d05e1 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 7 Apr 2025 08:04:36 -0700 Subject: [PATCH 103/114] Archive benchmark runs --- .github/workflows/sycl-ur-perf-benchmarking.yml | 2 -- devops/actions/run-tests/benchmark_v2/action.yml | 16 +++++++++++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.github/workflows/sycl-ur-perf-benchmarking.yml b/.github/workflows/sycl-ur-perf-benchmarking.yml index 2713d60f0a2b9..7dbb4da228188 100644 --- a/.github/workflows/sycl-ur-perf-benchmarking.yml +++ b/.github/workflows/sycl-ur-perf-benchmarking.yml @@ -1,8 +1,6 @@ name: Run Benchmarks on: - schedule: - - cron: '0 1 * * *' # 2 hrs earlier than sycl-nightly.yml workflow_call: inputs: preset: diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index 734bd45c8b16d..c7aa4f3f48c2e 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -150,7 +150,15 @@ runs: --compare-file "./llvm-ci-perf-results/results/${SAVE_NAME}_${SAVE_TIMESTAMP}.json" \ --results-dir "./llvm-ci-perf-results/results/" - - name: Push compute-benchmarks results + - name: Cache changes to benchmark folder for archival purposes + shell: bash + run: | + cd "./llvm-ci-perf-results" + for diff in $(git diff HEAD --name-only); do + mkdir -p "../cached_changes/$(dirname $diff)" + cp "$diff" "../cached_changes/$diff" + done + - name: Push benchmarks results if: inputs.upload_results == 'true' && always() shell: bash run: | @@ -197,3 +205,9 @@ runs: --dry-run cd - done + - name: Archive benchmark results + if: always() + uses: actions/upload-artifact@v4 + with: + name: Benchmark run ${{ github.run_id }} (${{ runner.name }}) + path: ./cached_changes From 989441dda4acf21d81b8c1383834344109011a86 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 7 Apr 2025 08:10:55 -0700 Subject: [PATCH 104/114] Remove legacy benchmarking code --- .github/workflows/sycl-linux-run-tests.yml | 18 +- .../workflows/sycl-ur-perf-benchmarking.yml | 4 +- .../actions/benchmarking/aggregate/action.yml | 95 ------ devops/actions/run-tests/benchmark/action.yml | 200 ++++++++---- .../actions/run-tests/benchmark_v2/action.yml | 213 ------------- devops/benchmarking/config.ini | 44 --- devops/benchmarking/constants.ini | 48 --- devops/benchmarking/enabled_tests.conf | 8 - devops/scripts/benchmarking/aggregate.py | 205 ------------ devops/scripts/benchmarking/benchmark.sh | 300 ------------------ devops/scripts/benchmarking/common.py | 196 ------------ devops/scripts/benchmarking/compare.py | 101 ------ devops/scripts/benchmarking/load_config.py | 30 -- 13 files 
changed, 149 insertions(+), 1313 deletions(-) delete mode 100644 devops/actions/benchmarking/aggregate/action.yml delete mode 100644 devops/actions/run-tests/benchmark_v2/action.yml delete mode 100644 devops/benchmarking/config.ini delete mode 100644 devops/benchmarking/constants.ini delete mode 100644 devops/benchmarking/enabled_tests.conf delete mode 100644 devops/scripts/benchmarking/aggregate.py delete mode 100755 devops/scripts/benchmarking/benchmark.sh delete mode 100644 devops/scripts/benchmarking/common.py delete mode 100644 devops/scripts/benchmarking/compare.py delete mode 100644 devops/scripts/benchmarking/load_config.py diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index 0b31408020658..c25050be24f32 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -25,7 +25,7 @@ on: required: False tests_selector: description: | - Three possible options: "e2e", "cts", and "compute-benchmarks". + Three possible options: "e2e", "cts", and "benchmarks". type: string default: "e2e" @@ -163,8 +163,7 @@ on: options: - e2e - cts - - compute-benchmarks - - benchmark_v2 + - benchmarks env: description: | @@ -317,18 +316,9 @@ jobs: target_devices: ${{ inputs.target_devices }} retention-days: ${{ inputs.retention-days }} - - name: Run compute-benchmarks on SYCL - if: inputs.tests_selector == 'compute-benchmarks' - uses: ./devops/actions/run-tests/benchmark - with: - target_devices: ${{ inputs.target_devices }} - env: - RUNNER_TAG: ${{ inputs.runner }} - GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} - - name: Run benchmarks - if: inputs.tests_selector == 'benchmark_v2' - uses: ./devops/actions/run-tests/benchmark_v2 + if: inputs.tests_selector == 'benchmarks' + uses: ./devops/actions/run-tests/benchmark with: target_devices: ${{ inputs.target_devices }} upload_results: ${{ inputs.benchmark_upload_results }} diff --git a/.github/workflows/sycl-ur-perf-benchmarking.yml b/.github/workflows/sycl-ur-perf-benchmarking.yml index 7dbb4da228188..28790af47bd6c 100644 --- a/.github/workflows/sycl-ur-perf-benchmarking.yml +++ b/.github/workflows/sycl-ur-perf-benchmarking.yml @@ -123,7 +123,7 @@ jobs: image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN target_devices: ${{ matrix.backend }} - tests_selector: benchmark_v2 + tests_selector: benchmark benchmark_upload_results: ${{ inputs.upload_results }} benchmark_save_name: ${{ matrix.save_name }} benchmark_preset: ${{ inputs.preset }} @@ -151,7 +151,7 @@ jobs: image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN target_devices: ${{ matrix.backend }} - tests_selector: benchmark_v2 + tests_selector: benchmark benchmark_save_name: Baseline benchmark_upload_results: ${{ inputs.upload_results }} benchmark_preset: ${{ inputs.preset }} diff --git a/devops/actions/benchmarking/aggregate/action.yml b/devops/actions/benchmarking/aggregate/action.yml deleted file mode 100644 index c062636684b1f..0000000000000 --- a/devops/actions/benchmarking/aggregate/action.yml +++ /dev/null @@ -1,95 +0,0 @@ -name: 'Aggregate compute-benchmark results and produce historical averages' - -# The benchmarking workflow in sycl-linux-run-tests.yml passes or fails based on -# how the benchmark results compare to a historical average: This historical -# 
average is calculated in this composite workflow, which aggregates historical -# data and produces measures of central tendency (median in this case) used for -# this purpose. -# -# This action assumes that /devops has been checked out in ./devops. This action -# also assumes that GITHUB_TOKEN was properly set in env, because according to -# Github, that's apparently the recommended way to pass a secret into a github -# action: -# -# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets -# - -inputs: - lookback_days: - type: number - required: true - -runs: - using: "composite" - steps: - - name: Obtain oldest timestamp allowed for data in aggregation - shell: bash - run: | - # DO NOT use inputs.lookback_days directly, only use SANITIZED_TIMESTAMP. - SANITIZED_LOOKBACK_DAYS="$(echo '${{ inputs.lookback_days }}' | grep -oE '^[0-9]+$')" - if [ -z "$SANITIZED_LOOKBACK_DAYS" ]; then - echo "Please ensure inputs.lookback_days is a number." - exit 1 - fi - SANITIZED_TIMESTAMP="$(date -d "$SANITIZED_LOOKBACK_DAYS days ago" +%Y%m%d_%H%M%S)" - if [ -z "$(echo "$SANITIZED_TIMESTAMP" | grep -oE '^[0-9]{8}_[0-9]{6}$' )" ]; then - echo "Invalid timestamp generated: is inputs.lookback_days valid?" - exit 1 - fi - echo "SANITIZED_TIMESTAMP=$SANITIZED_TIMESTAMP" >> $GITHUB_ENV - - name: Load benchmarking configuration - shell: bash - run: | - $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) - echo "SANITIZED_PERF_RES_GIT_REPO=$SANITIZED_PERF_RES_GIT_REPO" >> $GITHUB_ENV - echo "SANITIZED_PERF_RES_GIT_BRANCH=$SANITIZED_PERF_RES_GIT_BRANCH" >> $GITHUB_ENV - - name: Checkout historical performance results repository - shell: bash - run: | - if [ ! -d ./llvm-ci-perf-results ]; then - git clone -b "$SANITIZED_PERF_RES_GIT_BRANCH" "https://github.com/$SANITIZED_PERF_RES_GIT_REPO" ./llvm-ci-perf-results - fi - - name: Run aggregator on historical results - shell: bash - run: | - # The current format of the historical results respository is: - # - # /// - # - # Thus, a min/max depth of 3 is used to enumerate all test cases in the - # repository. Test name is also derived from here. - find ./llvm-ci-perf-results -mindepth 3 -maxdepth 3 -type d ! -path '*.git*' | - while read -r dir; do - test_name="$(basename "$dir")" - python ./devops/scripts/benchmarking/aggregate.py ./devops "$test_name" "$dir" "$SANITIZED_TIMESTAMP" - done - - name: Upload average to the repo - shell: bash - run: | - cd ./llvm-ci-perf-results - git config user.name "SYCL Benchmarking Bot" - git config user.email "sys_sycl_benchmarks@intel.com" - git pull - # Make sure changes have been made - if git diff --quiet && git diff --cached --quiet; then - echo "No changes to median, skipping push." - else - git add . 
- git commit -m "[GHA] Aggregate median data from $SANITIZED_TIMESTAMP to $(date +%Y%m%d_%H%M%S)" - git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH" - fi - - name: Find aggregated average results artifact here - if: always() - shell: bash - run: | - cat << EOF - # - # Artifact link for aggregated averages here: - # - EOF - - name: Archive new medians - if: always() - uses: actions/upload-artifact@v4 - with: - name: llvm-ci-perf-results new medians - path: ./llvm-ci-perf-results/**/*-median.csv diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 03b7d4ad776fd..c7aa4f3f48c2e 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -1,24 +1,30 @@ -name: 'Run compute-benchmarks' - -# Run compute-benchmarks on SYCL -# -# This action assumes SYCL is in ./toolchain, and that /devops has been -# checked out in ./devops. This action also assumes that GITHUB_TOKEN -# was properly set in env, because according to Github, that's apparently the -# recommended way to pass a secret into a github action: -# -# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets +name: 'Run benchmarks' + +# This action assumes the following prerequisites: # -# This action also expects a RUNNER_TAG environment variable to be set to the -# runner tag used to run this workflow: Currently, only gen12 and pvc on Linux -# are fully supported. Although this workflow won't stop you from running other -# devices, note that only gen12 and pvc has been tested to work. +# - SYCL is placed in ./toolchain -- TODO change this +# - /devops has been checked out in ./devops. +# - env.GITHUB_TOKEN was properly set, because according to Github, that's +# apparently the recommended way to pass a secret into a github action: + +# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets # +# - env.RUNNER_TAG set to the runner tag used to run this workflow: Currently, +# only specific runners are fully supported. inputs: target_devices: type: string required: True + upload_results: + type: string + required: True + save_name: + type: string + required: True + preset: + type: string + required: True runs: using: "composite" @@ -27,16 +33,24 @@ runs: shell: bash env: TARGET_DEVICE: ${{ inputs.target_devices }} + PRESET: ${{ inputs.preset }} run: | case "$RUNNER_TAG" in - '["Linux", "gen12"]' | '["Linux", "pvc"]') ;; + '["PVC_PERF"]' ) ;; *) echo "#" - echo "# WARNING: Only gen12/pvc on Linux is fully supported." + echo "# WARNING: Only specific tuned runners are fully supported." echo "# This workflow is not guaranteed to work with other runners." echo "#" ;; esac + # Ensure runner name has nothing injected + # TODO: in terms of security, is this overkill? + if [ -z "$(printf '%s' "$RUNNER_NAME" | grep -oE '^[a-zA-Z0-9_-]+$')" ]; then + echo "Bad runner name, please ensure runner name is [a-zA-Z0-9_-]." + exit 1 + fi + # input.target_devices is not directly used, as this allows code injection case "$TARGET_DEVICE" in level_zero:*) ;; @@ -46,11 +60,15 @@ runs: echo "# This workflow is not guaranteed to work with other backends." 
echo "#" ;; esac + echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV + + # Make sure specified preset is a known value and is not malicious + python3 ./devops/scripts/benchmarks/presets.py query "$PRESET" + [ "$?" -ne 0 ] && exit 1 # Stop workflow if invalid preset + echo "PRESET=$PRESET" >> $GITHUB_ENV - name: Compute CPU core range to run benchmarks on shell: bash run: | - # Taken from ur-benchmark-reusable.yml: - # Compute the core range for the first NUMA node; second node is used by # UMF. Skip the first 4 cores as the kernel is likely to schedule more # work on these. @@ -67,61 +85,129 @@ runs: ZE_AFFINITY_MASK=0 echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV + - name: Checkout results repo + shell: bash + run: | + git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results - name: Run compute-benchmarks + env: + # Need to append "__" to save name in order to follow + # conventions: + SAVE_PREFIX: ${{ inputs.save_name }} shell: bash run: | - cat << EOF - # - # NOTE TO DEVELOPERS: - # - - Check latter steps of the workflow: This job produces an artifact with: - - benchmark results from passing/failing tests - - log containing all failing (too slow) benchmarks - - log containing all erroring benchmarks - - While this step in the workflow provides debugging output describing this - information, it might be easier to inspect the logs from the artifact - instead. - - EOF - export ONEAPI_DEVICE_SELECTOR="${{ inputs.target_devices }}" + # TODO generate summary + display helpful message here export CMPLR_ROOT=./toolchain echo "-----" sycl-ls echo "-----" - taskset -c "$CORES" ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s || exit 1 - - name: Push compute-benchmarks results - if: always() + # Using --break-system-packages because: + # - venv is not installed + # - unable to install anything via pip, as python packages in the docker + # container are managed by apt + # - apt is unable to install anything due to unresolved dpkg dependencies, + # as a result of how the sycl nightly images are created + pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt + echo "-----" + + # clang builds have git repo / commit hashes in their --version output, + # same goes for dpcpp. Obtain git repo / commit hash info this way: + + # First line of --version is formatted 'clang version ... ( )' + # thus we parse for ( ): + sycl_git_info="$(clang++ --version | head -n 1 | grep -oE '\([^ ]+ [a-f0-9]+\)$' | tr -d '()')" + if [ -z "$sycl_git_info" ]; then + echo "Error: Unable to deduce SYCL build source repo/commit: Are you sure dpcpp variable is in PATH?" 
+ exit 1 + fi + sycl_git_repo="$(printf "$sycl_git_info" | cut -d' ' -f1)" + sycl_git_commit="$(printf "$sycl_git_info" | cut -d' ' -f2)" + + case "$ONEAPI_DEVICE_SELECTOR" in + level_zero:*) SAVE_SUFFIX="L0" ;; + level_zero_v2:*) SAVE_SUFFIX="L0v2" ;; + opencl:*) SAVE_SUFFIX="OCL" ;; + *) SAVE_SUFFIX="${ONEAPI_DEVICE_SELECTOR%%:*}";; + esac + # TODO accomodate for different GPUs and backends + SAVE_NAME="${SAVE_PREFIX}_PVC_${SAVE_SUFFIX}" + SAVE_TIMESTAMP="$(date -u +'%Y%m%d_%H%M%S')" # Timestamps are in UTC time + + taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \ + "$(realpath ./llvm_test_workdir)" \ + --sycl "$(realpath ./toolchain)" \ + --save "$SAVE_NAME" \ + --output-html remote \ + --results-dir "./llvm-ci-perf-results/" \ + --output-dir "./llvm-ci-perf-results/" \ + --preset "$PRESET" \ + --timestamp-override "$SAVE_TIMESTAMP" \ + --github-repo "$sycl_git_repo" \ + --git-commit "$sycl_git_commit" + echo "-----" + python3 ./devops/scripts/benchmarks/compare.py to_hist \ + --name "$SAVE_NAME" \ + --compare-file "./llvm-ci-perf-results/results/${SAVE_NAME}_${SAVE_TIMESTAMP}.json" \ + --results-dir "./llvm-ci-perf-results/results/" + + - name: Cache changes to benchmark folder for archival purposes + shell: bash + run: | + cd "./llvm-ci-perf-results" + for diff in $(git diff HEAD --name-only); do + mkdir -p "../cached_changes/$(dirname $diff)" + cp "$diff" "../cached_changes/$diff" + done + - name: Push benchmarks results + if: inputs.upload_results == 'true' && always() shell: bash run: | - # Load configuration values - $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) - cd "./llvm-ci-perf-results" git config user.name "SYCL Benchmarking Bot" git config user.email "sys_sycl_benchmarks@intel.com" - git pull + results_branch="unify-ci" + git add . - # Make sure changes have been made if git diff --quiet && git diff --cached --quiet; then echo "No new results added, skipping push." - else - git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" - git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH" + exit 0 fi - - name: Find benchmark result artifact here - if: always() - shell: bash - run: | - cat << EOF - # - # Artifact link for benchmark results here: - # - EOF - - name: Archive compute-benchmark results + + for attempt in 1 2 3; do + echo "Attempt $attempt to push new results" + git add . + git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" + results_file="$(git diff HEAD~1 --name-only -- results/ | head -n 1)" + + if git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" "$results_branch"; then + echo "Push succeeded" + break + fi + + echo "Push failed, retrying..." + if [ -n "$results_file" ]; then + cached_result="$(mktemp -d)/$(basename $results_file)" + mv "$results_file" "$cached_result" + + git reset --hard "origin/$results_branch" + git pull origin "$results_branch" + + mv "$cached_result" "$results_file" + fi + + echo "Regenerating data.json..." 
+ cd ../ + ./devops/scripts/benchmarks/main.py \ + "$(realpath ./llvm_test_workdir)" \ + --output-html remote \ + --results-dir "./llvm-ci-perf-results/" \ + --output-dir "./llvm-ci-perf-results/" \ + --dry-run + cd - + done + - name: Archive benchmark results if: always() uses: actions/upload-artifact@v4 with: - name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }}) - path: ./artifact + name: Benchmark run ${{ github.run_id }} (${{ runner.name }}) + path: ./cached_changes diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml deleted file mode 100644 index c7aa4f3f48c2e..0000000000000 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ /dev/null @@ -1,213 +0,0 @@ -name: 'Run benchmarks' - -# This action assumes the following prerequisites: -# -# - SYCL is placed in ./toolchain -- TODO change this -# - /devops has been checked out in ./devops. -# - env.GITHUB_TOKEN was properly set, because according to Github, that's -# apparently the recommended way to pass a secret into a github action: - -# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets -# -# - env.RUNNER_TAG set to the runner tag used to run this workflow: Currently, -# only specific runners are fully supported. - -inputs: - target_devices: - type: string - required: True - upload_results: - type: string - required: True - save_name: - type: string - required: True - preset: - type: string - required: True - -runs: - using: "composite" - steps: - - name: Check specified runner type / target backend - shell: bash - env: - TARGET_DEVICE: ${{ inputs.target_devices }} - PRESET: ${{ inputs.preset }} - run: | - case "$RUNNER_TAG" in - '["PVC_PERF"]' ) ;; - *) - echo "#" - echo "# WARNING: Only specific tuned runners are fully supported." - echo "# This workflow is not guaranteed to work with other runners." - echo "#" ;; - esac - - # Ensure runner name has nothing injected - # TODO: in terms of security, is this overkill? - if [ -z "$(printf '%s' "$RUNNER_NAME" | grep -oE '^[a-zA-Z0-9_-]+$')" ]; then - echo "Bad runner name, please ensure runner name is [a-zA-Z0-9_-]." - exit 1 - fi - - # input.target_devices is not directly used, as this allows code injection - case "$TARGET_DEVICE" in - level_zero:*) ;; - *) - echo "#" - echo "# WARNING: Only level_zero backend is fully supported." - echo "# This workflow is not guaranteed to work with other backends." - echo "#" ;; - esac - echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV - - # Make sure specified preset is a known value and is not malicious - python3 ./devops/scripts/benchmarks/presets.py query "$PRESET" - [ "$?" -ne 0 ] && exit 1 # Stop workflow if invalid preset - echo "PRESET=$PRESET" >> $GITHUB_ENV - - name: Compute CPU core range to run benchmarks on - shell: bash - run: | - # Compute the core range for the first NUMA node; second node is used by - # UMF. Skip the first 4 cores as the kernel is likely to schedule more - # work on these. 
- CORES="$(lscpu | awk ' - /NUMA node0 CPU|On-line CPU/ {line=$0} - END { - split(line, a, " ") - split(a[4], b, ",") - sub(/^0/, "4", b[1]) - print b[1] - }')" - echo "CPU core range to use: $CORES" - echo "CORES=$CORES" >> $GITHUB_ENV - - ZE_AFFINITY_MASK=0 - echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV - - name: Checkout results repo - shell: bash - run: | - git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results - - name: Run compute-benchmarks - env: - # Need to append "__" to save name in order to follow - # conventions: - SAVE_PREFIX: ${{ inputs.save_name }} - shell: bash - run: | - # TODO generate summary + display helpful message here - export CMPLR_ROOT=./toolchain - echo "-----" - sycl-ls - echo "-----" - # Using --break-system-packages because: - # - venv is not installed - # - unable to install anything via pip, as python packages in the docker - # container are managed by apt - # - apt is unable to install anything due to unresolved dpkg dependencies, - # as a result of how the sycl nightly images are created - pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt - echo "-----" - - # clang builds have git repo / commit hashes in their --version output, - # same goes for dpcpp. Obtain git repo / commit hash info this way: - - # First line of --version is formatted 'clang version ... ( )' - # thus we parse for ( ): - sycl_git_info="$(clang++ --version | head -n 1 | grep -oE '\([^ ]+ [a-f0-9]+\)$' | tr -d '()')" - if [ -z "$sycl_git_info" ]; then - echo "Error: Unable to deduce SYCL build source repo/commit: Are you sure dpcpp variable is in PATH?" - exit 1 - fi - sycl_git_repo="$(printf "$sycl_git_info" | cut -d' ' -f1)" - sycl_git_commit="$(printf "$sycl_git_info" | cut -d' ' -f2)" - - case "$ONEAPI_DEVICE_SELECTOR" in - level_zero:*) SAVE_SUFFIX="L0" ;; - level_zero_v2:*) SAVE_SUFFIX="L0v2" ;; - opencl:*) SAVE_SUFFIX="OCL" ;; - *) SAVE_SUFFIX="${ONEAPI_DEVICE_SELECTOR%%:*}";; - esac - # TODO accomodate for different GPUs and backends - SAVE_NAME="${SAVE_PREFIX}_PVC_${SAVE_SUFFIX}" - SAVE_TIMESTAMP="$(date -u +'%Y%m%d_%H%M%S')" # Timestamps are in UTC time - - taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \ - "$(realpath ./llvm_test_workdir)" \ - --sycl "$(realpath ./toolchain)" \ - --save "$SAVE_NAME" \ - --output-html remote \ - --results-dir "./llvm-ci-perf-results/" \ - --output-dir "./llvm-ci-perf-results/" \ - --preset "$PRESET" \ - --timestamp-override "$SAVE_TIMESTAMP" \ - --github-repo "$sycl_git_repo" \ - --git-commit "$sycl_git_commit" - echo "-----" - python3 ./devops/scripts/benchmarks/compare.py to_hist \ - --name "$SAVE_NAME" \ - --compare-file "./llvm-ci-perf-results/results/${SAVE_NAME}_${SAVE_TIMESTAMP}.json" \ - --results-dir "./llvm-ci-perf-results/results/" - - - name: Cache changes to benchmark folder for archival purposes - shell: bash - run: | - cd "./llvm-ci-perf-results" - for diff in $(git diff HEAD --name-only); do - mkdir -p "../cached_changes/$(dirname $diff)" - cp "$diff" "../cached_changes/$diff" - done - - name: Push benchmarks results - if: inputs.upload_results == 'true' && always() - shell: bash - run: | - cd "./llvm-ci-perf-results" - git config user.name "SYCL Benchmarking Bot" - git config user.email "sys_sycl_benchmarks@intel.com" - results_branch="unify-ci" - - git add . - if git diff --quiet && git diff --cached --quiet; then - echo "No new results added, skipping push." 
- exit 0 - fi - - for attempt in 1 2 3; do - echo "Attempt $attempt to push new results" - git add . - git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" - results_file="$(git diff HEAD~1 --name-only -- results/ | head -n 1)" - - if git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" "$results_branch"; then - echo "Push succeeded" - break - fi - - echo "Push failed, retrying..." - if [ -n "$results_file" ]; then - cached_result="$(mktemp -d)/$(basename $results_file)" - mv "$results_file" "$cached_result" - - git reset --hard "origin/$results_branch" - git pull origin "$results_branch" - - mv "$cached_result" "$results_file" - fi - - echo "Regenerating data.json..." - cd ../ - ./devops/scripts/benchmarks/main.py \ - "$(realpath ./llvm_test_workdir)" \ - --output-html remote \ - --results-dir "./llvm-ci-perf-results/" \ - --output-dir "./llvm-ci-perf-results/" \ - --dry-run - cd - - done - - name: Archive benchmark results - if: always() - uses: actions/upload-artifact@v4 - with: - name: Benchmark run ${{ github.run_id }} (${{ runner.name }}) - path: ./cached_changes diff --git a/devops/benchmarking/config.ini b/devops/benchmarking/config.ini deleted file mode 100644 index 988d1d9f08af9..0000000000000 --- a/devops/benchmarking/config.ini +++ /dev/null @@ -1,44 +0,0 @@ -; -; This file contains configuration options to change the behaviour of the -; benchmarking workflow in sycl-linux-run-tests.yml. -; -; DO NOT USE THE CONTENTS OF THIS FILE DIRECTLY -- Due to security concerns, The -; contents of this file must be sanitized first before use. -; See: /devops/scripts/benchmarking/common.py -; - -; Compute-benchmark compile/run options -[compute_bench] -; Value for -j during compilation of compute-benchmarks -compile_jobs = 40 -; Number of iterations to run compute-benchmark tests -iterations = 5000 - -; Options for benchmark result metrics (to record/compare against) -[metrics] -; Sets the metrics to record/aggregate in the historical average. -; Format: comma-separated list of column names in compute-benchmark results -recorded = Median,StdDev -; Sets the tolerance for each recorded metric and their allowed deviation from -; the historical average. Metrics not included here are not compared against -; when passing/failing benchmark results. -; Format: comma-separated list of : -tolerances = Median:0.08 - -; Options for computing historical averages -[average] -; Number of days (from today) to look back for results when computing historical -; average -cutoff_range = 7 -; Minimum number of samples required to compute a historical average -min_threshold = 10 - -; ONEAPI_DEVICE_SELECTOR linting/options -[device_selector] -; Backends to allow in device_selector -enabled_backends = level_zero,opencl,cuda,hip -; native_cpu is disabled - -; Devices to allow in device_selector -enabled_devices = cpu,gpu -; fpga is disabled diff --git a/devops/benchmarking/constants.ini b/devops/benchmarking/constants.ini deleted file mode 100644 index 9281ece8f4950..0000000000000 --- a/devops/benchmarking/constants.ini +++ /dev/null @@ -1,48 +0,0 @@ -; -; This file defines constants used throughout the benchmarking workflow in -; sycl-linux-run-tests.yml. If you're trying to change the behavior of this -; workflow, you're likely looking for /devops/benchmarking/config.ini instead. 
-; -; DO NOT USE THE CONTENTS OF THIS FILE DIRECTLY -- Due to security concerns, The -; contents of this file must be sanitized first before use. -; See: /devops/scripts/benchmarking/common.py -; - -; Constants for compute-benchmarks -[compute_bench] -git_repo = intel/compute-benchmarks -git_branch = master -git_commit = 230a3db4d8d03c0e9a663988f7c3abbd1137a1e0 -; path = ./compute-benchmarks - -; Constants for git repo storing benchmark performance results -[perf_res] -git_repo = intel/llvm-ci-perf-results -git_branch = main -; Path to clone performance result repo -; path = ./llvm-ci-perf-results - -; It was decided that paths should be hardcoded throughout this workflow for -; security reasons and ease of readability. Do not use paths as constants. - -; ; Constants for artifacts -; [artifact] -; ; Path to root folder storing benchmark CI artifact -; path = ./artifact -; ; Path (relative to artifact.path) to cache compute-benchmark results -; ; -; ; If a test result does not get moved out of this catch-all cache path, it is -; ; considered to have failed -; output_cache = ./artifact/failed_tests -; ; Path (relative to artifact.path) to cache passing compute-benchmark results -; passing_cache = ./artifact/passing_tests - -; [timestamp] -; ; Timestamp format used for -; format = %%Y%%m%%d_%%H%%M%%S - -; [benchmark_log] -; ; Log file for test cases that perform over the allowed variance -; slow = ./artifact/benchmarks_failed.log -; ; Log file for test cases that errored / failed to build -; error = ./artifact/benchmarks_errored.log diff --git a/devops/benchmarking/enabled_tests.conf b/devops/benchmarking/enabled_tests.conf deleted file mode 100644 index 20659cbea636d..0000000000000 --- a/devops/benchmarking/enabled_tests.conf +++ /dev/null @@ -1,8 +0,0 @@ -# Test cases to be enabled: -api_overhead_benchmark_sycl -memory_benchmark_sycl -miscellaneous_benchmark_sycl -ulls_benchmark_sycl - -# As of January 2025, these are every compute-benchmark tests with a SYCL -# implementation. diff --git a/devops/scripts/benchmarking/aggregate.py b/devops/scripts/benchmarking/aggregate.py deleted file mode 100644 index f62a8ffed83c5..0000000000000 --- a/devops/scripts/benchmarking/aggregate.py +++ /dev/null @@ -1,205 +0,0 @@ -import csv -import sys -from pathlib import Path -import heapq -import statistics -from common import Validate, SanitizedConfig -from abc import ABC, abstractmethod -import os - - -class Aggregator(ABC): - """ - Aggregator classes used to "aggregate" a pool of elements, and produce an - "average" (precisely, some "measure of central tendency") from the elements. - """ - - @staticmethod - @abstractmethod - def get_type() -> str: - """ - Return a string indicating the type of average this aggregator - produces. - """ - pass - - @abstractmethod - def add(self, n: float): - """ - Add/aggregate an element to the pool of elements used by this aggregator - to produce an average calculation. - """ - pass - - @abstractmethod - def get_avg(self) -> float: - """ - Produce an average from the pool of elements aggregated using add(). - """ - pass - - -class SimpleMedian(Aggregator): - """ - Simple median calculation: if the number of samples being generated are low, - this is the fastest median method. 
- """ - - def __init__(self): - self.elements = [] - - @staticmethod - def get_type() -> str: - return "median" - - def add(self, n: float): - self.elements.append(n) - - def get_avg(self) -> float: - return statistics.median(self.elements) - - -class StreamingMedian(Aggregator): - """ - Calculate medians incrementally using heaps: Theoretically the fastest way - to calculate a median from a stream of elements, but realistically is only - faster when dealing with huge numbers of samples that would be generated by - i.e. enabling this workflow in precommit and using longer periods of time. - """ - - def __init__(self): - # Gist: we keep a minheap and a maxheap, and store the median as the top - # of the minheap. When a new element comes it gets put into the heap - # based on if the element is bigger than the current median. Then, the - # heaps are heapified and the median is repopulated by heapify. - self.minheap_larger = [] - self.maxheap_smaller = [] - - @staticmethod - def get_type() -> str: - return "median" - - # Note: numbers on maxheap should be negative, as heapq - # is minheap by default - - def add(self, n: float): - if len(self.maxheap_smaller) == 0 or -self.maxheap_smaller[0] >= n: - heapq.heappush(self.maxheap_smaller, -n) - else: - heapq.heappush(self.minheap_larger, n) - - # Ensure minheap has more elements than maxheap - if len(self.maxheap_smaller) > len(self.minheap_larger) + 1: - heapq.heappush(self.minheap_larger, -heapq.heappop(self.maxheap_smaller)) - elif len(self.maxheap_smaller) < len(self.minheap_larger): - heapq.heappush(self.maxheap_smaller, -heapq.heappop(self.minheap_larger)) - - def get_avg(self) -> float: - if len(self.maxheap_smaller) == len(self.minheap_larger): - # Equal number of elements smaller and larger than "median": - # thus, there are two median values. The median would then become - # the average of both median values. - return (-self.maxheap_smaller[0] + self.minheap_larger[0]) / 2.0 - else: - # Otherwise, median is always in minheap, as minheap is always - # bigger - return -self.maxheap_smaller[0] - - -class Aggregate: - """ - Static class providing methods for aggregating data - """ - - @staticmethod - def hist_avg( - benchmark_name: str, res_dir: str, cutoff: str, aggregator=SimpleMedian - ): - if not os.path.isdir(res_dir): - print(f"Not a directory: {res_dir}.", file=sys.stderr) - exit(1) - - def get_csv_samples() -> list[str]: - """Get all valid .csv samples from the results folder.""" - cache_dir = Path(f"{res_dir}") - # Filter all benchmark .csv files in the result directory: - return list( - filter( - # Make sure the .csv "file" is a file: - lambda f: f.is_file() - # Make sure timestamp of .csv file is good format: - # [-19:-4] corresponds to the timestamp in the filename. 
-                    and Validate.timestamp(str(f)[-19:-4])
-                    # Make sure timestamp is bigger than cutoff timestamp:
-                    and str(f)[-19:-4] > cutoff,
-                    cache_dir.glob(f"{benchmark_name}-*_*.csv"),
-                )
-            )
-
-        # Calculate median of every desired metric:
-        samples_aggregate = dict()
-        filtered_samples = get_csv_samples()
-        if len(filtered_samples) == 0:
-            print(
-                f"WARNING: No results for {benchmark_name} found from {cutoff} to now",
-                file=sys.stderr,
-            )
-        for sample_path in filtered_samples:
-            with open(sample_path, "r") as sample_file:
-                for sample in csv.DictReader(sample_file):
-                    test = sample["TestCase"]
-                    # Construct entry in aggregator for test if it doesn't exist
-                    # already:
-                    if test not in samples_aggregate:
-                        samples_aggregate[test] = {
-                            metric: aggregator()
-                            for metric in SanitizedConfig.METRICS_TOLERANCES
-                        }
-
-                    # For each metric of concern, add to aggregator:
-                    for metric in SanitizedConfig.METRICS_TOLERANCES:
-                        sample_value = Validate.sanitize_stat(sample[metric])
-                        if not isinstance(sample_value, float):
-                            print(
-                                f"Malformatted statistic in {str(sample_path)}: "
-                                + f"'{sample[metric]}' for {test}."
-                            )
-                            exit(1)
-                        # Add metric from sample for current test to aggregate:
-                        samples_aggregate[test][metric].add(sample_value)
-
-        # Calculate + write new average (from samples_aggregate) in new .csv file:
-        with open(
-            f"{res_dir}/{benchmark_name}-{aggregator.get_type()}.csv", "w"
-        ) as output_csv:
-            writer = csv.DictWriter(
-                output_csv,
-                fieldnames=["TestCase", *SanitizedConfig.METRICS_TOLERANCES.keys()],
-            )
-            writer.writeheader()
-            for test in samples_aggregate:
-                writer.writerow(
-                    {"TestCase": test}
-                    | {
-                        metric: samples_aggregate[test][metric].get_avg()
-                        for metric in SanitizedConfig.METRICS_TOLERANCES
-                    }
-                )
-
-
-if __name__ == "__main__":
-    if len(sys.argv) != 5:
-        print(
-            f"Usage: {sys.argv[0]} "
-        )
-        exit(1)
-    if not Validate.timestamp(sys.argv[4]):
-        print(f"Bad cutoff timestamp, please use YYYYMMDD_HHMMSS.", file=sys.stderr)
-        exit(1)
-    if not Validate.filepath(sys.argv[1]):
-        print(f"Not a valid filepath: {sys.argv[1]}", file=sys.stderr)
-        exit(1)
-    # If the filepath provided passed filepath validation, then it is clean
-    SanitizedConfig.load(sys.argv[1])
-
-    Aggregate.hist_avg(sys.argv[2], sys.argv[3], sys.argv[4])
diff --git a/devops/scripts/benchmarking/benchmark.sh b/devops/scripts/benchmarking/benchmark.sh
deleted file mode 100755
index bbfd669774f9a..0000000000000
--- a/devops/scripts/benchmarking/benchmark.sh
+++ /dev/null
@@ -1,300 +0,0 @@
-#!/bin/sh
-
-#
-# benchmark.sh: Benchmark dpcpp using compute-benchmarks
-#
-
-usage () {
-    >&2 echo "Usage: $0 -t [-B ]
-    -n  Github runner name -- Required
-    -c  Clean up working directory
-    -C  Clean up working directory and exit
-    -s  Cache results
-
-This script builds and runs benchmarks from compute-benchmarks."
-    exit 1
-}
-
-# Ensures test cases read from enabled_tests.conf contains no malicious content
-_validate_testname () {
-    if [ -n "$(printf "%s" "$1" | sed "s/[a-zA-Z_]*//g")" ]; then
-        echo "Illegal characters in $TEST_CONFIG. Permitted characters: a-zA-Z_"
-        exit 1
-    fi
-}
-
-clone_perf_res() {
-    echo "### Cloning llvm-ci-perf-results ($SANITIZED_PERF_RES_GIT_REPO:$SANITIZED_PERF_RES_GIT_BRANCH) ###"
-    git clone -b "$SANITIZED_PERF_RES_GIT_BRANCH" "https://github.com/$SANITIZED_PERF_RES_GIT_REPO" ./llvm-ci-perf-results
-    [ "$?" -ne 0 ] && exit "$?"
-}
-
-clone_compute_bench() {
-    echo "### Cloning compute-benchmarks ($SANITIZED_COMPUTE_BENCH_GIT_REPO:$SANITIZED_COMPUTE_BENCH_GIT_BRANCH) ###"
-    git clone -b "$SANITIZED_COMPUTE_BENCH_GIT_BRANCH" \
-        --recurse-submodules "https://github.com/$SANITIZED_COMPUTE_BENCH_GIT_REPO" \
-        ./compute-benchmarks
-    if [ ! -d "./compute-benchmarks" ]; then
-        echo "Failed to clone compute-benchmarks."
-        exit 1
-    elif [ -n "$SANITIZED_COMPUTE_BENCH_GIT_COMMIT" ]; then
-        cd ./compute-benchmarks
-        git checkout "$SANITIZED_COMPUTE_BENCH_GIT_COMMIT"
-        if [ "$?" -ne 0 ]; then
-            echo "Failed to get compute-benchmarks commit '$SANITIZED_COMPUTE_BENCH_GIT_COMMIT'."
-            exit 1
-        fi
-        cd -
-    fi
-}
-
-build_compute_bench() {
-    echo "### Building compute-benchmarks ($SANITIZED_COMPUTE_BENCH_GIT_REPO:$SANITIZED_COMPUTE_BENCH_GIT_BRANCH) ###"
-    mkdir ./compute-benchmarks/build && cd ./compute-benchmarks/build &&
-    # No reason to turn on ccache, if this docker image will be disassembled later on
-    cmake .. -DBUILD_SYCL=ON -DBUILD_L0=OFF -DBUILD=OCL=OFF -DCCACHE_ALLOWED=FALSE
-    # TODO enable mechanism for opting into L0 and OCL -- the concept is to
-    # subtract OCL/L0 times from SYCL times in hopes of deriving SYCL runtime
-    # overhead, but this is mostly an idea that needs to be mulled upon.
-
-    if [ "$?" -eq 0 ]; then
-        while IFS= read -r case; do
-            # Skip lines starting with '#'
-            [ "${case##\#*}" ] || continue
-
-            _validate_testname "$case"
-            make "-j$SANITIZED_COMPUTE_BENCH_COMPILE_JOBS" "$case"
-        done < "$TESTS_CONFIG"
-    fi
-    cd -
-}
-
-# Check if the number of samples for a given test case is less than a threshold
-# set in benchmark-ci.conf
-#
-# Usage: 
-samples_under_threshold () {
-    # Directory doesn't exist, samples automatically under threshold
-    [ ! -d "./llvm-ci-perf-results/$1" ] && return 0
-    file_count="$(find "./llvm-ci-perf-results/$1" -maxdepth 1 -type f | wc -l )"
-    [ "$file_count" -lt "$SANITIZED_AVERAGE_MIN_THRESHOLD" ]
-}
-
-# Check for a regression via compare.py
-#
-# Usage: check_regression 
-check_regression() {
-    csv_relpath="$(dirname "$1")"
-    csv_name="$(basename "$1")"
-    if samples_under_threshold "$csv_relpath"; then
-        echo "Not enough samples to construct a good average, performance\
- check skipped!"
-        return 0 # Success status
-    fi
-    python "$DEVOPS_PATH/scripts/benchmarking/compare.py" \
-        "$DEVOPS_PATH" "$csv_relpath" "$csv_name"
-    return $?
-}
-
-# Move the results of our benchmark into the git repo, and save benchmark
-# results to artifact archive
-#
-# Usage: cache 
-cache() {
-    mkdir -p "$(dirname ./artifact/passing_tests/$1)" "$(dirname ./artifact/failed_tests/$1)"
-    cp "./artifact/failed_tests/$1" "./artifact/passing_tests/$1"
-    mkdir -p "$(dirname ./llvm-ci-perf-results/$1)"
-    mv "./artifact/failed_tests/$1" "./llvm-ci-perf-results/$1"
-}
-
-# Check for a regression + cache if no regression found
-#
-# Usage: check_and_cache 
-check_and_cache() {
-    echo "Checking $1..."
-    if check_regression $1; then
-        if [ "$CACHE_RESULTS" -eq "1" ]; then
-            echo "Caching $1..."
-            cache $1
-        fi
-    else
-        [ "$CACHE_RESULTS" -eq "1" ] && echo "Regression found -- Not caching!"
-    fi
-}
-
-# Run and process the results of each enabled benchmark in enabled_tests.conf
-process_benchmarks() {
-    echo "### Running and processing selected benchmarks ###"
-    if [ -z "$TESTS_CONFIG" ]; then
-        echo "Setting tests to run via cli is not currently supported."
-        exit 1
-    else
-        rm ./artifact/benchmarks_errored.log ./artifact/benchmarks_failed.log 2> /dev/null
-        mkdir -p ./artifact
-        # Loop through each line of enabled_tests.conf, but ignore lines in the
-        # test config starting with #'s:
-        grep "^[^#]" "$TESTS_CONFIG" | while read -r testcase; do
-            _validate_testname "$testcase"
-            echo "# Running $testcase..."
-
-            # The benchmark results git repo and this script's output both share
-            # the following directory structure:
-            #
-            # ///
-            #
-            # Instead of specifying 2 paths with a slightly different root
-            # folder name for every function we use, we can use a relative path
-            # to represent the file in both folders.
-            #
-            # Figure out the relative path of our testcase result:
-            test_dir_relpath="$DEVICE_SELECTOR_DIRNAME/$RUNNER/$testcase"
-            output_csv_relpath="$test_dir_relpath/$testcase-$TIMESTAMP.csv"
-            mkdir -p "./artifact/failed_tests/$test_dir_relpath" # Ensure directory exists
-
-            # Tests are first placed in ./artifact/failed_tests, and are only
-            # moved to passing_tests or the performance results repo if the
-            # benchmark results are passing
-            output_csv="./artifact/failed_tests/$output_csv_relpath"
-            "./compute-benchmarks/build/bin/$testcase" --csv \
-                --iterations="$SANITIZED_COMPUTE_BENCH_ITERATIONS" > "$output_csv"
-
-            exit_status="$?"
-            if [ "$exit_status" -eq 0 ] && [ -s "$output_csv" ]; then
-                # Filter out header lines not in csv format:
-                tail +8 "$output_csv" > .tmp_res
-                mv .tmp_res "$output_csv"
-                check_and_cache $output_csv_relpath
-            else
-                echo "[ERROR] $testcase returned exit status $exit_status"
-                echo "-- $testcase: error $exit_status" >> ./artifact/benchmarks_errored.log
-            fi
-        done
-    fi
-}
-
-# Handle failures + produce a report on what failed
-process_results() {
-    fail=0
-    if [ -s ./artifact/benchmarks_failed.log ]; then
-        printf "\n### Tests performing over acceptable range of average: ###\n"
-        cat ./artifact/benchmarks_failed.log
-        echo ""
-        fail=2
-    fi
-    if [ -s ./artifact/benchmarks_errored.log ]; then
-        printf "\n### Tests that failed to run: ###\n"
-        cat ./artifact/benchmarks_errored.log
-        echo ""
-        fail=1
-    fi
-    exit $fail
-}
-
-cleanup() {
-    echo "### Cleaning up compute-benchmark builds from prior runs ###"
-    rm -rf ./compute-benchmarks
-    rm -rf ./llvm-ci-perf-results
-    [ ! -z "$_exit_after_cleanup" ] && exit
-}
-
-load_configs() {
-    # This script needs to know where the intel/llvm "/devops" directory is,
-    # containing all the configuration files and the compare script.
-    #
-    # If this is not provided, this function tries to guess where the files
-    # are based on how the script is called, and verifies that all necessary
-    # configs and scripts are reachable.
-
-    # This benchmarking script is usually at:
-    #
-    # /devops/scripts/benchmarking/benchmark.sh
-    #
-    # Derive /devops based on location of this script:
-    [ -z "$DEVOPS_PATH" ] && DEVOPS_PATH="$(dirname "$0")/../.."
-    if [ -z "$(printf '%s' "$DEVOPS_PATH" | grep -oE '^[a-zA-Z0-9._\/-]+$')" ]; then
-        echo "Bad DEVOPS_PATH, please specify DEVOPS_PATH variable."
-        exit 1
-    fi
-
-    TESTS_CONFIG="$(realpath "$DEVOPS_PATH/benchmarking/enabled_tests.conf")"
-    COMPARE_PATH="$(realpath "$DEVOPS_PATH/scripts/benchmarking/compare.py")"
-    LOAD_CONFIG_PY="$(realpath "$DEVOPS_PATH/scripts/benchmarking/load_config.py")"
-
-    for file in \
-        "$TESTS_CONFIG" "$COMPARE_PATH" "$LOAD_CONFIG_PY"
-    do
-        if [ ! -f "$file" ]; then
-            echo "Please provide path to /devops in DEVOPS_PATH."
-            exit -1
-        fi
-    done
-
-    $(python "$LOAD_CONFIG_PY" "$DEVOPS_PATH" config)
-    $(python "$LOAD_CONFIG_PY" "$DEVOPS_PATH" constants)
-}
-
-#####
-
-load_configs
-
-COMPUTE_BENCH_COMPILE_FLAGS=""
-CACHE_RESULTS="0"
-# Timestamp format is YYYYMMDD_HHMMSS
-TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
-
-# CLI flags + overrides to configuration options:
-while getopts "n:cCs" opt; do
-    case "$opt" in
-    n)
-        if [ -n "$(printf "%s" "$OPTARG" | sed "s/[a-zA-Z0-9_-]*//g")" ]; then
-            echo "Illegal characters in runner name."
-            exit 1
-        fi
-        RUNNER="$OPTARG"
-        ;;
-    # Cleanup status is saved in a var to ensure all arguments are processed before
-    # performing cleanup
-    c) _cleanup=1 ;;
-    C) _cleanup=1 && _exit_after_cleanup=1 ;;
-    s) CACHE_RESULTS=1;;
-    \?) usage ;;
-    esac
-done
-
-# Check all necessary variables exist:
-if [ -z "$CMPLR_ROOT" ]; then
-    echo "Please set CMPLR_ROOT first; it is needed by compute-benchmarks to build."
-    exit 1
-elif [ -z "$ONEAPI_DEVICE_SELECTOR" ]; then
-    echo "Please set ONEAPI_DEVICE_SELECTOR first to specify which device to use."
-    exit 1
-elif [ -z "$RUNNER" ]; then
-    echo "Please specify runner name using -n first; it is needed for storing/comparing benchmark results."
-    exit 1
-fi
-
-# Make sure ONEAPI_DEVICE_SELECTOR doesn't try to enable multiple devices at the
-# same time, or use specific device id's
-_dev_sel_backend_re="$(echo "$SANITIZED_DEVICE_SELECTOR_ENABLED_BACKENDS" | sed 's/,/|/g')"
-_dev_sel_device_re="$(echo "$SANITIZED_DEVICE_SELECTOR_ENABLED_DEVICES" | sed 's/,/|/g')"
-_dev_sel_re="s/($_dev_sel_backend_re):($_dev_sel_device_re)//"
-if [ -n "$(echo "$ONEAPI_DEVICE_SELECTOR" | sed -E "$_dev_sel_re")" ]; then
-    echo "Unsupported ONEAPI_DEVICE_SELECTOR value: please ensure only one \
-device is selected, and devices are not selected by indices."
-    echo "Enabled backends: $SANITIZED_DEVICE_SELECTOR_ENABLED_BACKENDS"
-    echo "Enabled device types: $SANITIZED_DEVICE_SELECTOR_ENABLED_DEVICES"
-    exit 1
-fi
-# ONEAPI_DEVICE_SELECTOR values are not valid directory names in unix: this
-# value lets us use ONEAPI_DEVICE_SELECTOR as actual directory names
-DEVICE_SELECTOR_DIRNAME="$(echo "$ONEAPI_DEVICE_SELECTOR" | sed 's/:/-/')"
-
-# Clean up and delete all cached files if specified:
-[ ! -z "$_cleanup" ] && cleanup
-# Clone and build only if they aren't already cached/deleted:
-[ ! -d ./llvm-ci-perf-results ] && clone_perf_res
-[ ! -d ./compute-benchmarks ] && clone_compute_bench
-[ ! -d ./compute-benchmarks/build ] && build_compute_bench
-# Process benchmarks:
-process_benchmarks
-process_results
\ No newline at end of file
diff --git a/devops/scripts/benchmarking/common.py b/devops/scripts/benchmarking/common.py
deleted file mode 100644
index c400b686db90f..0000000000000
--- a/devops/scripts/benchmarking/common.py
+++ /dev/null
@@ -1,196 +0,0 @@
-import re
-import os
-import sys
-import string
-import configparser
-
-
-class Validate:
-    """Static class containing methods for validating various fields"""
-
-    @staticmethod
-    def filepath(path: str) -> bool:
-        """
-        Returns True if path is clean (no illegal characters), otherwise False.
-        """
-        filepath_re = re.compile(r"[a-zA-Z0-9\/\._\-]+")
-        return filepath_re.match(path) is not None
-
-    @staticmethod
-    def timestamp(t: str) -> bool:
-        """
-        Returns True if t is in form YYYYMMDD_HHMMSS, otherwise False.
- """ - timestamp_re = re.compile( - r"^\d{4}(0[1-9]|1[0-2])([0-2][0-9]|3[01])_([01][0-9]|2[0-3])[0-5][0-9][0-5][0-9]$" - ) - return timestamp_re.match(t) is not None - - @staticmethod - def sanitize_stat(stat: str) -> float: - """ - Sanitize statistics found in compute-benchmark output csv files. Returns - float if sanitized, None if not sanitizable. - """ - # Get rid of % - if stat[-1] == "%": - stat = stat[:-1] - - # Cast to float: If cast succeeds, the statistic is clean. - try: - return float(stat) - except ValueError: - return None - - -class SanitizedConfig: - """ - Static class for holding sanitized configuration values used within python. - - Configuration option names follow
_