From e6ca99296021989c7e729b4749e1d830244c1ea1 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 27 Feb 2025 14:01:17 -0800 Subject: [PATCH 001/114] Move UR devops scripts to devops folder --- .github/workflows/ur-benchmarks-reusable.yml | 6 +++--- .github/workflows/ur-build-hw.yml | 2 +- .../scripts/benchmarks/README.md | 0 .../scripts/benchmarks/benches/base.py | 0 .../scripts/benchmarks/benches/compute.py | 0 .../scripts/benchmarks/benches/llamacpp.py | 0 .../scripts/benchmarks/benches/oneapi.py | 0 .../scripts/benchmarks/benches/result.py | 0 .../scripts/benchmarks/benches/syclbench.py | 0 .../scripts/benchmarks/benches/test.py | 0 .../scripts/benchmarks/benches/umf.py | 0 .../scripts/benchmarks/benches/velocity.py | 0 .../benchmarks/benchmark_results.html.template | 0 .../scripts/benchmarks/history.py | 0 .../scripts/benchmarks/main.py | 0 .../scripts/benchmarks/options.py | 0 .../scripts/benchmarks/output_html.py | 0 .../scripts/benchmarks/output_markdown.py | 0 .../scripts/benchmarks/requirements.txt | 0 .../scripts/benchmarks/utils/compute_runtime.py | 0 .../scripts/benchmarks/utils/utils.py | 0 .../scripts/benchmarks/workflow.png | Bin .../.github => devops}/scripts/get_system_info.sh | 0 23 files changed, 4 insertions(+), 4 deletions(-) rename {unified-runtime => devops}/scripts/benchmarks/README.md (100%) rename {unified-runtime => devops}/scripts/benchmarks/benches/base.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/benches/compute.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/benches/llamacpp.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/benches/oneapi.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/benches/result.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/benches/syclbench.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/benches/test.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/benches/umf.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/benches/velocity.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/benchmark_results.html.template (100%) rename {unified-runtime => devops}/scripts/benchmarks/history.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/main.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/options.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/output_html.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/output_markdown.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/requirements.txt (100%) rename {unified-runtime => devops}/scripts/benchmarks/utils/compute_runtime.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/utils/utils.py (100%) rename {unified-runtime => devops}/scripts/benchmarks/workflow.png (100%) rename {unified-runtime/.github => devops}/scripts/get_system_info.sh (100%) diff --git a/.github/workflows/ur-benchmarks-reusable.yml b/.github/workflows/ur-benchmarks-reusable.yml index 3b5a0480421d4..6e8a4ea535d15 100644 --- a/.github/workflows/ur-benchmarks-reusable.yml +++ b/.github/workflows/ur-benchmarks-reusable.yml @@ -82,7 +82,7 @@ jobs: - name: Install pip packages run: | - pip install --force-reinstall -r ${{github.workspace}}/sycl-repo/unified-runtime/scripts/benchmarks/requirements.txt + pip install --force-reinstall -r ${{github.workspace}}/sycl-repo/devops/scripts/benchmarks/requirements.txt - name: Configure SYCL run: > @@ -139,7 +139,7 @@ jobs: working-directory: ${{ github.workspace }} id: 
benchmarks run: > - taskset -c "${{ env.CORES }}" ${{ github.workspace }}/sycl-repo/unified-runtime/scripts/benchmarks/main.py + taskset -c "${{ env.CORES }}" ${{ github.workspace }}/sycl-repo/devops/scripts/benchmarks/main.py ~/llvm_bench_workdir --sycl ${{ github.workspace }}/sycl_build --ur ${{ github.workspace }}/ur_install @@ -195,4 +195,4 @@ jobs: - name: Get information about platform if: ${{ always() }} - run: ${{github.workspace}}/sycl-repo/unified-runtime/.github/scripts/get_system_info.sh + run: ${{github.workspace}}/sycl-repo/devops/scripts/get_system_info.sh diff --git a/.github/workflows/ur-build-hw.yml b/.github/workflows/ur-build-hw.yml index 8ad0f45bb35bb..9cf4d262d580d 100644 --- a/.github/workflows/ur-build-hw.yml +++ b/.github/workflows/ur-build-hw.yml @@ -145,4 +145,4 @@ jobs: - name: Get information about platform if: ${{ always() }} - run: ${{github.workspace}}/unified-runtime/.github/scripts/get_system_info.sh + run: ${{github.workspace}}/devops/scripts/get_system_info.sh diff --git a/unified-runtime/scripts/benchmarks/README.md b/devops/scripts/benchmarks/README.md similarity index 100% rename from unified-runtime/scripts/benchmarks/README.md rename to devops/scripts/benchmarks/README.md diff --git a/unified-runtime/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py similarity index 100% rename from unified-runtime/scripts/benchmarks/benches/base.py rename to devops/scripts/benchmarks/benches/base.py diff --git a/unified-runtime/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py similarity index 100% rename from unified-runtime/scripts/benchmarks/benches/compute.py rename to devops/scripts/benchmarks/benches/compute.py diff --git a/unified-runtime/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py similarity index 100% rename from unified-runtime/scripts/benchmarks/benches/llamacpp.py rename to devops/scripts/benchmarks/benches/llamacpp.py diff --git a/unified-runtime/scripts/benchmarks/benches/oneapi.py b/devops/scripts/benchmarks/benches/oneapi.py similarity index 100% rename from unified-runtime/scripts/benchmarks/benches/oneapi.py rename to devops/scripts/benchmarks/benches/oneapi.py diff --git a/unified-runtime/scripts/benchmarks/benches/result.py b/devops/scripts/benchmarks/benches/result.py similarity index 100% rename from unified-runtime/scripts/benchmarks/benches/result.py rename to devops/scripts/benchmarks/benches/result.py diff --git a/unified-runtime/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py similarity index 100% rename from unified-runtime/scripts/benchmarks/benches/syclbench.py rename to devops/scripts/benchmarks/benches/syclbench.py diff --git a/unified-runtime/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py similarity index 100% rename from unified-runtime/scripts/benchmarks/benches/test.py rename to devops/scripts/benchmarks/benches/test.py diff --git a/unified-runtime/scripts/benchmarks/benches/umf.py b/devops/scripts/benchmarks/benches/umf.py similarity index 100% rename from unified-runtime/scripts/benchmarks/benches/umf.py rename to devops/scripts/benchmarks/benches/umf.py diff --git a/unified-runtime/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py similarity index 100% rename from unified-runtime/scripts/benchmarks/benches/velocity.py rename to devops/scripts/benchmarks/benches/velocity.py diff --git 
a/unified-runtime/scripts/benchmarks/benchmark_results.html.template b/devops/scripts/benchmarks/benchmark_results.html.template similarity index 100% rename from unified-runtime/scripts/benchmarks/benchmark_results.html.template rename to devops/scripts/benchmarks/benchmark_results.html.template diff --git a/unified-runtime/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py similarity index 100% rename from unified-runtime/scripts/benchmarks/history.py rename to devops/scripts/benchmarks/history.py diff --git a/unified-runtime/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py similarity index 100% rename from unified-runtime/scripts/benchmarks/main.py rename to devops/scripts/benchmarks/main.py diff --git a/unified-runtime/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py similarity index 100% rename from unified-runtime/scripts/benchmarks/options.py rename to devops/scripts/benchmarks/options.py diff --git a/unified-runtime/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py similarity index 100% rename from unified-runtime/scripts/benchmarks/output_html.py rename to devops/scripts/benchmarks/output_html.py diff --git a/unified-runtime/scripts/benchmarks/output_markdown.py b/devops/scripts/benchmarks/output_markdown.py similarity index 100% rename from unified-runtime/scripts/benchmarks/output_markdown.py rename to devops/scripts/benchmarks/output_markdown.py diff --git a/unified-runtime/scripts/benchmarks/requirements.txt b/devops/scripts/benchmarks/requirements.txt similarity index 100% rename from unified-runtime/scripts/benchmarks/requirements.txt rename to devops/scripts/benchmarks/requirements.txt diff --git a/unified-runtime/scripts/benchmarks/utils/compute_runtime.py b/devops/scripts/benchmarks/utils/compute_runtime.py similarity index 100% rename from unified-runtime/scripts/benchmarks/utils/compute_runtime.py rename to devops/scripts/benchmarks/utils/compute_runtime.py diff --git a/unified-runtime/scripts/benchmarks/utils/utils.py b/devops/scripts/benchmarks/utils/utils.py similarity index 100% rename from unified-runtime/scripts/benchmarks/utils/utils.py rename to devops/scripts/benchmarks/utils/utils.py diff --git a/unified-runtime/scripts/benchmarks/workflow.png b/devops/scripts/benchmarks/workflow.png similarity index 100% rename from unified-runtime/scripts/benchmarks/workflow.png rename to devops/scripts/benchmarks/workflow.png diff --git a/unified-runtime/.github/scripts/get_system_info.sh b/devops/scripts/get_system_info.sh similarity index 100% rename from unified-runtime/.github/scripts/get_system_info.sh rename to devops/scripts/get_system_info.sh From 3d42db259ac9e04b59a9fe4f660024ac9073736d Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 28 Feb 2025 08:38:44 -0800 Subject: [PATCH 002/114] Restrict number of cores used --- devops/actions/run-tests/benchmark/action.yml | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 7f69fdf832982..69631d044891c 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -46,6 +46,26 @@ runs: echo "# This workflow is not guaranteed to work with other backends." echo "#" ;; esac + - name: Compute CPU core range to run benchmarks on + run: | + # Taken from ur-benchmark-reusable.yml: + + # Compute the core range for the first NUMA node; second node is used by + # UMF. 
Skip the first 4 cores as the kernel is likely to schedule more + # work on these. + CORES="$(lscpu | awk ' + /NUMA node0 CPU|On-line CPU/ {line=$0} + END { + split(line, a, " ") + split(a[4], b, ",") + sub(/^0/, "4", b[1]) + print b[1] + }')" + echo "CPU core range to use: $CORES" + echo "CORES=$CORES" >> $GITHUB_ENV + + ZE_AFFINITY_MASK=0 + echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV - name: Run compute-benchmarks shell: bash run: | @@ -69,7 +89,7 @@ runs: echo "-----" sycl-ls echo "-----" - ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s || exit 1 + taskset -c "$CORES" ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s || exit 1 - name: Push compute-benchmarks results if: always() shell: bash From 4f08dd6fbf51002f45b0c9a44fa0310e94de5001 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Tue, 4 Mar 2025 13:20:29 -0800 Subject: [PATCH 003/114] Restore ur-benchmark*.yml --- .github/workflows/ur-benchmarks-reusable.yml | 198 ++++++++++++++++++- .github/workflows/ur-benchmarks.yml | 55 +++++- 2 files changed, 240 insertions(+), 13 deletions(-) diff --git a/.github/workflows/ur-benchmarks-reusable.yml b/.github/workflows/ur-benchmarks-reusable.yml index 66ffcecd70314..6e8a4ea535d15 100644 --- a/.github/workflows/ur-benchmarks-reusable.yml +++ b/.github/workflows/ur-benchmarks-reusable.yml @@ -1,12 +1,198 @@ name: Benchmarks Reusable -# This workflow is a WIP: This workflow file acts as a placeholder. +on: + workflow_call: + inputs: + str_name: + required: true + type: string + pr_no: + required: true + # even though this is a number, this is a workaround for issues with + # reusable workflow calls that result in "Unexpected value '0'" error. + type: string + bench_script_params: + required: false + type: string + default: '' + sycl_config_params: + required: false + type: string + default: '' + upload_report: + required: false + type: boolean + default: false + compute_runtime_commit: + required: false + type: string + default: '' -on: [ workflow_call ] +permissions: + contents: read + pull-requests: write jobs: - do-nothing: - runs-on: ubuntu-latest + bench-run: + name: Build SYCL, Run Benchmarks + strategy: + matrix: + adapter: [ + {str_name: "${{ inputs.str_name }}", + sycl_config: "${{ inputs.sycl_config_params }}" + } + ] + build_type: [Release] + compiler: [{c: clang, cxx: clang++}] + + runs-on: "PVC_PERF" + steps: - - run: echo 'This workflow is a WIP.' - + - name: Add comment to PR + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + if: ${{ always() && inputs.pr_no != 0 }} + with: + script: | + const pr_no = '${{ inputs.pr_no }}'; + const adapter = '${{ matrix.adapter.str_name }}'; + const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}'; + const params = '${{ inputs.bench_script_params }}'; + const body = `Compute Benchmarks ${adapter} run (with params: ${params}):\n${url}`; + + github.rest.issues.createComment({ + issue_number: pr_no, + owner: context.repo.owner, + repo: context.repo.repo, + body: body + }) + + - name: Checkout SYCL + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + path: sycl-repo + + # We need to fetch special ref for proper PR's merge commit. Note, this ref may be absent if the PR is already merged. 
+ - name: Fetch PR's merge commit + if: ${{ inputs.pr_no != 0 }} + working-directory: ${{github.workspace}}/sycl-repo + run: | + git fetch -- https://github.com/${{github.repository}} +refs/pull/${{ inputs.pr_no }}/*:refs/remotes/origin/pr/${{ inputs.pr_no }}/* + git checkout origin/pr/${{ inputs.pr_no }}/merge + git rev-parse origin/pr/${{ inputs.pr_no }}/merge + + - name: Install pip packages + run: | + pip install --force-reinstall -r ${{github.workspace}}/sycl-repo/devops/scripts/benchmarks/requirements.txt + + - name: Configure SYCL + run: > + python3 sycl-repo/buildbot/configure.py + -t ${{matrix.build_type}} + -o ${{github.workspace}}/sycl_build + --cmake-gen "Ninja" + --cmake-opt="-DLLVM_INSTALL_UTILS=ON" + --cmake-opt="-DSYCL_PI_TESTS=OFF" + --cmake-opt=-DCMAKE_C_COMPILER_LAUNCHER=ccache + --cmake-opt=-DCMAKE_CXX_COMPILER_LAUNCHER=ccache + ${{matrix.adapter.sycl_config}} + + - name: Build SYCL + run: cmake --build ${{github.workspace}}/sycl_build -j $(nproc) + + # We need a complete installed UR for compute-benchmarks. + - name: Configure UR + run: > + cmake -DCMAKE_BUILD_TYPE=${{matrix.build_type}} + -S${{github.workspace}}/sycl-repo/unified-runtime + -B${{github.workspace}}/ur_build + -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/ur_install + -DUR_BUILD_TESTS=OFF + -DUR_BUILD_ADAPTER_L0=ON + -DUR_BUILD_ADAPTER_L0_V2=ON + -DUMF_DISABLE_HWLOC=ON + + - name: Build UR + run: cmake --build ${{github.workspace}}/ur_build -j $(nproc) + + - name: Install UR + run: cmake --install ${{github.workspace}}/ur_build + + - name: Compute core range + run: | + # Compute the core range for the first NUMA node; second node is for UMF jobs. + # Skip the first 4 cores - the kernel is likely to schedule more work on these. + CORES="$(lscpu | awk ' + /NUMA node0 CPU|On-line CPU/ {line=$0} + END { + split(line, a, " ") + split(a[4], b, ",") + sub(/^0/, "4", b[1]) + print b[1] + }')" + echo "Selected core: $CORES" + echo "CORES=$CORES" >> $GITHUB_ENV + + ZE_AFFINITY_MASK=0 + echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV + + - name: Run benchmarks + working-directory: ${{ github.workspace }} + id: benchmarks + run: > + taskset -c "${{ env.CORES }}" ${{ github.workspace }}/sycl-repo/devops/scripts/benchmarks/main.py + ~/llvm_bench_workdir + --sycl ${{ github.workspace }}/sycl_build + --ur ${{ github.workspace }}/ur_install + --adapter ${{ matrix.adapter.str_name }} + --compare baseline + --compute-runtime ${{ inputs.compute_runtime_commit }} + --build-igc + ${{ inputs.upload_report && '--output-html' || '' }} + ${{ inputs.pr_no != 0 && '--output-markdown' || '' }} + ${{ inputs.bench_script_params }} + + - name: Print benchmark results + run: | + cat ${{ github.workspace }}/benchmark_results.md || true + + - name: Add comment to PR + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + if: ${{ always() && inputs.pr_no != 0 }} + with: + script: | + let markdown = "" + try { + const fs = require('fs'); + markdown = fs.readFileSync('benchmark_results.md', 'utf8'); + } catch(err) { + } + + const pr_no = '${{ inputs.pr_no }}'; + const adapter = '${{ matrix.adapter.str_name }}'; + const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}'; + const test_status = '${{ steps.benchmarks.outcome }}'; + const job_status = '${{ job.status }}'; + const params = '${{ inputs.bench_script_params }}'; + const body = `Benchmarks ${adapter} run (${params}):\n${url}\nJob status: ${job_status}. 
Test status: ${test_status}.\n ${markdown}`; + + github.rest.issues.createComment({ + issue_number: pr_no, + owner: context.repo.owner, + repo: context.repo.repo, + body: body + }) + + - name: Rename benchmark results file + if: ${{ always() && inputs.upload_report }} + run: mv benchmark_results.html benchmark_results_${{ inputs.pr_no }}.html + + - name: Upload HTML report + if: ${{ always() && inputs.upload_report }} + uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + with: + path: benchmark_results_${{ inputs.pr_no }}.html + key: benchmark-results-${{ inputs.pr_no }}-${{ matrix.adapter.str_name }}-${{ github.run_id }} + + - name: Get information about platform + if: ${{ always() }} + run: ${{github.workspace}}/sycl-repo/devops/scripts/get_system_info.sh diff --git a/.github/workflows/ur-benchmarks.yml b/.github/workflows/ur-benchmarks.yml index 23fbb1ad903b4..cde4bfa828d71 100644 --- a/.github/workflows/ur-benchmarks.yml +++ b/.github/workflows/ur-benchmarks.yml @@ -1,12 +1,53 @@ name: Benchmarks -# This workflow is a WIP: this workflow file acts as a placeholder. +on: + workflow_dispatch: + inputs: + str_name: + description: Adapter + type: choice + required: true + default: 'level_zero' + options: + - level_zero + - level_zero_v2 + pr_no: + description: PR number (0 is sycl main branch) + type: number + required: true + bench_script_params: + description: Benchmark script arguments + type: string + required: false + default: '' + sycl_config_params: + description: Extra params for SYCL configuration + type: string + required: false + default: '' + compute_runtime_commit: + description: 'Compute Runtime commit' + type: string + required: false + default: '' + upload_report: + description: 'Upload HTML report' + type: boolean + required: false + default: false -on: [ workflow_dispatch ] +permissions: + contents: read + pull-requests: write jobs: - do-nothing: - runs-on: ubuntu-latest - steps: - - run: echo 'This workflow is a WIP.' - + manual: + name: Compute Benchmarks + uses: ./.github/workflows/ur-benchmarks-reusable.yml + with: + str_name: ${{ inputs.str_name }} + pr_no: ${{ inputs.pr_no }} + bench_script_params: ${{ inputs.bench_script_params }} + sycl_config_params: ${{ inputs.sycl_config_params }} + compute_runtime_commit: ${{ inputs.compute_runtime_commit }} + upload_report: ${{ inputs.upload_report }} From 497dcce9d87e8d610b21afd930669e8059eba54f Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Wed, 5 Mar 2025 16:32:30 +0000 Subject: [PATCH 004/114] [benchmarks] improve HTML and Markdown output This patch improves numerous aspects on how the benchmarking results are visualized: - rewrites the way HTML charts are generated, using a library (Chart.js) that's both easier to use and more visually pleasing. The new HTML page also now decouples data from the HTML itself, leading to faster load times and the ability to fetch data from remote sources. - The markdown output now contains a failures section that lists all benchmarks that failed for a given run. This will be a helpful for developers during PR testing. - Benchmarks can now have description that's displayed on the page. - And many more minor improvements. 
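To illustrate the data/HTML decoupling described above, here is a minimal sketch (not part of this patch) of how such a page can load its run data either from a remote source or from a locally generated file. It assumes the config.remoteDataUrl field added in html/config.js, a global benchmarkRuns array consumed by the charting code in html/scripts.js, and the initializeCharts() entry point; how the real page wires this up is defined in scripts.js and html/index.html, which are only partially shown below.

    // Sketch only: pick between a remote JSON document and locally provided data.
    // Global consumed by the charting code; in the real page a generated data
    // file could assign to it instead of this declaration.
    let benchmarkRuns = [];

    async function loadBenchmarkData() {
        if (config.remoteDataUrl) {
            // Remote mode: fetch the run data as JSON from a separate source,
            // so the static HTML never needs to embed the results themselves.
            const response = await fetch(config.remoteDataUrl);
            benchmarkRuns = await response.json();
        }
        // Local mode: a generated data file loaded via a <script> tag has
        // already populated `benchmarkRuns`, so there is nothing to fetch.
        initializeCharts();
    }

    document.addEventListener('DOMContentLoaded', loadBenchmarkData);

Serving the results as a separate document like this is what enables the faster load times and remote data sources mentioned above, since new runs can be published without regenerating the HTML.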
--- devops/scripts/benchmarks/benches/base.py | 17 +- devops/scripts/benchmarks/benches/compute.py | 130 ++-- devops/scripts/benchmarks/benches/llamacpp.py | 18 +- .../scripts/benchmarks/benches/syclbench.py | 36 +- devops/scripts/benchmarks/benches/test.py | 17 +- devops/scripts/benchmarks/benches/umf.py | 33 +- devops/scripts/benchmarks/benches/velocity.py | 79 ++- .../benchmark_results.html.template | 192 ------ devops/scripts/benchmarks/history.py | 19 +- devops/scripts/benchmarks/html/config.js | 5 + devops/scripts/benchmarks/html/index.html | 205 +++++++ devops/scripts/benchmarks/html/scripts.js | 556 ++++++++++++++++++ devops/scripts/benchmarks/main.py | 52 +- devops/scripts/benchmarks/options.py | 1 + devops/scripts/benchmarks/output_html.py | 352 +---------- devops/scripts/benchmarks/output_markdown.py | 40 +- .../benchmarks/{benches => utils}/oneapi.py | 20 +- .../benchmarks/{benches => utils}/result.py | 17 +- devops/scripts/benchmarks/utils/utils.py | 26 +- 19 files changed, 1167 insertions(+), 648 deletions(-) delete mode 100644 devops/scripts/benchmarks/benchmark_results.html.template create mode 100644 devops/scripts/benchmarks/html/config.js create mode 100644 devops/scripts/benchmarks/html/index.html create mode 100644 devops/scripts/benchmarks/html/scripts.js rename devops/scripts/benchmarks/{benches => utils}/oneapi.py (79%) rename devops/scripts/benchmarks/{benches => utils}/result.py (69%) diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py index d1bb5fb53b83a..77365220dbf85 100644 --- a/devops/scripts/benchmarks/benches/base.py +++ b/devops/scripts/benchmarks/benches/base.py @@ -6,7 +6,7 @@ import os import shutil from pathlib import Path -from .result import Result +from utils.result import Result from options import options from utils.utils import download, run import urllib.request @@ -55,16 +55,25 @@ def create_data_path(self, name, skip_data_dir=False): data_path = os.path.join(self.directory, name) else: data_path = os.path.join(self.directory, "data", name) - if options.rebuild and Path(data_path).exists(): + if options.redownload and Path(data_path).exists(): shutil.rmtree(data_path) Path(data_path).mkdir(parents=True, exist_ok=True) return data_path - def download(self, name, url, file, untar=False, unzip=False, skip_data_dir=False): + def download( + self, + name, + url, + file, + untar=False, + unzip=False, + skip_data_dir=False, + checksum="", + ): self.data_path = self.create_data_path(name, skip_data_dir) - return download(self.data_path, url, file, untar, unzip) + return download(self.data_path, url, file, untar, unzip, checksum) def name(self): raise NotImplementedError() diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index 5e420d560a463..18ed969728902 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -8,10 +8,11 @@ import io from utils.utils import run, git_clone, create_build_path from .base import Benchmark, Suite -from .result import Result +from utils.result import Result from options import options from enum import Enum + class ComputeBench(Suite): def __init__(self, directory): self.directory = directory @@ -47,9 +48,8 @@ def setup(self): f"-Dunified-runtime_DIR={options.ur}/lib/cmake/unified-runtime", ] - print(f"{self.__class__.__name__}: Run {configure_command}") run(configure_command, add_sycl=True) - print(f"{self.__class__.__name__}: Run cmake --build {build_path} -j") + 
run(f"cmake --build {build_path} -j", add_sycl=True) self.built = True @@ -73,16 +73,6 @@ def benchmarks(self) -> list[Benchmark]: ExecImmediateCopyQueue(self, 0, 1, "Device", "Device", 1024), ExecImmediateCopyQueue(self, 1, 1, "Device", "Host", 1024), VectorSum(self), - MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1), - MemcpyExecute(self, 100, 8, 102400, 10, 1, 1, 1), - MemcpyExecute(self, 400, 8, 1024, 1000, 1, 1, 1), - MemcpyExecute(self, 10, 16, 1024, 10000, 1, 1, 1), - MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1), - MemcpyExecute(self, 100, 8, 102400, 10, 0, 1, 1), - MemcpyExecute(self, 400, 8, 1024, 1000, 0, 1, 1), - MemcpyExecute(self, 10, 16, 1024, 10000, 0, 1, 1), - MemcpyExecute(self, 4096, 1, 1024, 10, 0, 1, 0), - MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0), GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 5), GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 1, 5), GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 100), @@ -98,6 +88,16 @@ def benchmarks(self) -> list[Benchmark]: SubmitKernelUR(self, 0, 0), SubmitKernelUR(self, 1, 0), SubmitKernelUR(self, 1, 1), + MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1), + MemcpyExecute(self, 100, 8, 102400, 10, 1, 1, 1), + MemcpyExecute(self, 400, 8, 1024, 1000, 1, 1, 1), + MemcpyExecute(self, 10, 16, 1024, 10000, 1, 1, 1), + MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1), + MemcpyExecute(self, 100, 8, 102400, 10, 0, 1, 1), + MemcpyExecute(self, 400, 8, 1024, 1000, 0, 1, 1), + MemcpyExecute(self, 10, 16, 1024, 10000, 0, 1, 1), + MemcpyExecute(self, 4096, 1, 1024, 10, 0, 1, 0), + MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0), GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 5), GraphApiSinKernelGraph(self, RUNTIMES.UR, 1, 5), GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 100), @@ -136,6 +136,9 @@ def setup(self): def explicit_group(self): return "" + def description(self) -> str: + return "" + def run(self, env_vars) -> list[Result]: command = [ f"{self.benchmark_bin}", @@ -167,6 +170,7 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit=parse_unit_type(unit), + description=self.description() ) ) return ret @@ -221,6 +225,13 @@ def bin_args(self) -> list[str]: "--KernelExecTime=1", ] + def description(self) -> str: + order = "in-order" if self.ioq else "out-of-order" + return ( + f"Measures CPU time overhead of submitting {order} kernels through SYCL API." + "Uses 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time." + ) + class SubmitKernelUR(ComputeBenchmark): def __init__(self, bench, ioq, measureCompletion): @@ -237,6 +248,15 @@ def name(self): def explicit_group(self): return "SubmitKernel" + def description(self) -> str: + order = "in-order" if self.ioq else "out-of-order" + completion = "including" if self.measureCompletion else "excluding" + return ( + f"Measures CPU time overhead of submitting {order} kernels through Unified Runtime API, " + f"{completion} kernel completion time. Uses 10 simple kernels with minimal execution time " + f"to isolate API overhead." + ) + def bin_args(self) -> list[str]: return [ f"--Ioq={self.ioq}", @@ -261,6 +281,14 @@ def name(self): def explicit_group(self): return "SubmitKernel" + def description(self) -> str: + order = "in-order" if self.ioq else "out-of-order" + return ( + f"Measures CPU time overhead of submitting {order} kernels through Level Zero API. " + f"Uses immediate command lists with 10 minimal kernels to isolate submission overhead " + f"from execution time." 
+ ) + def bin_args(self) -> list[str]: return [ f"--Ioq={self.ioq}", @@ -286,6 +314,14 @@ def name(self): order = "in order" if self.ioq else "out of order" return f"api_overhead_benchmark_sycl ExecImmediateCopyQueue {order} from {self.source} to {self.destination}, size {self.size}" + def description(self) -> str: + order = "in-order" if self.ioq else "out-of-order" + operation = "copy-only" if self.isCopyOnly else "copy and command submission" + return ( + f"Measures SYCL {order} queue overhead for {operation} from {self.source} to " + f"{self.destination} memory with {self.size} bytes. Tests immediate execution overheads." + ) + def bin_args(self) -> list[str]: return [ "--iterations=100000", @@ -309,6 +345,13 @@ def __init__(self, bench, isCopyOnly, source, destination, size): def name(self): return f"memory_benchmark_sycl QueueInOrderMemcpy from {self.source} to {self.destination}, size {self.size}" + def description(self) -> str: + operation = "copy-only" if self.isCopyOnly else "copy and command submission" + return ( + f"Measures SYCL in-order queue memory copy performance for {operation} from " + f"{self.source} to {self.destination} with {self.size} bytes, executed 100 times per iteration." + ) + def bin_args(self) -> list[str]: return [ "--iterations=10000", @@ -330,6 +373,12 @@ def __init__(self, bench, source, destination, size): def name(self): return f"memory_benchmark_sycl QueueMemcpy from {self.source} to {self.destination}, size {self.size}" + def description(self) -> str: + return ( + f"Measures general SYCL queue memory copy performance from {self.source} to " + f"{self.destination} with {self.size} bytes per operation." + ) + def bin_args(self) -> list[str]: return [ "--iterations=10000", @@ -349,6 +398,12 @@ def __init__(self, bench, type, size, placement): def name(self): return f"memory_benchmark_sycl StreamMemory, placement {self.placement}, type {self.type}, size {self.size}" + def description(self) -> str: + return ( + f"Measures {self.placement} memory bandwidth using {self.type} pattern with " + f"{self.size} bytes. Higher values (GB/s) indicate better performance." + ) + # measurement is in GB/s def lower_is_better(self): return False @@ -362,6 +417,7 @@ def bin_args(self) -> list[str]: "--useEvents=0", "--contents=Zeros", "--multiplier=1", + "--vectorSize=1", ] @@ -372,6 +428,12 @@ def __init__(self, bench): def name(self): return f"miscellaneous_benchmark_sycl VectorSum" + def description(self) -> str: + return ( + "Measures performance of vector addition across 3D grid (512x256x256 elements) " + "using SYCL." + ) + def bin_args(self) -> list[str]: return [ "--iterations=1000", @@ -408,6 +470,16 @@ def name(self): + (" without events" if not self.useEvents else "") ) + def description(self) -> str: + src_type = "device" if self.srcUSM == 1 else "host" + dst_type = "device" if self.dstUSM == 1 else "host" + events = "with" if self.useEvents else "without" + return ( + f"Measures multithreaded memory copy performance with {self.numThreads} threads " + f"each performing {self.numOpsPerThread} operations on {self.allocSize} bytes " + f"from {src_type} to {dst_type} memory {events} events." 
+ ) + def bin_args(self) -> list[str]: return [ "--Ioq=1", @@ -441,6 +513,13 @@ def __init__(self, bench, runtime: RUNTIMES, withGraphs, numKernels): def explicit_group(self): return f"SinKernelGraph {self.numKernels}" + def description(self) -> str: + execution = "using graphs" if self.withGraphs else "without graphs" + return ( + f"Measures {self.runtime.value.upper()} performance when executing {self.numKernels} " + f"sin kernels {execution}. Tests overhead and benefits of graph-based execution." + ) + def name(self): return f"graph_api_benchmark_{self.runtime.value} SinKernelGraph graphs:{self.withGraphs}, numKernels:{self.numKernels}" @@ -452,28 +531,3 @@ def bin_args(self) -> list[str]: "--withCopyOffload=1", "--immediateAppendCmdList=0", ] - - -class GraphApiSubmitExecGraph(ComputeBenchmark): - def __init__(self, bench, ioq, submit, numKernels): - self.ioq = ioq - self.submit = submit - self.numKernels = numKernels - super().__init__(bench, "graph_api_benchmark_sycl", "SubmitExecGraph") - - def name(self): - return f"graph_api_benchmark_sycl SubmitExecGraph ioq:{self.ioq}, submit:{self.submit}, numKernels:{self.numKernels}" - - def explicit_group(self): - if self.submit: - return "SubmitGraph" - else: - return "ExecGraph" - - def bin_args(self) -> list[str]: - return [ - "--iterations=100", - f"--measureSubmit={self.submit}", - f"--ioq={self.ioq}", - f"--numKernels={self.numKernels}", - ] diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py index 6524c95a9f56f..d8e0ab5d007bb 100644 --- a/devops/scripts/benchmarks/benches/llamacpp.py +++ b/devops/scripts/benchmarks/benches/llamacpp.py @@ -8,10 +8,10 @@ from pathlib import Path from utils.utils import download, git_clone from .base import Benchmark, Suite -from .result import Result +from utils.result import Result from utils.utils import run, create_build_path from options import options -from .oneapi import get_oneapi +from utils.oneapi import get_oneapi import os @@ -43,6 +43,7 @@ def setup(self): self.models_dir, "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf", "Phi-3-mini-4k-instruct-q4.gguf", + checksum="fc4f45c9729874a33a527465b2ec78189a18e5726b7121182623feeae38632ace4f280617b01d4a04875acf49d263ee4", ) self.oneapi = get_oneapi() @@ -62,9 +63,9 @@ def setup(self): f'-DCMAKE_CXX_FLAGS=-I"{self.oneapi.mkl_include()}"', f"-DCMAKE_SHARED_LINKER_FLAGS=-L{self.oneapi.compiler_lib()} -L{self.oneapi.mkl_lib()}", ] - print(f"{self.__class__.__name__}: Run {configure_command}") + run(configure_command, add_sycl=True) - print(f"{self.__class__.__name__}: Run cmake --build {self.build_path} -j") + run( f"cmake --build {self.build_path} -j", add_sycl=True, @@ -92,6 +93,14 @@ def setup(self): def name(self): return f"llama.cpp" + def description(self) -> str: + return ( + "Performance testing tool for llama.cpp that measures LLM inference speed in tokens per second. " + "Runs both prompt processing (initial context processing) and text generation benchmarks with " + "different batch sizes. Higher values indicate better performance. Uses the Phi-3-mini-4k-instruct " + "quantized model and leverages SYCL with oneDNN for acceleration." 
+ ) + def lower_is_better(self): return False @@ -130,6 +139,7 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit="token/s", + description=self.description() ) ) return results diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py index f7cf571a7ecd7..47326b2555a68 100644 --- a/devops/scripts/benchmarks/benches/syclbench.py +++ b/devops/scripts/benchmarks/benches/syclbench.py @@ -8,7 +8,7 @@ import io from utils.utils import run, git_clone, create_build_path from .base import Benchmark, Suite -from .result import Result +from utils.result import Result from options import options @@ -65,14 +65,14 @@ def benchmarks(self) -> list[Benchmark]: DagTaskS(self), HostDevBandwidth(self), LocalMem(self), - Pattern_L2(self), - Reduction(self), + # Pattern_L2(self), # validation failure + # Reduction(self), # validation failure ScalarProd(self), SegmentReduction(self), - UsmAccLatency(self), + # UsmAccLatency(self), # validation failure UsmAllocLatency(self), - UsmInstrMix(self), - UsmPinnedOverhead(self), + # UsmInstrMix(self), # validation failure + # UsmPinnedOverhead(self), # validation failure VecAdd(self), # *** sycl-bench single benchmarks # TwoDConvolution(self), # run time < 1ms @@ -82,20 +82,20 @@ def benchmarks(self) -> list[Benchmark]: Atax(self), # Atomic_reduction(self), # run time < 1ms Bicg(self), - Correlation(self), - Covariance(self), - Gemm(self), - Gesumv(self), - Gramschmidt(self), + # Correlation(self), # validation failure + # Covariance(self), # validation failure + # Gemm(self), # validation failure + # Gesumv(self), # validation failure + # Gramschmidt(self), # validation failure KMeans(self), LinRegCoeff(self), # LinRegError(self), # run time < 1ms - MatmulChain(self), + # MatmulChain(self), # validation failure MolDyn(self), - Mvt(self), + # Mvt(self), # validation failure Sf(self), - Syr2k(self), - Syrk(self), + # Syr2k(self), # validation failure + # Syrk(self), # validation failure ] @@ -122,7 +122,7 @@ def run(self, env_vars) -> list[Result]: if self.done: return self.outputfile = os.path.join(self.bench.directory, self.test + ".csv") - print(f"{self.__class__.__name__}: Results in {self.outputfile}") + command = [ f"{self.benchmark_bin}", f"--warmup-run", @@ -143,7 +143,7 @@ def run(self, env_vars) -> list[Result]: if not row[0].startswith("#"): res_list.append( Result( - label=row[0], + label=f"{self.name()} {row[0]}", value=float(row[12]) * 1000, # convert to ms passed=(row[1] == "PASS"), command=command, @@ -161,7 +161,7 @@ def teardown(self): return def name(self): - return self.test + return f"{self.bench.name()} {self.test}" # multi benchmarks diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py index 06eac12b25344..18794d4e9c73c 100644 --- a/devops/scripts/benchmarks/benches/test.py +++ b/devops/scripts/benchmarks/benches/test.py @@ -6,7 +6,7 @@ import random from utils.utils import git_clone from .base import Benchmark, Suite -from .result import Result +from utils.result import Result from utils.utils import run, create_build_path from options import options import os @@ -19,6 +19,9 @@ def __init__(self): def setup(self): return + def name(self) -> str: + return "Test Suite" + def benchmarks(self) -> list[Benchmark]: bench_configs = [ ("Memory Bandwidth", 2000, 200, "Foo Group"), @@ -36,18 +39,18 @@ def benchmarks(self) -> list[Benchmark]: value = base_value * value_multiplier diff = base_diff * value_multiplier - 
result.append(TestBench(name, value, diff, group)) + result.append(TestBench(self, name, value, diff, group)) return result class TestBench(Benchmark): - def __init__(self, name, value, diff, group=""): + def __init__(self, suite, name, value, diff, group=""): + super().__init__("", suite) self.bname = name self.value = value self.diff = diff self.group = group - super().__init__("") def name(self): return self.bname @@ -58,6 +61,9 @@ def lower_is_better(self): def setup(self): return + def description(self) -> str: + return f"This is a test benchmark for {self.bname}." + def run(self, env_vars) -> list[Result]: random_value = self.value + random.uniform(-1 * (self.diff), self.diff) return [ @@ -65,10 +71,11 @@ def run(self, env_vars) -> list[Result]: label=self.name(), explicit_group=self.group, value=random_value, - command="", + command=["test", "--arg1", "foo"], env={"A": "B"}, stdout="no output", unit="ms", + description=self.description(), ) ] diff --git a/devops/scripts/benchmarks/benches/umf.py b/devops/scripts/benchmarks/benches/umf.py index 15c343b9a9845..1f736e7755f92 100644 --- a/devops/scripts/benchmarks/benches/umf.py +++ b/devops/scripts/benchmarks/benches/umf.py @@ -6,10 +6,10 @@ import random from utils.utils import git_clone from .base import Benchmark, Suite -from .result import Result +from utils.result import Result from utils.utils import run, create_build_path from options import options -from .oneapi import get_oneapi +from utils.oneapi import get_oneapi import os import csv import io @@ -22,8 +22,6 @@ def isUMFAvailable(): class UMFSuite(Suite): def __init__(self, directory): self.directory = directory - if not isUMFAvailable(): - print("UMF not provided. Related benchmarks will not run") def name(self) -> str: return "UMF" @@ -40,6 +38,8 @@ def benchmarks(self) -> list[Benchmark]: benches = [ GBench(self), GBenchUmfProxy(self), + GBenchJemalloc(self), + GBenchTbbProxy(self), ] return benches @@ -220,10 +220,31 @@ def parse_output(self, output): return results -class GBenchUmfProxy(GBenchPreloaded): +class GBenchGlibc(GBenchPreloaded): + def __init__(self, bench, replacing_lib): + super().__init__(bench, lib_to_be_replaced="glibc", replacing_lib=replacing_lib) + + +class GBenchUmfProxy(GBenchGlibc): def __init__(self, bench): - super().__init__(bench, lib_to_be_replaced="glibc", replacing_lib="umfProxy") + super().__init__(bench, replacing_lib="umfProxy") def extra_env_vars(self) -> dict: umf_proxy_path = os.path.join(options.umf, "lib", "libumf_proxy.so") return {"LD_PRELOAD": umf_proxy_path} + + +class GBenchJemalloc(GBenchGlibc): + def __init__(self, bench): + super().__init__(bench, replacing_lib="jemalloc") + + def extra_env_vars(self) -> dict: + return {"LD_PRELOAD": "libjemalloc.so"} + + +class GBenchTbbProxy(GBenchGlibc): + def __init__(self, bench): + super().__init__(bench, replacing_lib="tbbProxy") + + def extra_env_vars(self) -> dict: + return {"LD_PRELOAD": "libtbbmalloc_proxy.so"} diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py index b7d06cbe4a3a2..be36c47ca36d5 100644 --- a/devops/scripts/benchmarks/benches/velocity.py +++ b/devops/scripts/benchmarks/benches/velocity.py @@ -7,10 +7,10 @@ import shutil from utils.utils import git_clone from .base import Benchmark, Suite -from .result import Result +from utils.result import Result from utils.utils import run, create_build_path from options import options -from .oneapi import get_oneapi +from utils.oneapi import get_oneapi import shutil import 
os @@ -115,6 +115,9 @@ def extra_env_vars(self) -> dict: def parse_output(self, stdout: str) -> float: raise NotImplementedError() + def description(self) -> str: + return "" + def run(self, env_vars) -> list[Result]: env_vars.update(self.extra_env_vars()) @@ -133,6 +136,7 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit=self.unit, + description=self.description() ) ] @@ -147,6 +151,12 @@ def __init__(self, vb: VelocityBench): def name(self): return "Velocity-Bench Hashtable" + def description(self) -> str: + return ( + "Measures hash table search performance using an efficient lock-free algorithm with linear probing. " + "Reports throughput in millions of keys processed per second. Higher values indicate better performance." + ) + def bin_args(self) -> list[str]: return ["--no-verify"] @@ -170,6 +180,13 @@ def __init__(self, vb: VelocityBench): def name(self): return "Velocity-Bench Bitcracker" + def description(self) -> str: + return ( + "Password-cracking application for BitLocker-encrypted memory units. " + "Uses dictionary attack to find user or recovery passwords. " + "Measures total time required to process 60000 passwords." + ) + def bin_args(self) -> list[str]: self.data_path = os.path.join(self.vb.repo_path, "bitcracker", "hash_pass") @@ -204,11 +221,19 @@ def download_deps(self): "https://github.com/oneapi-src/Velocity-Bench/raw/main/sobel_filter/res/sobel_filter_data.tgz?download=", "sobel_filter_data.tgz", untar=True, + checksum="7fc62aa729792ede80ed8ae70fb56fa443d479139c5888ed4d4047b98caec106687a0f05886a9ced77922ccba7f65e66", ) def name(self): return "Velocity-Bench Sobel Filter" + def description(self) -> str: + return ( + "Popular RGB-to-grayscale image conversion technique that applies a gaussian filter " + "to reduce edge artifacts. Processes a large 32K x 32K image and measures " + "the time required to apply the filter." + ) + def bin_args(self) -> list[str]: return [ "-i", @@ -249,6 +274,13 @@ def run(self, env_vars) -> list[Result]: def name(self): return "Velocity-Bench QuickSilver" + def description(self) -> str: + return ( + "Solves a simplified dynamic Monte Carlo particle-transport problem used in HPC. " + "Replicates memory access patterns, communication patterns, and branching of Mercury workloads. " + "Reports a figure of merit in MMS/CTT where higher values indicate better performance." + ) + def lower_is_better(self): return False @@ -279,14 +311,22 @@ def __init__(self, vb: VelocityBench): def download_deps(self): self.download( "easywave", - "https://git.gfz-potsdam.de/id2/geoperil/easyWave/-/raw/master/data/examples.tar.gz", + "https://gitlab.oca.eu/AstroGeoGPM/eazyWave/-/raw/master/data/examples.tar.gz", "examples.tar.gz", untar=True, + checksum="3b0cd0efde10122934ba6db8451b8c41f4f95a3370fc967fc5244039ef42aae7e931009af1586fa5ed2143ade8ed47b1", ) def name(self): return "Velocity-Bench Easywave" + def description(self) -> str: + return ( + "A tsunami wave simulator used for researching tsunami generation and wave propagation. " + "Measures the elapsed time in milliseconds to simulate a specified tsunami event " + "based on real-world data." + ) + def bin_args(self) -> list[str]: return [ "-grid", @@ -341,6 +381,13 @@ def download_deps(self): def name(self): return "Velocity-Bench CudaSift" + def description(self) -> str: + return ( + "Implementation of the SIFT (Scale Invariant Feature Transform) algorithm " + "for detecting, describing, and matching local features in images. 
" + "Measures average processing time in milliseconds." + ) + def parse_output(self, stdout: str) -> float: match = re.search(r"Avg workload time = (\d+\.\d+) ms", stdout) if match: @@ -364,6 +411,7 @@ def download_deps(self): "cifar-10-binary.tar.gz", untar=True, skip_data_dir=True, + checksum="974b1bd62da0cb3b7a42506d42b1e030c9a0cb4a0f2c359063f9c0e65267c48f0329e4493c183a348f44ddc462eaf814", ) return @@ -382,6 +430,13 @@ def extra_cmake_args(self): def name(self): return "Velocity-Bench dl-cifar" + def description(self) -> str: + return ( + "Deep learning image classification workload based on the CIFAR-10 dataset " + "of 60,000 32x32 color images in 10 classes. Uses neural networks to " + "classify input images and measures total calculation time." + ) + def parse_output(self, stdout: str) -> float: match = re.search( r"dl-cifar - total time for whole calculation: (\d+\.\d+) s", stdout @@ -407,6 +462,7 @@ def download_deps(self): "train-images.idx3-ubyte.gz", unzip=True, skip_data_dir=True, + checksum="f40eb179f7c3d2637e789663bde56d444a23e4a0a14477a9e6ed88bc39c8ad6eaff68056c0cd9bb60daf0062b70dc8ee", ) self.download( "datasets", @@ -414,6 +470,7 @@ def download_deps(self): "train-labels.idx1-ubyte.gz", unzip=True, skip_data_dir=True, + checksum="ba9c11bf9a7f7c2c04127b8b3e568cf70dd3429d9029ca59b7650977a4ac32f8ff5041fe42bc872097487b06a6794e00", ) self.download( "datasets", @@ -421,6 +478,7 @@ def download_deps(self): "t10k-images.idx3-ubyte.gz", unzip=True, skip_data_dir=True, + checksum="1bf45877962fd391f7abb20534a30fd2203d0865309fec5f87d576dbdbefdcb16adb49220afc22a0f3478359d229449c", ) self.download( "datasets", @@ -428,6 +486,7 @@ def download_deps(self): "t10k-labels.idx1-ubyte.gz", unzip=True, skip_data_dir=True, + checksum="ccc1ee70f798a04e6bfeca56a4d0f0de8d8eeeca9f74641c1e1bfb00cf7cc4aa4d023f6ea1b40e79bb4707107845479d", ) def extra_cmake_args(self): @@ -445,6 +504,13 @@ def extra_cmake_args(self): def name(self): return "Velocity-Bench dl-mnist" + def description(self) -> str: + return ( + "Digit recognition based on the MNIST database, one of the oldest and most popular " + "databases of handwritten digits. Uses neural networks to identify digits " + "and measures total calculation time." + ) + def bin_args(self): return ["-conv_algo", "ONEDNN_AUTO"] @@ -488,6 +554,13 @@ def extra_cmake_args(self): def name(self): return "Velocity-Bench svm" + def description(self) -> str: + return ( + "Implementation of Support Vector Machine, a popular classical machine learning technique. " + "Uses supervised learning models with associated algorithms to analyze data " + "for classification and regression analysis. Measures total elapsed time." + ) + def bin_args(self): return [ f"{self.code_path}/a9a", diff --git a/devops/scripts/benchmarks/benchmark_results.html.template b/devops/scripts/benchmarks/benchmark_results.html.template deleted file mode 100644 index 1deeedad66b00..0000000000000 --- a/devops/scripts/benchmarks/benchmark_results.html.template +++ /dev/null @@ -1,192 +0,0 @@ - - - - - - Benchmark Results - - - - -
[deleted template body not preserved here: a standalone HTML page titled "Benchmark Results" containing ${suite_checkboxes_html}, a "Historical Results" section filled from ${timeseries_charts_html}, and a "Comparisons" section filled from ${bar_charts_html}]
- - diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index 7902aa4f04c35..2bb0b9db8ea38 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -6,7 +6,7 @@ import os import json from pathlib import Path -from benches.result import Result, BenchmarkRun +from utils.result import Result, BenchmarkRun from options import Compare, options from datetime import datetime, timezone from utils.utils import run @@ -63,12 +63,29 @@ def create_run(self, name: str, results: list[Result]) -> BenchmarkRun: try: result = run("git rev-parse --short HEAD") git_hash = result.stdout.decode().strip() + + # Get the GitHub repo URL from git remote + remote_result = run("git remote get-url origin") + remote_url = remote_result.stdout.decode().strip() + + # Convert SSH or HTTPS URL to owner/repo format + if remote_url.startswith("git@github.com:"): + # SSH format: git@github.com:owner/repo.git + github_repo = remote_url.split("git@github.com:")[1].rstrip(".git") + elif remote_url.startswith("https://github.com/"): + # HTTPS format: https://github.com/owner/repo.git + github_repo = remote_url.split("https://github.com/")[1].rstrip(".git") + else: + github_repo = None + except: git_hash = "unknown" + github_repo = None return BenchmarkRun( name=name, git_hash=git_hash, + github_repo=github_repo, date=datetime.now(tz=timezone.utc), results=results, ) diff --git a/devops/scripts/benchmarks/html/config.js b/devops/scripts/benchmarks/html/config.js new file mode 100644 index 0000000000000..c1210b2b21da5 --- /dev/null +++ b/devops/scripts/benchmarks/html/config.js @@ -0,0 +1,5 @@ +const config = { + remoteDataUrl: '' +}; +// defaultCompareNames = []; +// suiteNames = []; diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html new file mode 100644 index 0000000000000..c10844f15c707 --- /dev/null +++ b/devops/scripts/benchmarks/html/index.html @@ -0,0 +1,205 @@ + + + + + + + Benchmark Results + + + + + + + + +
[new page body not preserved here: an HTML page titled "Benchmark Results" with run-selection and suite/name filter controls, followed by "Historical Results" and "Comparisons" chart sections that are populated by scripts.js]
+ + diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js new file mode 100644 index 0000000000000..8f0272048136d --- /dev/null +++ b/devops/scripts/benchmarks/html/scripts.js @@ -0,0 +1,556 @@ +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// Core state +let activeRuns = new Set(defaultCompareNames); +let chartInstances = new Map(); +let timeseriesData, barChartsData, allRunNames; + +// DOM Elements +let runSelect, selectedRunsDiv, suiteFiltersContainer; + +// Run selector functions +function updateSelectedRuns() { + selectedRunsDiv.innerHTML = ''; + activeRuns.forEach(name => { + selectedRunsDiv.appendChild(createRunElement(name)); + }); + updateCharts(); +} + +function createRunElement(name) { + const runElement = document.createElement('span'); + runElement.className = 'selected-run'; + runElement.innerHTML = `${name} `; + return runElement; +} + +function addSelectedRun() { + const selectedRun = runSelect.value; + if (selectedRun && !activeRuns.has(selectedRun)) { + activeRuns.add(selectedRun); + updateSelectedRuns(); + } +} + +function removeRun(name) { + activeRuns.delete(name); + updateSelectedRuns(); +} + +// Chart creation and update +function createChart(data, containerId, type) { + if (chartInstances.has(containerId)) { + chartInstances.get(containerId).destroy(); + } + + const ctx = document.getElementById(containerId).getContext('2d'); + const options = { + responsive: true, + plugins: { + title: { + display: true, + text: data.label + }, + subtitle: { + display: true, + text: data.lower_is_better ? "Lower is better" : "Higher is better" + }, + tooltip: { + callbacks: { + label: (context) => { + if (type === 'time') { + const point = context.raw; + return [ + `${data.label}:`, + `Value: ${point.y.toFixed(2)} ${data.unit}`, + `Stddev: ${point.stddev.toFixed(2)} ${data.unit}`, + `Git Hash: ${point.gitHash}`, + ]; + } else { + return [`${context.dataset.label}:`, + `Value: ${context.parsed.y.toFixed(2)} ${data.unit}`, + ]; + } + } + } + } + }, + scales: { + y: { + title: { + display: true, + text: data.unit + } + } + } + }; + + if (type === 'time') { + options.interaction = { + mode: 'nearest', + intersect: false + }; + options.onClick = (event, elements) => { + if (elements.length > 0) { + const point = elements[0].element.$context.raw; + if (point.gitHash && point.gitRepo) { + window.open(`https://github.com/${point.gitRepo}/commit/${point.gitHash}`, '_blank'); + } + } + }; + options.scales.x = { + type: 'time', + ticks: { + maxRotation: 45, + minRotation: 45, + autoSkip: true, + maxTicksLimit: 10 + } + }; + } + + const chartConfig = { + type: type === 'time' ? 'line' : 'bar', + data: type === 'time' ? 
+ { + datasets: createTimeseriesDatasets(data) + } : + { + labels: data.labels, + datasets: data.datasets + }, + options: options + }; + + const chart = new Chart(ctx, chartConfig); + chartInstances.set(containerId, chart); + return chart; +} + +function createTimeseriesDatasets(data) { + return Object.entries(data.runs).map(([name, points]) => ({ + label: name, + data: points.map(p => ({ + x: new Date(p.date), + y: p.value, + gitHash: p.git_hash, + gitRepo: p.github_repo, + stddev: p.stddev + })), + borderWidth: 1, + pointRadius: 3, + pointStyle: 'circle', + pointHoverRadius: 5 + })); +} + +function updateCharts() { + // Filter data by active runs + const filteredTimeseriesData = timeseriesData.map(chart => ({ + ...chart, + runs: Object.fromEntries( + Object.entries(chart.runs).filter(([name]) => activeRuns.has(name)) + ) + })); + + const filteredBarChartsData = barChartsData.map(chart => ({ + ...chart, + labels: chart.labels.filter(label => activeRuns.has(label)), + datasets: chart.datasets.map(dataset => ({ + ...dataset, + data: dataset.data.filter((_, i) => activeRuns.has(chart.labels[i])) + })) + })); + + // Draw charts with filtered data + drawCharts(filteredTimeseriesData, filteredBarChartsData); +} + +function drawCharts(filteredTimeseriesData, filteredBarChartsData) { + // Clear existing charts + document.querySelectorAll('.charts').forEach(container => container.innerHTML = ''); + chartInstances.forEach(chart => chart.destroy()); + chartInstances.clear(); + + // Create timeseries charts + filteredTimeseriesData.forEach((data, index) => { + const containerId = `timeseries-${index}`; + const container = createChartContainer(data, containerId); + document.querySelector('.timeseries .charts').appendChild(container); + createChart(data, containerId, 'time'); + }); + + // Create bar charts + filteredBarChartsData.forEach((data, index) => { + const containerId = `barchart-${index}`; + const container = createChartContainer(data, containerId); + document.querySelector('.bar-charts .charts').appendChild(container); + createChart(data, containerId, 'bar'); + }); + + // Apply current filters + filterCharts(); +} + +function createChartContainer(data, canvasId) { + const container = document.createElement('div'); + container.className = 'chart-container'; + container.setAttribute('data-label', data.label); + container.setAttribute('data-suite', data.suite); + + const canvas = document.createElement('canvas'); + canvas.id = canvasId; + container.appendChild(canvas); + + // Create details section for extra info + const details = document.createElement('details'); + const summary = document.createElement('summary'); + summary.textContent = "Details"; + + // Add subtle download button to the summary + const downloadButton = document.createElement('button'); + downloadButton.className = 'download-button'; + downloadButton.textContent = 'Download'; + downloadButton.onclick = (event) => { + event.stopPropagation(); // Prevent details toggle + downloadChart(canvasId, data.label); + }; + summary.appendChild(downloadButton); + details.appendChild(summary); + + // Create and append extra info + const extraInfo = document.createElement('div'); + extraInfo.className = 'extra-info'; + extraInfo.innerHTML = generateExtraInfo(data); + details.appendChild(extraInfo); + + container.appendChild(details); + + return container; +} + +// Pre-compute a lookup for the latest run per label +function createLatestRunsLookup(benchmarkRuns) { + const latestRunsMap = new Map(); + + benchmarkRuns.forEach(run => { + // 
Yes, we need to convert the date every time. I checked. + const runDate = new Date(run.date); + run.results.forEach(result => { + const label = result.label; + if (!latestRunsMap.has(label) || runDate > new Date(latestRunsMap.get(label).date)) { + latestRunsMap.set(label, { + run, + result + }); + } + }); + }); + + return latestRunsMap; +} +const latestRunsLookup = createLatestRunsLookup(benchmarkRuns); + +function generateExtraInfo(data) { + const labels = data.datasets ? data.datasets.map(dataset => dataset.label) : [data.label]; + + return labels.map(label => { + const latestRun = latestRunsLookup.get(label); + + if (latestRun) { + return `
+                ${label}: ${formatCommand(latestRun.result)}
+                Description: ${latestRun.result.description}
+            `;
+        }
+        return `
+                ${label}: No data available
`; + }).join(''); +} + +function formatCommand(run) { + const envVars = Object.entries(run.env || {}).map(([key, value]) => `${key}=${value}`).join(' '); + let command = run.command ? [...run.command] : []; + + return `${envVars} ${command.join(' ')}`.trim(); +} + +function downloadChart(canvasId, label) { + const chart = chartInstances.get(canvasId); + if (chart) { + const link = document.createElement('a'); + link.href = chart.toBase64Image('image/jpeg', 1) + link.download = `${label}.png`; + link.click(); + } +} + +// URL and filtering functions +function getQueryParam(param) { + const urlParams = new URLSearchParams(window.location.search); + return urlParams.get(param); +} + +function updateURL() { + const url = new URL(window.location); + const regex = document.getElementById('bench-filter').value; + const activeSuites = getActiveSuites(); + const activeRunsList = Array.from(activeRuns); + + if (regex) { + url.searchParams.set('regex', regex); + } else { + url.searchParams.delete('regex'); + } + + if (activeSuites.length > 0) { + url.searchParams.set('suites', activeSuites.join(',')); + } else { + url.searchParams.delete('suites'); + } + + // Handle the runs parameter + if (activeRunsList.length > 0) { + // Check if the active runs are the same as default runs + const defaultRuns = new Set(defaultCompareNames || []); + const isDefaultRuns = activeRunsList.length === defaultRuns.size && + activeRunsList.every(run => defaultRuns.has(run)); + + if (isDefaultRuns) { + // If it's just the default runs, omit the parameter entirely + url.searchParams.delete('runs'); + } else { + url.searchParams.set('runs', activeRunsList.join(',')); + } + } else { + url.searchParams.delete('runs'); + } + + history.replaceState(null, '', url); +} + +function filterCharts() { + const regexInput = document.getElementById('bench-filter').value; + const regex = new RegExp(regexInput, 'i'); + const activeSuites = getActiveSuites(); + + document.querySelectorAll('.chart-container').forEach(container => { + const label = container.getAttribute('data-label'); + const suite = container.getAttribute('data-suite'); + container.style.display = (regex.test(label) && activeSuites.includes(suite)) ? '' : 'none'; + }); + + updateURL(); +} + +function getActiveSuites() { + return Array.from(document.querySelectorAll('.suite-checkbox:checked')) + .map(checkbox => checkbox.getAttribute('data-suite')); +} + +// Data processing +function processTimeseriesData(benchmarkRuns) { + const resultsByLabel = {}; + + benchmarkRuns.forEach(run => { + const runDate = run.date ? 
new Date(run.date) : null; + run.results.forEach(result => { + if (!resultsByLabel[result.label]) { + resultsByLabel[result.label] = { + label: result.label, + suite: result.suite, + unit: result.unit, + lower_is_better: result.lower_is_better, + runs: {} + }; + } + + if (!resultsByLabel[result.label].runs[run.name]) { + resultsByLabel[result.label].runs[run.name] = []; + } + + resultsByLabel[result.label].runs[run.name].push({ + date: runDate, + value: result.value, + stddev: result.stddev, + git_hash: run.git_hash, + github_repo: run.github_repo + }); + }); + }); + + return Object.values(resultsByLabel); +} + +function processBarChartsData(benchmarkRuns) { + const groupedResults = {}; + + benchmarkRuns.forEach(run => { + run.results.forEach(result => { + if (!result.explicit_group) return; + + if (!groupedResults[result.explicit_group]) { + groupedResults[result.explicit_group] = { + label: result.explicit_group, + suite: result.suite, + unit: result.unit, + lower_is_better: result.lower_is_better, + labels: [], + datasets: [] + }; + } + + const group = groupedResults[result.explicit_group]; + + if (!group.labels.includes(run.name)) { + group.labels.push(run.name); + } + + let dataset = group.datasets.find(d => d.label === result.label); + if (!dataset) { + dataset = { + label: result.label, + data: new Array(group.labels.length).fill(null) + }; + group.datasets.push(dataset); + } + + const runIndex = group.labels.indexOf(run.name); + dataset.data[runIndex] = result.value; + }); + }); + + return Object.values(groupedResults); +} + +// Setup functions +function setupRunSelector() { + runSelect = document.getElementById('run-select'); + selectedRunsDiv = document.getElementById('selected-runs'); + + allRunNames.forEach(name => { + const option = document.createElement('option'); + option.value = name; + option.textContent = name; + runSelect.appendChild(option); + }); + + updateSelectedRuns(); +} + +function setupSuiteFilters() { + suiteFiltersContainer = document.getElementById('suite-filters'); + + suiteNames.forEach(suite => { + const label = document.createElement('label'); + const checkbox = document.createElement('input'); + checkbox.type = 'checkbox'; + checkbox.className = 'suite-checkbox'; + checkbox.dataset.suite = suite; + checkbox.checked = true; + label.appendChild(checkbox); + label.appendChild(document.createTextNode(' ' + suite)); + suiteFiltersContainer.appendChild(label); + suiteFiltersContainer.appendChild(document.createTextNode(' ')); + }); +} + +function initializeCharts() { + // Process raw data + timeseriesData = processTimeseriesData(benchmarkRuns); + barChartsData = processBarChartsData(benchmarkRuns); + allRunNames = [...new Set(benchmarkRuns.map(run => run.name))]; + + // Set up active runs + const runsParam = getQueryParam('runs'); + if (runsParam) { + const runsFromUrl = runsParam.split(','); + + // Start with an empty set + activeRuns = new Set(); + + // Process each run from URL + runsFromUrl.forEach(run => { + if (run === 'default') { + // Special case: include all default runs + (defaultCompareNames || []).forEach(defaultRun => { + if (allRunNames.includes(defaultRun)) { + activeRuns.add(defaultRun); + } + }); + } else if (allRunNames.includes(run)) { + // Add the specific run if it exists + activeRuns.add(run); + } + }); + } else { + // No runs parameter, use defaults + activeRuns = new Set(defaultCompareNames || []); + } + + // Setup UI components + setupRunSelector(); + setupSuiteFilters(); + + // Apply URL parameters + const regexParam = 
getQueryParam('regex'); + const suitesParam = getQueryParam('suites'); + + if (regexParam) { + document.getElementById('bench-filter').value = regexParam; + } + + if (suitesParam) { + const suites = suitesParam.split(','); + document.querySelectorAll('.suite-checkbox').forEach(checkbox => { + checkbox.checked = suites.includes(checkbox.getAttribute('data-suite')); + }); + } + + // Setup event listeners + document.querySelectorAll('.suite-checkbox').forEach(checkbox => { + checkbox.addEventListener('change', filterCharts); + }); + document.getElementById('bench-filter').addEventListener('input', filterCharts); + + // Draw initial charts + updateCharts(); +} + +// Make functions available globally for onclick handlers +window.addSelectedRun = addSelectedRun; +window.removeRun = removeRun; + +// Load data based on configuration +function loadData() { + const loadingIndicator = document.getElementById('loading-indicator'); + loadingIndicator.style.display = 'block'; // Show loading indicator + + if (config.remoteDataUrl && config.remoteDataUrl !== '') { + // Fetch data from remote URL + fetch(config.remoteDataUrl) + .then(response => response.text()) + .then(scriptContent => { + // Evaluate the script content + eval(scriptContent); + initializeCharts(); + }) + .catch(error => console.error('Error fetching remote data:', error)) + .finally(() => { + loadingIndicator.style.display = 'none'; // Hide loading indicator + }); + } else { + // Use local data + initializeCharts(); + loadingIndicator.style.display = 'none'; // Hide loading indicator + } +} + +// Initialize when DOM is ready +document.addEventListener('DOMContentLoaded', () => { + loadData(); +}); diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 4ad90b39b9001..8f5330d7b4f62 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -27,23 +27,27 @@ def run_iterations( - benchmark: Benchmark, env_vars, iters: int, results: dict[str, list[Result]] + benchmark: Benchmark, + env_vars, + iters: int, + results: dict[str, list[Result]], + failures: dict[str, str], ): for iter in range(iters): - print(f"running {benchmark.name()}, iteration {iter}... ", end="", flush=True) + print(f"running {benchmark.name()}, iteration {iter}... ", flush=True) bench_results = benchmark.run(env_vars) if bench_results is None: - print(f"did not finish (OK for sycl-bench).") + failures[benchmark.name()] = "benchmark produced no results!" break for bench_result in bench_results: - # TODO: report failures in markdown/html ? if not bench_result.passed: - print(f"complete ({bench_result.label}: verification FAILED)") + failures[bench_result.label] = "verification failed" + print(f"complete ({bench_result.label}: verification failed).") continue print( - f"complete ({bench_result.label}: {bench_result.value:.3f} {bench_result.unit})." + f"{benchmark.name()} complete ({bench_result.label}: {bench_result.value:.3f} {bench_result.unit})." ) bench_result.name = bench_result.label @@ -156,6 +160,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): ) benchmarks = [] + failures = {} for s in suites: suite_benchmarks = s.benchmarks() @@ -170,7 +175,8 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): print(f"Setting up {type(s).__name__}") try: s.setup() - except: + except Exception as e: + failures[s.name()] = f"Suite setup failure: {e}" print(f"{type(s).__name__} setup failed. 
Benchmarks won't be added.") else: print(f"{type(s).__name__} setup complete.") @@ -189,6 +195,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): if options.exit_on_failure: raise e else: + failures[benchmark.name()] = f"Benchmark setup failure: {e}" print(f"failed: {e}") results = [] @@ -199,7 +206,11 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): processed: list[Result] = [] for _ in range(options.iterations_stddev): run_iterations( - benchmark, merged_env_vars, options.iterations, intermediate_results + benchmark, + merged_env_vars, + options.iterations, + intermediate_results, + failures, ) valid, processed = process_results( intermediate_results, benchmark.stddev_threshold() @@ -211,12 +222,16 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): if options.exit_on_failure: raise e else: + failures[benchmark.name()] = f"Benchmark run failure: {e}" print(f"failed: {e}") for benchmark in benchmarks: - print(f"tearing down {benchmark.name()}... ", end="", flush=True) + # this never has any useful information anyway, so hide it behind verbose + if options.verbose: + print(f"tearing down {benchmark.name()}... ", flush=True) benchmark.teardown() - print("complete.") + if options.verbose: + print("{benchmark.name()} teardown complete.") this_name = options.current_run_name chart_data = {} @@ -241,7 +256,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): if options.output_markdown: markdown_content = generate_markdown( - this_name, chart_data, options.output_markdown + this_name, chart_data, failures, options.output_markdown ) with open("benchmark_results.md", "w") as file: @@ -262,14 +277,9 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): compare_names.append(saved_name) if options.output_html: - html_content = generate_html(history.runs, "intel/llvm", compare_names) + generate_html(history.runs, compare_names) - with open("benchmark_results.html", "w") as file: - file.write(html_content) - - print( - f"HTML with benchmark results has been written to {os.getcwd()}/benchmark_results.html" - ) + print(f"See {os.getcwd()}/html/index.html for the results.") def validate_and_parse_env_args(env_args): @@ -305,6 +315,11 @@ def validate_and_parse_env_args(env_args): help="Do not rebuild the benchmarks from scratch.", action="store_true", ) + parser.add_argument( + "--redownload", + help="Always download benchmark data dependencies, even if they already exist.", + action="store_true", + ) parser.add_argument( "--env", type=str, @@ -430,6 +445,7 @@ def validate_and_parse_env_args(env_args): options.workdir = args.benchmark_directory options.verbose = args.verbose options.rebuild = not args.no_rebuild + options.redownload = args.redownload options.sycl = args.sycl options.iterations = args.iterations options.timeout = args.timeout diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index 2e92675264544..206ca94eb0d0b 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -21,6 +21,7 @@ class Options: ur_adapter: str = None umf: str = None rebuild: bool = True + redownload: bool = False benchmark_cwd: str = "INVALID" timeout: float = 600 iterations: int = 3 diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py index 4ba395bc3aac6..e6e3212dbcdb2 100644 --- a/devops/scripts/benchmarks/output_html.py +++ 
b/devops/scripts/benchmarks/output_html.py @@ -3,338 +3,36 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -import re +import json import os -from pathlib import Path -import matplotlib.pyplot as plt -import mpld3 -from collections import defaultdict -from dataclasses import dataclass -import matplotlib.dates as mdates -from benches.result import BenchmarkRun, Result -import numpy as np -from string import Template -@dataclass -class BenchmarkMetadata: - unit: str - suite: str - lower_is_better: bool +def generate_html(benchmark_runs: list, compare_names: list[str]): + # Get unique suite names + suite_names = {result.suite for run in benchmark_runs for result in run.results} -@dataclass -class BenchmarkSeries: - label: str - metadata: BenchmarkMetadata - runs: list[BenchmarkRun] + # create path to data.js in html folder + data_path = os.path.join(os.path.dirname(__file__), "html", "data.js") + # Write data to js file + # We can't store this as a standalone json file because it needs to be inline in the html + with open(data_path, "w") as f: + f.write("const benchmarkRuns = [\n") + # it might be tempting to just to create a list and convert + # that to a json, but that leads to json being serialized twice. + for i, run in enumerate(benchmark_runs): + if i > 0: + f.write(",\n") + f.write(run.to_json()) -@dataclass -class BenchmarkChart: - label: str - suite: str - html: str + f.write("\n];\n\n") # terminates benchmarkRuns - -def tooltip_css() -> str: - return ".mpld3-tooltip{background:white;padding:8px;border:1px solid #ddd;border-radius:4px;font-family:monospace;white-space:pre;}" - - -def create_time_series_chart( - benchmarks: list[BenchmarkSeries], github_repo: str -) -> list[BenchmarkChart]: - plt.close("all") - - num_benchmarks = len(benchmarks) - if num_benchmarks == 0: - return [] - - html_charts = [] - - for _, benchmark in enumerate(benchmarks): - fig, ax = plt.subplots(figsize=(10, 4)) - - all_values = [] - all_stddevs = [] - - for run in benchmark.runs: - sorted_points = sorted(run.results, key=lambda x: x.date) - dates = [point.date for point in sorted_points] - values = [point.value for point in sorted_points] - stddevs = [point.stddev for point in sorted_points] - - all_values.extend(values) - all_stddevs.extend(stddevs) - - ax.errorbar(dates, values, yerr=stddevs, fmt="-", label=run.name, alpha=0.5) - scatter = ax.scatter(dates, values, picker=True) - - tooltip_labels = [ - f"Date: {point.date.strftime('%Y-%m-%d %H:%M:%S')}\n" - f"Value: {point.value:.2f} {benchmark.metadata.unit}\n" - f"Stddev: {point.stddev:.2f} {benchmark.metadata.unit}\n" - f"Git Hash: {point.git_hash}" - for point in sorted_points - ] - - targets = [ - f"https://github.com/{github_repo}/commit/{point.git_hash}" - for point in sorted_points - ] - - tooltip = mpld3.plugins.PointHTMLTooltip( - scatter, tooltip_labels, css=tooltip_css(), targets=targets - ) - mpld3.plugins.connect(fig, tooltip) - - ax.set_title(benchmark.label, pad=20) - performance_indicator = ( - "lower is better" - if benchmark.metadata.lower_is_better - else "higher is better" - ) - ax.text( - 0.5, - 1.05, - f"({performance_indicator})", - ha="center", - transform=ax.transAxes, - style="italic", - fontsize=7, - color="#666666", - ) - - ax.set_xlabel("") - unit = benchmark.metadata.unit - ax.set_ylabel(f"Value ({unit})" if unit else "Value") - ax.grid(True, alpha=0.2) - ax.legend(bbox_to_anchor=(1, 1), loc="upper left") - ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter("%Y-%m-%d 
%H:%M:%S")) - - plt.tight_layout() - html_charts.append( - BenchmarkChart( - html=mpld3.fig_to_html(fig), - label=benchmark.label, - suite=benchmark.metadata.suite, - ) - ) - plt.close(fig) - - return html_charts - - -@dataclass -class ExplicitGroup: - name: str - nnames: int - metadata: BenchmarkMetadata - runs: dict[str, dict[str, Result]] - - -def create_explicit_groups( - benchmark_runs: list[BenchmarkRun], compare_names: list[str] -) -> list[ExplicitGroup]: - groups = {} - - for run in benchmark_runs: - if run.name in compare_names: - for res in run.results: - if res.explicit_group != "": - if res.explicit_group not in groups: - groups[res.explicit_group] = ExplicitGroup( - name=res.explicit_group, - nnames=len(compare_names), - metadata=BenchmarkMetadata( - unit=res.unit, - lower_is_better=res.lower_is_better, - suite=res.suite, - ), - runs={}, - ) - - group = groups[res.explicit_group] - if res.label not in group.runs: - group.runs[res.label] = {name: None for name in compare_names} - - if group.runs[res.label][run.name] is None: - group.runs[res.label][run.name] = res - - return list(groups.values()) - - -def create_grouped_bar_charts(groups: list[ExplicitGroup]) -> list[BenchmarkChart]: - plt.close("all") - - html_charts = [] - - for group in groups: - fig, ax = plt.subplots(figsize=(10, 6)) - - x = np.arange(group.nnames) - x_labels = [] - width = 0.8 / len(group.runs) - - max_height = 0 - - for i, (run_name, run_results) in enumerate(group.runs.items()): - offset = width * i - - positions = x + offset - x_labels = run_results.keys() - valid_data = [r.value if r is not None else 0 for r in run_results.values()] - rects = ax.bar(positions, valid_data, width, label=run_name) - # This is a hack to disable all bar_label. Setting labels to empty doesn't work. - # We create our own labels below for each bar, this works better in mpld3. - ax.bar_label(rects, fmt="") - - for rect, run, res in zip(rects, run_results.keys(), run_results.values()): - if res is None: - continue - - height = rect.get_height() - if height > max_height: - max_height = height - - ax.text( - rect.get_x() + rect.get_width() / 2.0, - height + 1, - f"{res.value:.1f}", - ha="center", - va="bottom", - fontsize=9, - ) - - tooltip_labels = [ - f"Date: {res.date.strftime('%Y-%m-%d %H:%M:%S')}\n" - f"Run: {run}\n" - f"Label: {res.label}\n" - f"Value: {res.value:.2f} {res.unit}\n" - f"Stddev: {res.stddev:.2f} {res.unit}\n" - ] - tooltip = mpld3.plugins.LineHTMLTooltip( - rect, tooltip_labels, css=tooltip_css() - ) - mpld3.plugins.connect(ax.figure, tooltip) - - # normally we'd just set legend to be outside - # the chart, but this is not supported by mpld3. - # instead, we adjust the y axis to account for - # the height of the bars. - legend_height = len(group.runs) * 0.1 - ax.set_ylim(0, max_height * (1 + legend_height)) - - ax.set_xticks([]) - ax.grid(True, axis="y", alpha=0.2) - ax.set_ylabel(f"Value ({group.metadata.unit})") - ax.legend(loc="upper left") - ax.set_title(group.name, pad=20) - performance_indicator = ( - "lower is better" if group.metadata.lower_is_better else "higher is better" - ) - ax.text( - 0.5, - 1.03, - f"({performance_indicator})", - ha="center", - transform=ax.transAxes, - style="italic", - fontsize=7, - color="#666666", - ) - - for idx, label in enumerate(x_labels): - # this is a hack to get labels to show above the legend - # we normalize the idx to transAxes transform and offset it a little. 
- x_norm = (idx + 0.3 - ax.get_xlim()[0]) / ( - ax.get_xlim()[1] - ax.get_xlim()[0] - ) - ax.text(x_norm, 1.03, label, transform=ax.transAxes, color="#666666") - - plt.tight_layout() - html_charts.append( - BenchmarkChart( - label=group.name, - html=mpld3.fig_to_html(fig), - suite=group.metadata.suite, - ) - ) - plt.close(fig) - - return html_charts - - -def process_benchmark_data( - benchmark_runs: list[BenchmarkRun], compare_names: list[str] -) -> list[BenchmarkSeries]: - benchmark_metadata: dict[str, BenchmarkMetadata] = {} - run_map: dict[str, dict[str, list[Result]]] = defaultdict(lambda: defaultdict(list)) - - for run in benchmark_runs: - if run.name not in compare_names: - continue - - for result in run.results: - if result.label not in benchmark_metadata: - benchmark_metadata[result.label] = BenchmarkMetadata( - unit=result.unit, - lower_is_better=result.lower_is_better, - suite=result.suite, - ) - - result.date = run.date - result.git_hash = run.git_hash - run_map[result.label][run.name].append(result) - - benchmark_series = [] - for label, metadata in benchmark_metadata.items(): - runs = [ - BenchmarkRun(name=run_name, results=results) - for run_name, results in run_map[label].items() - ] - benchmark_series.append( - BenchmarkSeries(label=label, metadata=metadata, runs=runs) - ) - - return benchmark_series - - -def generate_html( - benchmark_runs: list[BenchmarkRun], github_repo: str, compare_names: list[str] -) -> str: - benchmarks = process_benchmark_data(benchmark_runs, compare_names) - - timeseries = create_time_series_chart(benchmarks, github_repo) - timeseries_charts_html = "\n".join( - f'
{ts.html}'
-        for ts in timeseries
-    )
-
-    explicit_groups = create_explicit_groups(benchmark_runs, compare_names)
-
-    bar_charts = create_grouped_bar_charts(explicit_groups)
-    bar_charts_html = "\n".join(
-        f'{bc.html}
' - for bc in bar_charts - ) - - suite_names = {t.suite for t in timeseries} - suite_checkboxes_html = " ".join( - f'' - for suite in suite_names - ) - - script_path = os.path.dirname(os.path.realpath(__file__)) - results_template_path = Path(script_path, "benchmark_results.html.template") - with open(results_template_path, "r") as file: - html_template = file.read() - - template = Template(html_template) - data = { - "suite_checkboxes_html": suite_checkboxes_html, - "timeseries_charts_html": timeseries_charts_html, - "bar_charts_html": bar_charts_html, - } - - return template.substitute(data) + # these are not const because they might be modified + # in config.js + f.write("defaultCompareNames = ") + json.dump(compare_names, f) + f.write(";\n\n") # terminates defaultCompareNames + f.write("suiteNames = ") + json.dump(list(suite_names), f) + f.write(";") # terminates suiteNames diff --git a/devops/scripts/benchmarks/output_markdown.py b/devops/scripts/benchmarks/output_markdown.py index dd6711cec6365..18b5779473a75 100644 --- a/devops/scripts/benchmarks/output_markdown.py +++ b/devops/scripts/benchmarks/output_markdown.py @@ -5,7 +5,7 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception import collections -from benches.result import Result +from utils.result import Result from options import options, MarkdownSize import ast @@ -138,17 +138,6 @@ def generate_markdown_details( env_dict = res.env command = res.command - # If data is collected from already saved results, - # the content is parsed as strings - if isinstance(res.env, str): - # Since the scripts would be used solely on data prepared - # by our scripts, this should be safe - # However, maybe needs an additional blessing - # https://docs.python.org/3/library/ast.html#ast.literal_eval - env_dict = ast.literal_eval(res.env) - if isinstance(res.command, str): - command = ast.literal_eval(res.command) - section = ( "\n
\n" f"{res.label}\n\n" @@ -179,7 +168,7 @@ def generate_markdown_details( return "\nBenchmark details contain too many chars to display\n" -def generate_summary_table_and_chart( +def generate_summary_table( chart_data: dict[str, list[Result]], baseline_name: str, markdown_size: MarkdownSize ): summary_table = get_chart_markdown_header( @@ -374,10 +363,27 @@ def generate_summary_table_and_chart( return "\n# Summary\n" "Benchmark output is too large to display\n\n" +def generate_failures_section(failures: dict[str, str]) -> str: + if not failures: + return "" + + section = "\n# Failures\n" + section += "| Name | Failure |\n" + section += "|---|---|\n" + + for name, failure in failures.items(): + section += f"| {name} | {failure} |\n" + + return section + + def generate_markdown( - name: str, chart_data: dict[str, list[Result]], markdown_size: MarkdownSize + name: str, + chart_data: dict[str, list[Result]], + failures: dict[str, str], + markdown_size: MarkdownSize, ): - (summary_line, summary_table) = generate_summary_table_and_chart( + (summary_line, summary_table) = generate_summary_table( chart_data, name, markdown_size ) @@ -396,4 +402,6 @@ def generate_markdown( ) generated_markdown += "\n# Details\n" f"{markdown_details}\n" - return generated_markdown + failures_section = generate_failures_section(failures) + + return failures_section + generated_markdown diff --git a/devops/scripts/benchmarks/benches/oneapi.py b/devops/scripts/benchmarks/utils/oneapi.py similarity index 79% rename from devops/scripts/benchmarks/benches/oneapi.py rename to devops/scripts/benchmarks/utils/oneapi.py index 0547f6646e39e..e1876b5ed37fb 100644 --- a/devops/scripts/benchmarks/benches/oneapi.py +++ b/devops/scripts/benchmarks/utils/oneapi.py @@ -7,29 +7,33 @@ from utils.utils import download, run from options import options import os +import hashlib class OneAPI: - # random unique number for benchmark oneAPI installation - ONEAPI_BENCHMARK_INSTANCE_ID = 987654 - def __init__(self): self.oneapi_dir = os.path.join(options.workdir, "oneapi") Path(self.oneapi_dir).mkdir(parents=True, exist_ok=True) - # delete if some option is set? + self.oneapi_instance_id = self.generate_unique_oneapi_id(self.oneapi_dir) # can we just hardcode these links? 
        self.install_package(
            "dnnl",
            "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/87e117ab-039b-437d-9c80-dcd5c9e675d5/intel-onednn-2025.0.0.862_offline.sh",
+            "6866feb5b8dfefd6ff45d6bfabed44f01d7fba8fd452480ae1fd86b92e9481ae052c24842da14f112f672f5c4859945b",
        )
        self.install_package(
            "mkl",
            "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940_offline.sh",
+            "122bb84cf943ea27753cb399c81ab2ae218ebd51b789c74d273240157722925ab4d5a43cb0b5de41b854f2c5a59a4002",
        )
        return

-    def install_package(self, name, url):
+    def generate_unique_oneapi_id(self, path):
+        hash_object = hashlib.md5(path.encode())
+        return hash_object.hexdigest()
+
+    def install_package(self, name, url, checksum):
        package_path = os.path.join(self.oneapi_dir, name)
        if Path(package_path).exists():
            print(
@@ -37,11 +41,13 @@ def install_package(self, name, url):
            )
            return

-        package = download(self.oneapi_dir, url, f"package_{name}.sh")
+        package = download(
+            self.oneapi_dir, url, f"package_{name}.sh", checksum=checksum
+        )
        try:
            print(f"installing {name}")
            run(
-                f"sh {package} -a -s --eula accept --install-dir {self.oneapi_dir} --instance f{self.ONEAPI_BENCHMARK_INSTANCE_ID}"
+                f"sh {package} -a -s --eula accept --install-dir {self.oneapi_dir} --instance {self.oneapi_instance_id}"
            )
        except:
            print("oneAPI installation likely exists already")
diff --git a/devops/scripts/benchmarks/benches/result.py b/devops/scripts/benchmarks/utils/result.py
similarity index 69%
rename from devops/scripts/benchmarks/benches/result.py
rename to devops/scripts/benchmarks/utils/result.py
index 52a098d91c24a..4e65a3b8aa582 100644
--- a/devops/scripts/benchmarks/benches/result.py
+++ b/devops/scripts/benchmarks/utils/result.py
@@ -3,9 +3,9 @@
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import Optional
-from dataclasses_json import dataclass_json
+from dataclasses_json import config, dataclass_json
 from datetime import datetime

@@ -14,8 +14,8 @@ class Result:
     label: str
     value: float
-    command: str
-    env: str
+    command: list[str]
+    env: dict[str, str]
     stdout: str
     passed: bool = True
     unit: str = ""
@@ -26,9 +26,8 @@ class Result:
     # values below should not be set by the benchmark
     name: str = ""
     lower_is_better: bool = True
-    git_hash: str = ""
-    date: Optional[datetime] = None
     suite: str = "Unknown"
+    description: str = "No description provided."
@dataclass_json @@ -37,4 +36,8 @@ class BenchmarkRun: results: list[Result] name: str = "This PR" git_hash: str = "" - date: datetime = None + github_repo: str = None + date: datetime = field( + default=None, + metadata=config(encoder=datetime.isoformat, decoder=datetime.fromisoformat), + ) diff --git a/devops/scripts/benchmarks/utils/utils.py b/devops/scripts/benchmarks/utils/utils.py index d3d88f417cb8b..ba26127ce37b9 100644 --- a/devops/scripts/benchmarks/utils/utils.py +++ b/devops/scripts/benchmarks/utils/utils.py @@ -12,6 +12,7 @@ import urllib # nosec B404 from options import options from pathlib import Path +import hashlib def run( @@ -42,6 +43,12 @@ def run( env.update(env_vars) + if options.verbose: + command_str = " ".join(command) + env_str = " ".join(f"{key}={value}" for key, value in env_vars.items()) + full_command_str = f"{env_str} {command_str}".strip() + print(f"Running: {full_command_str}") + result = subprocess.run( command, cwd=cwd, @@ -104,7 +111,7 @@ def prepare_workdir(dir, version): shutil.rmtree(dir) else: raise Exception( - f"The directory {dir} exists but is a benchmark work directory." + f"The directory {dir} exists but is not a benchmark work directory." ) os.makedirs(dir) @@ -125,11 +132,26 @@ def create_build_path(directory, name): return build_path -def download(dir, url, file, untar=False, unzip=False): +def calculate_checksum(file_path): + sha_hash = hashlib.sha384() + with open(file_path, "rb") as f: + for byte_block in iter(lambda: f.read(4096), b""): + sha_hash.update(byte_block) + return sha_hash.hexdigest() + + +def download(dir, url, file, untar=False, unzip=False, checksum=""): data_file = os.path.join(dir, file) if not Path(data_file).exists(): print(f"{data_file} does not exist, downloading") urllib.request.urlretrieve(url, data_file) + calculated_checksum = calculate_checksum(data_file) + if calculated_checksum != checksum: + print( + f"Checksum mismatch: expected {checksum}, got {calculated_checksum}. Refusing to continue." + ) + exit(1) + if untar: file = tarfile.open(data_file) file.extractall(dir) From 3cbed5e3391366f16b6ff11d2a0d2e7a68511b58 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 5 Mar 2025 13:51:42 -0800 Subject: [PATCH 005/114] Test UR benchmarking suite --- devops/actions/run-tests/benchmark/action.yml | 46 ++++++++++--------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 69631d044891c..9846b5c8bd6c6 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -47,6 +47,7 @@ runs: echo "#" ;; esac - name: Compute CPU core range to run benchmarks on + shell: bash run: | # Taken from ur-benchmark-reusable.yml: @@ -89,27 +90,30 @@ runs: echo "-----" sycl-ls echo "-----" - taskset -c "$CORES" ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s || exit 1 - - name: Push compute-benchmarks results - if: always() - shell: bash - run: | - # TODO -- waiting on security clearance - # Load configuration values - $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) - - cd "./llvm-ci-perf-results" - git config user.name "SYCL Benchmarking Bot" - git config user.email "sys_sycl_benchmarks@intel.com" - git pull - git add . - # Make sure changes have been made - if git diff --quiet && git diff --cached --quiet; then - echo "No new results added, skipping push." 
- else - git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" - git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH" - fi + mkdir -v ./llvm_test_workdir + taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl ./toolchain --save baseline + echo "-----" + ls +# - name: Push compute-benchmarks results +# if: always() +# shell: bash +# run: | +# # TODO -- waiting on security clearance +# # Load configuration values +# $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) +# +# cd "./llvm-ci-perf-results" +# git config user.name "SYCL Benchmarking Bot" +# git config user.email "sys_sycl_benchmarks@intel.com" +# git pull +# git add . +# # Make sure changes have been made +# if git diff --quiet && git diff --cached --quiet; then +# echo "No new results added, skipping push." +# else +# git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" +# git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH" +# fi - name: Find benchmark result artifact here if: always() shell: bash From f79bbbfefe01c64963286c5aed5f84848b755200 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 5 Mar 2025 14:49:57 -0800 Subject: [PATCH 006/114] Bump tolerance to 7% --- devops/benchmarking/config.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/benchmarking/config.ini b/devops/benchmarking/config.ini index c0b3ca9c31c9e..6b0ecc21f940f 100644 --- a/devops/benchmarking/config.ini +++ b/devops/benchmarking/config.ini @@ -23,7 +23,7 @@ recorded = Median,StdDev ; the historical average. Metrics not included here are not compared against ; when passing/failing benchmark results. ; Format: comma-separated list of : -tolerances = Median:0.5 +tolerances = Median:0.7 ; Options for computing historical averages [average] From ffc813919aa9f165b040fa11742d5bd909befabe Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 5 Mar 2025 14:50:30 -0800 Subject: [PATCH 007/114] Revert "Bump tolerance to 7%" This reverts commit f79bbbfefe01c64963286c5aed5f84848b755200. --- devops/benchmarking/config.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/benchmarking/config.ini b/devops/benchmarking/config.ini index 6b0ecc21f940f..c0b3ca9c31c9e 100644 --- a/devops/benchmarking/config.ini +++ b/devops/benchmarking/config.ini @@ -23,7 +23,7 @@ recorded = Median,StdDev ; the historical average. Metrics not included here are not compared against ; when passing/failing benchmark results. ; Format: comma-separated list of : -tolerances = Median:0.7 +tolerances = Median:0.5 ; Options for computing historical averages [average] From 0a34e0d0914de06e0a086cbdcd44d0f1589447e2 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Thu, 6 Mar 2025 12:20:13 +0000 Subject: [PATCH 008/114] [benchmarks] fix failing benchmarks, improve html output ... 
and add presets to more easily --- devops/scripts/benchmarks/benches/compute.py | 2 +- devops/scripts/benchmarks/benches/llamacpp.py | 2 +- .../scripts/benchmarks/benches/syclbench.py | 13 ++-- devops/scripts/benchmarks/benches/velocity.py | 2 +- devops/scripts/benchmarks/history.py | 5 +- devops/scripts/benchmarks/html/config.js | 7 +-- devops/scripts/benchmarks/html/scripts.js | 46 ++++++++------ devops/scripts/benchmarks/main.py | 38 ++++++----- devops/scripts/benchmarks/options.py | 7 +-- devops/scripts/benchmarks/output_html.py | 63 +++++++++++-------- devops/scripts/benchmarks/output_markdown.py | 4 +- devops/scripts/benchmarks/presets.py | 50 +++++++++++++++ 12 files changed, 153 insertions(+), 86 deletions(-) create mode 100644 devops/scripts/benchmarks/presets.py diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index 18ed969728902..d35a8e2791648 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -170,7 +170,7 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit=parse_unit_type(unit), - description=self.description() + description=self.description(), ) ) return ret diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py index d8e0ab5d007bb..c12f811942849 100644 --- a/devops/scripts/benchmarks/benches/llamacpp.py +++ b/devops/scripts/benchmarks/benches/llamacpp.py @@ -139,7 +139,7 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit="token/s", - description=self.description() + description=self.description(), ) ) return results diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py index 47326b2555a68..cc2db0a2fcf7c 100644 --- a/devops/scripts/benchmarks/benches/syclbench.py +++ b/devops/scripts/benchmarks/benches/syclbench.py @@ -105,7 +105,6 @@ def __init__(self, bench, name, test): self.bench = bench self.bench_name = name self.test = test - self.done = False def bin_args(self) -> list[str]: return [] @@ -119,8 +118,6 @@ def setup(self): ) def run(self, env_vars) -> list[Result]: - if self.done: - return self.outputfile = os.path.join(self.bench.directory, self.test + ".csv") command = [ @@ -152,17 +149,17 @@ def run(self, env_vars) -> list[Result]: unit="ms", ) ) - self.done = True - return res_list - def teardown(self): - print(f"Removing {self.outputfile}...") os.remove(self.outputfile) - return + + return res_list def name(self): return f"{self.bench.name()} {self.test}" + def teardown(self): + return + # multi benchmarks class Blocked_transform(SyclBenchmark): diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py index be36c47ca36d5..652a831d0222e 100644 --- a/devops/scripts/benchmarks/benches/velocity.py +++ b/devops/scripts/benchmarks/benches/velocity.py @@ -136,7 +136,7 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit=self.unit, - description=self.description() + description=self.description(), ) ] diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index 2bb0b9db8ea38..2b7002ed7faa9 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -61,11 +61,12 @@ def extract_index(file_path: Path) -> int: def create_run(self, name: str, results: list[Result]) -> BenchmarkRun: try: - result = run("git rev-parse --short HEAD") + script_dir = 
os.path.dirname(os.path.abspath(__file__)) + result = run("git rev-parse --short HEAD", cwd=script_dir) git_hash = result.stdout.decode().strip() # Get the GitHub repo URL from git remote - remote_result = run("git remote get-url origin") + remote_result = run("git remote get-url origin", cwd=script_dir) remote_url = remote_result.stdout.decode().strip() # Convert SSH or HTTPS URL to owner/repo format diff --git a/devops/scripts/benchmarks/html/config.js b/devops/scripts/benchmarks/html/config.js index c1210b2b21da5..3e67ae1dce8e5 100644 --- a/devops/scripts/benchmarks/html/config.js +++ b/devops/scripts/benchmarks/html/config.js @@ -1,5 +1,2 @@ -const config = { - remoteDataUrl: '' -}; -// defaultCompareNames = []; -// suiteNames = []; +//remoteDataUrl = 'https://example.com/data.json'; +//defaultCompareNames = ['baseline']; diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js index 8f0272048136d..7b8b4d742cca2 100644 --- a/devops/scripts/benchmarks/html/scripts.js +++ b/devops/scripts/benchmarks/html/scripts.js @@ -114,14 +114,12 @@ function createChart(data, containerId, type) { const chartConfig = { type: type === 'time' ? 'line' : 'bar', - data: type === 'time' ? - { - datasets: createTimeseriesDatasets(data) - } : - { - labels: data.labels, - datasets: data.datasets - }, + data: type === 'time' ? { + datasets: createTimeseriesDatasets(data) + } : { + labels: data.labels, + datasets: data.datasets + }, options: options }; @@ -221,10 +219,12 @@ function createChartContainer(data, canvasId) { summary.appendChild(downloadButton); details.appendChild(summary); + latestRunsLookup = createLatestRunsLookup(benchmarkRuns); + // Create and append extra info const extraInfo = document.createElement('div'); extraInfo.className = 'extra-info'; - extraInfo.innerHTML = generateExtraInfo(data); + extraInfo.innerHTML = generateExtraInfo(latestRunsLookup, data); details.appendChild(extraInfo); container.appendChild(details); @@ -252,9 +252,8 @@ function createLatestRunsLookup(benchmarkRuns) { return latestRunsMap; } -const latestRunsLookup = createLatestRunsLookup(benchmarkRuns); -function generateExtraInfo(data) { +function generateExtraInfo(latestRunsLookup, data) { const labels = data.datasets ? 
data.datasets.map(dataset => dataset.label) : [data.label]; return labels.map(label => { @@ -283,7 +282,7 @@ function downloadChart(canvasId, label) { const chart = chartInstances.get(canvasId); if (chart) { const link = document.createElement('a'); - link.href = chart.toBase64Image('image/jpeg', 1) + link.href = chart.toBase64Image('image/png', 1) link.download = `${label}.png`; link.click(); } @@ -445,6 +444,13 @@ function setupRunSelector() { function setupSuiteFilters() { suiteFiltersContainer = document.getElementById('suite-filters'); + const suiteNames = new Set(); + benchmarkRuns.forEach(run => { + run.results.forEach(result => { + suiteNames.add(result.suite); + }); + }); + suiteNames.forEach(suite => { const label = document.createElement('label'); const checkbox = document.createElement('input'); @@ -530,16 +536,18 @@ function loadData() { const loadingIndicator = document.getElementById('loading-indicator'); loadingIndicator.style.display = 'block'; // Show loading indicator - if (config.remoteDataUrl && config.remoteDataUrl !== '') { + if (typeof remoteDataUrl !== 'undefined' && remoteDataUrl !== '') { // Fetch data from remote URL - fetch(config.remoteDataUrl) - .then(response => response.text()) - .then(scriptContent => { - // Evaluate the script content - eval(scriptContent); + fetch(remoteDataUrl) + .then(response => response.json()) + .then(data => { + benchmarkRuns = data; initializeCharts(); }) - .catch(error => console.error('Error fetching remote data:', error)) + .catch(error => { + console.error('Error fetching remote data:', error); + loadingIndicator.textContent = 'Fetching remote data failed.'; + }) .finally(() => { loadingIndicator.style.display = 'none'; // Hide loading indicator }); diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 8f5330d7b4f62..716f162c48feb 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -17,6 +17,7 @@ from history import BenchmarkHistory from utils.utils import prepare_workdir from utils.compute_runtime import * +from presets import Presets import argparse import re @@ -153,7 +154,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): SyclBench(directory), LlamaCppBench(directory), UMFSuite(directory), - # TestSuite() + TestSuite() ] if not options.dry_run else [] @@ -163,6 +164,9 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): failures = {} for s in suites: + if s.name() not in options.preset.suites(): + continue + suite_benchmarks = s.benchmarks() if filter: suite_benchmarks = [ @@ -182,14 +186,13 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): print(f"{type(s).__name__} setup complete.") benchmarks += suite_benchmarks - for b in benchmarks: - print(b.name()) - for benchmark in benchmarks: try: - print(f"Setting up {benchmark.name()}... ") + if options.verbose: + print(f"Setting up {benchmark.name()}... 
") benchmark.setup() - print(f"{benchmark.name()} setup complete.") + if options.verbose: + print(f"{benchmark.name()} setup complete.") except Exception as e: if options.exit_on_failure: @@ -279,8 +282,6 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): if options.output_html: generate_html(history.runs, compare_names) - print(f"See {os.getcwd()}/html/index.html for the results.") - def validate_and_parse_env_args(env_args): env_vars = {} @@ -362,12 +363,6 @@ def validate_and_parse_env_args(env_args): help="Regex pattern to filter benchmarks by name.", default=None, ) - parser.add_argument( - "--epsilon", - type=float, - help="Threshold to consider change of performance significant", - default=options.epsilon, - ) parser.add_argument( "--verbose", help="Print output of all the commands.", action="store_true" ) @@ -394,7 +389,11 @@ def validate_and_parse_env_args(env_args): help="Specify whether markdown output should fit the content size limit for request validation", ) parser.add_argument( - "--output-html", help="Create HTML output", action="store_true", default=False + "--output-html", + help="Create HTML output. Local output is for direct local viewing of the html file, remote is for server deployment.", + nargs="?", + const=options.output_html, + choices=["local", "remote"], ) parser.add_argument( "--dry-run", @@ -438,6 +437,13 @@ def validate_and_parse_env_args(env_args): help="Directory for cublas library", default=None, ) + parser.add_argument( + "--preset", + type=str, + choices=[p.name for p in Presets], + help="Benchmark preset to run.", + default='FULL', + ) args = parser.parse_args() additional_env_vars = validate_and_parse_env_args(args.env) @@ -449,7 +455,6 @@ def validate_and_parse_env_args(env_args): options.sycl = args.sycl options.iterations = args.iterations options.timeout = args.timeout - options.epsilon = args.epsilon options.ur = args.ur options.ur_adapter = args.adapter options.exit_on_failure = args.exit_on_failure @@ -464,6 +469,7 @@ def validate_and_parse_env_args(env_args): options.current_run_name = args.relative_perf options.cudnn_directory = args.cudnn_directory options.cublas_directory = args.cublas_directory + options.preset = Presets[args.preset].value() if args.build_igc and args.compute_runtime is None: parser.error("--build-igc requires --compute-runtime to be set") diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index 206ca94eb0d0b..fd08ce83d145e 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -1,6 +1,6 @@ from dataclasses import dataclass, field from enum import Enum - +from presets import Preset class Compare(Enum): LATEST = "latest" @@ -29,11 +29,9 @@ class Options: compare: Compare = Compare.LATEST compare_max: int = 10 # average/median over how many results output_markdown: MarkdownSize = MarkdownSize.SHORT - output_html: bool = False + output_html: str = "local" dry_run: bool = False - # these two should probably be merged into one setting stddev_threshold: float = 0.02 - epsilon: float = 0.02 iterations_stddev: int = 5 build_compute_runtime: bool = False extra_ld_libraries: list[str] = field(default_factory=list) @@ -41,6 +39,7 @@ class Options: compute_runtime_tag: str = "25.05.32567.12" build_igc: bool = False current_run_name: str = "This PR" + preset: Preset = None options = Options() diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py index e6e3212dbcdb2..d84fd659beb6c 100644 
--- a/devops/scripts/benchmarks/output_html.py +++ b/devops/scripts/benchmarks/output_html.py @@ -5,34 +5,43 @@ import json import os +from options import options def generate_html(benchmark_runs: list, compare_names: list[str]): - - # Get unique suite names - suite_names = {result.suite for run in benchmark_runs for result in run.results} - # create path to data.js in html folder - data_path = os.path.join(os.path.dirname(__file__), "html", "data.js") - - # Write data to js file - # We can't store this as a standalone json file because it needs to be inline in the html - with open(data_path, "w") as f: - f.write("const benchmarkRuns = [\n") - # it might be tempting to just to create a list and convert - # that to a json, but that leads to json being serialized twice. - for i, run in enumerate(benchmark_runs): - if i > 0: - f.write(",\n") - f.write(run.to_json()) - - f.write("\n];\n\n") # terminates benchmarkRuns - - # these are not const because they might be modified - # in config.js - f.write("defaultCompareNames = ") - json.dump(compare_names, f) - f.write(";\n\n") # terminates defaultCompareNames - f.write("suiteNames = ") - json.dump(list(suite_names), f) - f.write(";") # terminates suiteNames + html_path = os.path.join(os.path.dirname(__file__), "html") + + if options.output_html == "local": + data_path = os.path.join(html_path, "data.js") + # Write data to js file + # We can't store this as a standalone json file because it needs to be inline in the html + with open(data_path, "w") as f: + f.write("benchmarkRuns = [\n") + # it might be tempting to just to create a list and convert + # that to a json, but that leads to json being serialized twice. + for i, run in enumerate(benchmark_runs): + if i > 0: + f.write(",\n") + f.write(run.to_json()) + + f.write("\n];\n\n") # terminates benchmarkRuns + + f.write("defaultCompareNames = ") + json.dump(compare_names, f) + f.write(";\n") # terminates defaultCompareNames + + print(f"See {os.getcwd()}/html/index.html for the results.") + else: + data_path = os.path.join(html_path, "data.json") + with open(data_path, "w") as f: + f.write("[\n") + for i, run in enumerate(benchmark_runs): + if i > 0: + f.write(",\n") + f.write(run.to_json()) + f.write("\n];\n") + + print( + f"Upload {data_path} to a location set in config.js remoteDataUrl argument." + ) diff --git a/devops/scripts/benchmarks/output_markdown.py b/devops/scripts/benchmarks/output_markdown.py index 18b5779473a75..3295968603d0c 100644 --- a/devops/scripts/benchmarks/output_markdown.py +++ b/devops/scripts/benchmarks/output_markdown.py @@ -79,7 +79,7 @@ def get_improved_regressed_summary(is_improved: bool, rows_count: int): "\n
\n" "\n" f"{title} {rows_count} " - f"(threshold {options.epsilon*100:.2f}%)\n" + f"(threshold {options.stddev_threshold*100:.2f}%)\n" "\n\n" ) @@ -265,7 +265,7 @@ def generate_summary_table( delta = oln.diff - 1 oln.row += f" {delta*100:.2f}%" - if abs(delta) > options.epsilon: + if abs(delta) > options.stddev_threshold: if delta > 0: improved_rows.append(oln.row + " | \n") else: diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py new file mode 100644 index 0000000000000..46f8257cd01ec --- /dev/null +++ b/devops/scripts/benchmarks/presets.py @@ -0,0 +1,50 @@ +# Copyright (C) 2024 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +from enum import Enum + +class Preset(): + def description(self): + pass + def suites(self) -> list[str]: + return [] + +class Full(Preset): + def description(self): + return "All available benchmarks." + def suites(self) -> list[str]: + return ['Compute Benchmarks', 'llama.cpp bench', 'SYCL-Bench', 'Velocity Bench', 'UMF'] + +class SYCL(Preset): + def description(self): + return "All available benchmarks related to SYCL." + def suites(self) -> list[str]: + return ['Compute Benchmarks', 'llama.cpp bench', 'SYCL-Bench', 'Velocity Bench'] + +class Minimal(Preset): + def description(self): + return "Short microbenchmarks." + def suites(self) -> list[str]: + return ['Compute Benchmarks'] + +class Normal(Preset): + def description(self): + return "Comprehensive mix of microbenchmarks and real applications." + def suites(self) -> list[str]: + return ['Compute Benchmarks'] + +class Test(Preset): + def description(self): + return "Noop benchmarks for framework testing." 
+ def suites(self) -> list[str]: + return ['Test Suite'] + + +class Presets(Enum): + FULL = Full + SYCL = SYCL # Nightly + NORMAL = Normal # PR + MINIMAL = Minimal # Quick smoke tests + TEST = Test From 3f42420d95522557ff09c45aa5db480d1f636eda Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Thu, 6 Mar 2025 13:47:30 +0000 Subject: [PATCH 009/114] [benchmarks] fix python formatting with black --- devops/scripts/benchmarks/main.py | 4 ++-- devops/scripts/benchmarks/options.py | 1 + devops/scripts/benchmarks/presets.py | 36 +++++++++++++++++++++------- 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 716f162c48feb..4df66d7ad9c4c 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -154,7 +154,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): SyclBench(directory), LlamaCppBench(directory), UMFSuite(directory), - TestSuite() + TestSuite(), ] if not options.dry_run else [] @@ -442,7 +442,7 @@ def validate_and_parse_env_args(env_args): type=str, choices=[p.name for p in Presets], help="Benchmark preset to run.", - default='FULL', + default="FULL", ) args = parser.parse_args() diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index fd08ce83d145e..7f4f3a9a32eb3 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -2,6 +2,7 @@ from enum import Enum from presets import Preset + class Compare(Enum): LATEST = "latest" AVERAGE = "average" diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py index 46f8257cd01ec..4db68a925a54e 100644 --- a/devops/scripts/benchmarks/presets.py +++ b/devops/scripts/benchmarks/presets.py @@ -5,46 +5,64 @@ from enum import Enum -class Preset(): + +class Preset: def description(self): pass + def suites(self) -> list[str]: return [] + class Full(Preset): def description(self): return "All available benchmarks." + def suites(self) -> list[str]: - return ['Compute Benchmarks', 'llama.cpp bench', 'SYCL-Bench', 'Velocity Bench', 'UMF'] + return [ + "Compute Benchmarks", + "llama.cpp bench", + "SYCL-Bench", + "Velocity Bench", + "UMF", + ] + class SYCL(Preset): def description(self): return "All available benchmarks related to SYCL." + def suites(self) -> list[str]: - return ['Compute Benchmarks', 'llama.cpp bench', 'SYCL-Bench', 'Velocity Bench'] + return ["Compute Benchmarks", "llama.cpp bench", "SYCL-Bench", "Velocity Bench"] + class Minimal(Preset): def description(self): return "Short microbenchmarks." + def suites(self) -> list[str]: - return ['Compute Benchmarks'] + return ["Compute Benchmarks"] + class Normal(Preset): def description(self): return "Comprehensive mix of microbenchmarks and real applications." + def suites(self) -> list[str]: - return ['Compute Benchmarks'] + return ["Compute Benchmarks"] + class Test(Preset): def description(self): return "Noop benchmarks for framework testing." 
+ def suites(self) -> list[str]: - return ['Test Suite'] + return ["Test Suite"] class Presets(Enum): FULL = Full - SYCL = SYCL # Nightly - NORMAL = Normal # PR - MINIMAL = Minimal # Quick smoke tests + SYCL = SYCL # Nightly + NORMAL = Normal # PR + MINIMAL = Minimal # Quick smoke tests TEST = Test From 1c7b189db0c8a8d2883ced52ac3e2b45840c792d Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Thu, 6 Mar 2025 14:35:11 +0000 Subject: [PATCH 010/114] update driver version --- devops/scripts/benchmarks/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index 7f4f3a9a32eb3..aba5aac434917 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -37,7 +37,7 @@ class Options: build_compute_runtime: bool = False extra_ld_libraries: list[str] = field(default_factory=list) extra_env_vars: dict = field(default_factory=dict) - compute_runtime_tag: str = "25.05.32567.12" + compute_runtime_tag: str = "25.05.32567.18" build_igc: bool = False current_run_name: str = "This PR" preset: Preset = None From ad13e93adf8cabd17a7f384f68d509fdbc58a134 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Thu, 6 Mar 2025 15:02:42 +0000 Subject: [PATCH 011/114] simplify preset implementation and fix normal preset --- devops/scripts/benchmarks/main.py | 8 ++--- devops/scripts/benchmarks/options.py | 4 +-- devops/scripts/benchmarks/presets.py | 51 +++++++++++++--------------- 3 files changed, 30 insertions(+), 33 deletions(-) diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 4df66d7ad9c4c..11f02d627a87f 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -17,7 +17,7 @@ from history import BenchmarkHistory from utils.utils import prepare_workdir from utils.compute_runtime import * -from presets import Presets +from presets import preset_get_by_name, presets import argparse import re @@ -440,9 +440,9 @@ def validate_and_parse_env_args(env_args): parser.add_argument( "--preset", type=str, - choices=[p.name for p in Presets], + choices=[p.name() for p in presets], help="Benchmark preset to run.", - default="FULL", + default=options.preset.name(), ) args = parser.parse_args() @@ -469,7 +469,7 @@ def validate_and_parse_env_args(env_args): options.current_run_name = args.relative_perf options.cudnn_directory = args.cudnn_directory options.cublas_directory = args.cublas_directory - options.preset = Presets[args.preset].value() + options.preset = preset_get_by_name(args.preset) if args.build_igc and args.compute_runtime is None: parser.error("--build-igc requires --compute-runtime to be set") diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index aba5aac434917..428ab1f13e9af 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -1,6 +1,6 @@ from dataclasses import dataclass, field from enum import Enum -from presets import Preset +from presets import Preset, presets class Compare(Enum): @@ -40,7 +40,7 @@ class Options: compute_runtime_tag: str = "25.05.32567.18" build_igc: bool = False current_run_name: str = "This PR" - preset: Preset = None + preset: Preset = presets[0] options = Options() diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py index 4db68a925a54e..54727446ecc7d 100644 --- a/devops/scripts/benchmarks/presets.py +++ b/devops/scripts/benchmarks/presets.py @@ -3,22 +3,23 @@ # See 
LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -from enum import Enum - +from typing import List, Type class Preset: - def description(self): - pass + def description(self) -> str: + raise NotImplementedError - def suites(self) -> list[str]: - return [] + def name(self) -> str: + return self.__class__.__name__ + def suites(self) -> List[str]: + raise NotImplementedError class Full(Preset): - def description(self): + def description(self) -> str: return "All available benchmarks." - def suites(self) -> list[str]: + def suites(self) -> List[str]: return [ "Compute Benchmarks", "llama.cpp bench", @@ -27,42 +28,38 @@ def suites(self) -> list[str]: "UMF", ] - class SYCL(Preset): - def description(self): + def description(self) -> str: return "All available benchmarks related to SYCL." - def suites(self) -> list[str]: + def suites(self) -> List[str]: return ["Compute Benchmarks", "llama.cpp bench", "SYCL-Bench", "Velocity Bench"] - class Minimal(Preset): - def description(self): + def description(self) -> str: return "Short microbenchmarks." - def suites(self) -> list[str]: + def suites(self) -> List[str]: return ["Compute Benchmarks"] - class Normal(Preset): - def description(self): + def description(self) -> str: return "Comprehensive mix of microbenchmarks and real applications." - def suites(self) -> list[str]: - return ["Compute Benchmarks"] - + def suites(self) -> List[str]: + return ["Compute Benchmarks", "llama.cpp bench", "Velocity Bench"] class Test(Preset): - def description(self): + def description(self) -> str: return "Noop benchmarks for framework testing." - def suites(self) -> list[str]: + def suites(self) -> List[str]: return ["Test Suite"] +presets = [Full(), SYCL(), Minimal(), Normal(), Test()] -class Presets(Enum): - FULL = Full - SYCL = SYCL # Nightly - NORMAL = Normal # PR - MINIMAL = Minimal # Quick smoke tests - TEST = Test +def preset_get_by_name(name: str) -> Preset: + for p in presets: + if p.name().upper() == name.upper(): + return p + raise ValueError(f"Preset '{name}' not found.") From 68ed0c4e6bcf1a06bd924e0d96731e52513ae1eb Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 6 Mar 2025 14:44:52 -0800 Subject: [PATCH 012/114] Add PVC and BMG as runners --- .github/workflows/sycl-linux-run-tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index 9bad484b1c12e..5797755934a0c 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -126,6 +126,8 @@ on: - '["cts-cpu"]' - '["Linux", "build"]' - '["cuda"]' + - '["Linux", "bmg"]' + - '["PVC_PERF"]' image: type: choice options: From 3a65b98b4558c56f9aeca5d1b33393715764c361 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 6 Mar 2025 14:49:20 -0800 Subject: [PATCH 013/114] Install dependencies before running UR script --- devops/actions/run-tests/benchmark/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 9846b5c8bd6c6..41f9e68f3609d 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -90,7 +90,7 @@ runs: echo "-----" sycl-ls echo "-----" - mkdir -v ./llvm_test_workdir + pip install -r ./devops/scripts/benchmarks/requirements.txt taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl ./toolchain --save baseline echo "-----" 
ls From 220121aa4229bb8d2e6517bb84112b48fa14317b Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 6 Mar 2025 14:58:52 -0800 Subject: [PATCH 014/114] Use venv for python packages --- devops/actions/run-tests/benchmark/action.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 41f9e68f3609d..afd5ede276228 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -90,6 +90,8 @@ runs: echo "-----" sycl-ls echo "-----" + python3 -m venv .venv + . .venv/bin/activate pip install -r ./devops/scripts/benchmarks/requirements.txt taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl ./toolchain --save baseline echo "-----" From 37d361cac120e662c1905acd22542014ac1ac73c Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 6 Mar 2025 15:01:21 -0800 Subject: [PATCH 015/114] Install venv before using venv --- devops/actions/run-tests/benchmark/action.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index afd5ede276228..88f2e75942c4d 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -90,6 +90,7 @@ runs: echo "-----" sycl-ls echo "-----" + apt install -y python3-venv python3 -m venv .venv . .venv/bin/activate pip install -r ./devops/scripts/benchmarks/requirements.txt From 07f1e107a78f84e320379a5b01e4f92b159964cb Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Fri, 7 Mar 2025 12:07:52 +0000 Subject: [PATCH 016/114] [benchmarks] allow specifying custom results directories --- devops/scripts/benchmarks/html/data.js | 2 ++ devops/scripts/benchmarks/main.py | 12 +++++++++++- devops/scripts/benchmarks/options.py | 1 + devops/scripts/benchmarks/output_html.py | 2 +- devops/scripts/benchmarks/presets.py | 2 +- 5 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 devops/scripts/benchmarks/html/data.js diff --git a/devops/scripts/benchmarks/html/data.js b/devops/scripts/benchmarks/html/data.js new file mode 100644 index 0000000000000..36e076361fe17 --- /dev/null +++ b/devops/scripts/benchmarks/html/data.js @@ -0,0 +1,2 @@ +benchmarkRuns = []; +defaultCompareNames = []; diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 11f02d627a87f..43e0bdf4832b1 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -242,7 +242,10 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): if not options.dry_run: chart_data = {this_name: results} - history = BenchmarkHistory(directory) + results_dir = directory + if options.custom_results_dir: + results_dir = Path(options.custom_results_dir) + history = BenchmarkHistory(results_dir) # limit how many files we load. # should this be configurable? 
history.load(1000) @@ -444,6 +447,12 @@ def validate_and_parse_env_args(env_args): help="Benchmark preset to run.", default=options.preset.name(), ) + parser.add_argument( + "--results-dir", + type=str, + help="Specify a custom results directory", + default=options.custom_results_dir, + ) args = parser.parse_args() additional_env_vars = validate_and_parse_env_args(args.env) @@ -470,6 +479,7 @@ def validate_and_parse_env_args(env_args): options.cudnn_directory = args.cudnn_directory options.cublas_directory = args.cublas_directory options.preset = preset_get_by_name(args.preset) + options.custom_results_dir = args.results_dir if args.build_igc and args.compute_runtime is None: parser.error("--build-igc requires --compute-runtime to be set") diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index 428ab1f13e9af..c567a4a2bda53 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -41,6 +41,7 @@ class Options: build_igc: bool = False current_run_name: str = "This PR" preset: Preset = presets[0] + custom_results_dir = None options = Options() diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py index d84fd659beb6c..35fbc2ffb122a 100644 --- a/devops/scripts/benchmarks/output_html.py +++ b/devops/scripts/benchmarks/output_html.py @@ -40,7 +40,7 @@ def generate_html(benchmark_runs: list, compare_names: list[str]): if i > 0: f.write(",\n") f.write(run.to_json()) - f.write("\n];\n") + f.write("\n]\n") print( f"Upload {data_path} to a location set in config.js remoteDataUrl argument." diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py index 54727446ecc7d..5d8e187ac0115 100644 --- a/devops/scripts/benchmarks/presets.py +++ b/devops/scripts/benchmarks/presets.py @@ -60,6 +60,6 @@ def suites(self) -> List[str]: def preset_get_by_name(name: str) -> Preset: for p in presets: - if p.name().upper() == name.upper(): + if p.name() == name: return p raise ValueError(f"Preset '{name}' not found.") From 64cf79cb84e8f4a2bc108a8b93cb264adeef6579 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Fri, 7 Mar 2025 15:17:23 +0000 Subject: [PATCH 017/114] [benchmarks] sort runs by date for html output --- devops/scripts/benchmarks/html/data.js | 16 +++++++++++++++- devops/scripts/benchmarks/output_html.py | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/devops/scripts/benchmarks/html/data.js b/devops/scripts/benchmarks/html/data.js index 36e076361fe17..bd2a4bb9c6f36 100644 --- a/devops/scripts/benchmarks/html/data.js +++ b/devops/scripts/benchmarks/html/data.js @@ -1,2 +1,16 @@ -benchmarkRuns = []; +benchmarkRuns = [ +{"results": [{"label": "Memory Bandwidth 1", "value": 2040.8882991390067, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 34.457610431783294, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2529.3774380653363, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 135.81200692232412, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2719.8110231537125, "command": 
["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 162.32053564116694, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3227.632839523546, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 165.72010893383725, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3514.4167999909496, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.05909225714902, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4012.1042760150494, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 213.80137392913923, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 103.58153862508325, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.155836817249414, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 125.92477357063481, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.26567067278589, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 133.83240260210536, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.763812811796768, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 156.26773548103202, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.861842969825087, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 167.3255955272463, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.48929969639468, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 220.49290675578928, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.900958177754223, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": 
"Throughput 1", "value": 1480.3642886335488, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 97.14840825777334, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1757.3646882744213, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 94.97795059309506, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2141.760057641498, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 32.20444501013399, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2465.113025920638, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 142.56485787432257, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2646.9736547641232, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 165.21303041397977, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2797.023188351585, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 49.789332852672736, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3072.2144224296385, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 100.0435838937749, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3645.5868819428038, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 186.63713430054412, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4365.696214338321, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 70.80581668642078, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4712.424975602965, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 237.2219789185776, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 
5490.717140126425, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 102.98496803461086, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5899.69529717778, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 365.8281107263356, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 249.0033673842501, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.641649890532847, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 307.2248975403931, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.106532892713558, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 364.94516101524755, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.487184395370704, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 415.1825140704191, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 4.837117436872584, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 440.50926932373267, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.400527065008065, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 513.2345717731824, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.92653205921289, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "13462f5f6", "github_repo": "pbalcer/llvm", "date": "2025-03-07T14:04:12.881983+00:00"}, +{"results": [{"label": "Memory Bandwidth 1", "value": 2061.891541779758, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 45.43418752146129, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2418.370570307403, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", 
"passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 23.41390025375235, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2759.548256219084, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 140.04750469338484, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3268.9851244693905, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 179.65245219605663, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3573.980571932074, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.27214661339116, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3913.178724155857, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 187.41955301323392, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 96.66099349103821, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 9.949437203365676, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 116.94033117978861, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.670085238288802, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 141.8516673102208, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.49397378099331, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 154.47973126513787, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.7581068444608, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 194.47100906915202, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.603348605481727, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 189.26766261792042, 
"command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.80270435298115, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1548.0366148601304, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 22.556620202365167, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1804.0612981627564, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 130.9251933818919, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2117.020524938414, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 124.18576268885376, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2340.6226309817375, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 45.23157229205414, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2657.435335624127, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 178.93395582367347, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3100.1660243239976, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 59.26661177659249, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2973.0427624231074, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 133.47659228805884, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3499.50915562217, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 202.92584935080856, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 3906.063346066898, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 58.67588644266499, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4776.315860317371, "command": ["test", 
"--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 337.294287649651, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5294.515316259128, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 310.6460231086305, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5883.364679907042, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 433.9862905464425, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 247.81458542543336, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.259893742055365, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.324345463754, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.537217356717523, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 350.317230088579, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.694135619195492, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 404.94767826325585, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.03967001195265, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 448.68781789313334, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 37.68940635002855, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 479.7145913704619, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 29.819332357308436, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "52dba2a69", "github_repo": "pbalcer/llvm", "date": "2025-03-07T13:48:42.727410+00:00"}, +{"results": [{"label": "Memory Bandwidth 1", "value": 1944.712475358489, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 
137.3517754822544, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2494.968647183357, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 144.62096222735542, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2827.96959627778, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 161.09215987917975, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3246.4235207906368, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 194.8841813593721, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3415.497030173447, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 207.51586434688852, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3947.173405699456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.35155081978226, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 96.27501062264594, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.62997659996243, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 129.58001802257706, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.223861407928204, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 152.60658050771121, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.644344734962786, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 157.8365309090243, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 1.9279203474927489, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 179.69325992783263, "command": ["test", "--arg1", "foo"], "env": {"A": 
"B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.567971182588, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 190.29777300705297, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.545022416801082, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1520.7774888153917, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 69.44363449416652, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1841.9402998174073, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 36.99472050334539, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2063.573372718332, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 103.76799421011498, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2411.1299338593512, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.55096124823987, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2636.4186072468115, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 136.15002376636508, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3012.5429889405455, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 220.10345804333795, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2912.3694681990496, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.24541212948046, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3634.840665141933, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 205.90393111568957, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4221.70291649172, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, 
"stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 245.0992536434908, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4563.9141528786395, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 148.15450755100105, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5449.735755715656, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 283.67446282594074, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6103.288896553245, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 497.0264510256128, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 247.1162346822855, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.349695364944424, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.0848370650819, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.091832690685845, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 368.2173261284879, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.911533458328602, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 400.932628864893, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.298171550718916, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 465.45774333645085, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.008461742975705, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 494.19807030391513, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 31.290996975880688, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": 
"a15019b41", "github_repo": "pbalcer/llvm", "date": "2025-03-07T13:42:53.963514+00:00"}, +{"results": [{"label": "Memory Bandwidth 1", "value": 1971.9235866578244, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 107.4119769093561, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2381.359513168276, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.1820922785026, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2816.164331241929, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 152.82523354152792, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3207.788500404049, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.98152700892044, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3612.0807949868076, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 238.29524372895352, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4041.187128183399, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 244.78707963276804, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 110.17204676929632, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.7488792731298, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 110.04874446073308, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.111000761355566, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 139.80726599267632, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.761524761674202, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 167.65946901880108, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", 
"passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.961270297928603, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 175.07359940308456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.654053542209933, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 188.92280945420617, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.32935674842163, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1498.3892879578825, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 72.76968286004643, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1802.449855059067, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 117.35877323708975, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2141.6873668536814, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 109.1211656598374, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2481.234320462784, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 142.29288921121633, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2592.315439130817, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 171.50618527958042, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2986.630322110839, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 134.14155338256344, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3023.0069882524413, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.0861804957972, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3491.2685416445424, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", 
"passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.82885721897767, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4267.684357012167, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 258.535523100285, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4833.943488351638, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 288.5816839229039, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5460.197706764911, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 294.3526928188145, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6211.479518188777, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 448.53753098503586, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 248.60974821168077, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.966964309950376, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 299.08129766722294, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.458275817843905, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 345.13218478336375, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.88260705972654, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 368.43448345001804, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.0293359056239115, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 462.81719243303485, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 29.16929631101137, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 498.84520836251704, "command": ["test", "--arg1", 
"foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.943372517547482, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "461343280", "github_repo": "pbalcer/llvm", "date": "2025-03-07T13:37:14.849756+00:00"}, +{"results": [{"label": "Memory Bandwidth 1", "value": 2013.395440288061, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 119.82142134259605, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2432.2596423503755, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 118.39327416892019, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2674.0160578165187, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 194.41545828080007, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3063.9534832147688, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 205.67379884852215, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3584.672342581568, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 181.67353531675607, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4125.180591214061, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 273.2758074594961, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 106.37633318466106, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.247008579218756, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 111.99312616915259, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.168574067720925, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 148.4561344088857, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Bar Group", "stddev": 14.59295361046173, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 162.0852714518944, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.380760230770385, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 187.04637816265117, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.658051327117878, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 200.16012739025047, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.6645406941134, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1505.183607875215, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 93.57793481885791, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1786.864494698917, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 122.1347513455775, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2104.854088217566, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 128.42311038597916, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2373.3921231994896, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 140.26128420435194, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2680.62360254391, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 184.49504836547473, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2957.0424468763595, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.13611056356788, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3024.0197501043167, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Foo Group", "stddev": 155.3618836169113, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3658.757514096598, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 149.8130576669698, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4336.791327103415, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 267.10403249537495, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4594.550884548686, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 339.1255595981214, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5619.202557626439, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 324.7429329550701, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6145.450470023206, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 397.2604324517752, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 242.7598020860891, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 4.503364581661284, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 295.888600531132, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.878793912236713, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 333.6634181341022, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.945944118430873, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 386.559044229885, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.909652211845977, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 433.56985826314695, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, 
"unit": "ms", "explicit_group": "Bar Group", "stddev": 19.16786402230611, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 475.40739140041325, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.532574731353257, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "461343280", "github_repo": "pbalcer/llvm", "date": "2025-03-07T12:55:23.831147+00:00"}, +{"results": [{"label": "Memory Bandwidth 1", "value": 2036.879511822098, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 147.49123010982262, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2358.605120547564, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 148.31108709325747, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2782.758869742085, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.07850443580668, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3211.303768537726, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 160.64603088602735, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3726.2788114170226, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.68455828387613, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4034.451298605878, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 214.04589132488434, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 97.81132147931729, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.4388910648024, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 123.47877514885052, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 
15.850644538343035, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 138.3636972712076, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.453475343660529, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 159.0926504710019, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.406923335827646, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 177.58148765355367, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.719641698346496, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 213.78191902260386, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.56513730925096, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1508.4347909839335, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 96.90540186941426, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1765.9068352126365, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 83.00665769599348, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2079.3459975121978, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 129.25159465427944, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2370.0084472113276, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 110.2565848005119, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2598.252204318904, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 170.98495052891545, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2969.9956302642463, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 
157.29990951898574, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2929.264699223759, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.51544383864362, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3605.747338045167, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.72266927612378, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4169.092383202888, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 221.65028734739832, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4342.400927657371, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 10.226688336643164, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5335.841345368252, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 322.69883423073804, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5891.394678938614, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 442.78667173376004, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 253.57797655240805, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.797128115716593, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 300.17543480746747, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.95344804548685, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 353.0001179231053, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.30650858255822, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 393.61574583773006, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 
29.460697740276498, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 411.7013399749935, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.8389196983489504, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 493.65540609194693, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 32.30948655635452, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "59d88dae7", "github_repo": "pbalcer/llvm", "date": "2025-03-07T12:49:15.115091+00:00"}, +{"results": [{"label": "Memory Bandwidth 1", "value": 2195.552651542308, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 40.940741416639945, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2207.459054225258, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 31.681573504875555, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2791.852261483982, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 145.62649882463464, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3134.2219672329984, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 168.02514783326134, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3767.7635130447607, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.24591155046014, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3942.521187753682, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 228.82977417585033, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 100.809622959215, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.473952358992248, "name": "Latency 1", 
"lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 123.83059821116996, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.60938099214386, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 140.93982647796008, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.29049957344098, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 157.82319101117525, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.247880470121356, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 177.31431566581708, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.811044444821867, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 217.37228664795157, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.08328831134193, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1549.1191711106521, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 100.63323493526255, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1748.2566655197188, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 125.49717792070385, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2038.1492661325733, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 101.90033883093976, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2435.624131184369, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.4633804704484, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2625.115911806016, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 142.00862169479268, "name": "Throughput 5", 
"lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3041.342229934156, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 168.4496950355338, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2937.258997841614, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 155.30016809201283, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3538.971007263721, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 226.88178732022945, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4063.7149977059134, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 317.4858199901966, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4911.07807577187, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 250.7864115701977, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5377.1846970238585, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 306.0068346396366, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6245.575950509069, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 298.97595013407596, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 247.84781710540977, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.78683687151215, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 295.5304009113721, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.652016327478979, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 357.4112170450192, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.461446948742276, "name": "Cache Miss Rate 3", 
"lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 395.8114457367419, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.580352011562915, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 449.871031326954, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 30.053959147816688, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 504.6580132142422, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 29.41875628689506, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "PR1234", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T11:58:34.927820+00:00"}, +{"results": [{"label": "Memory Bandwidth 1", "value": 1958.784118312001, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 126.57484819538932, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2440.601149884664, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.0533346583976, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2721.428822801097, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 249.6308268113163, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3177.0055972660625, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 146.92056751044575, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3549.5230383598678, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 234.94466209634086, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3978.0960993946674, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 188.9037213571779, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test 
Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 103.09498391363023, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.02579026210347, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 109.08496102147217, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.749411126280116, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 161.69893522471634, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.4430257786783773, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 162.34529521039352, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.7714067922127894, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 170.86523239479655, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.608020176521034, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 181.05706010508592, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.277369339946695, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1463.0649649228315, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 86.83848693136936, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1864.683141120113, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 86.4841206172361, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2130.758830413485, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 160.54699391922728, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2381.8935399566794, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 144.76036506870986, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", 
"description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2662.7577579295776, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 132.5724441198216, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3078.79130536842, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 17.097525165274803, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2955.7832223272444, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 171.2189444201398, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3688.781307878483, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 65.65926515650821, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4183.4728233450305, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 101.81987978181542, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4939.824132342117, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 289.1390313704078, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5502.544756998508, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 379.9176358151893, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5664.321185867887, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 103.74897438065652, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 246.62407640713522, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.589667669507943, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.08780541388853, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.339251126835014, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test 
benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 349.13408375848826, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.707215404345545, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 420.6620028708826, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.922885386248023, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 470.0593095392814, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.595229921387679, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 495.115546467953, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.928558698066297, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline2", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T11:57:43.925526+00:00"}, +{"results": [{"label": "Memory Bandwidth 1", "value": 2171.099861571096, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 19.23255817429395, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2429.228219203666, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 181.04518738452575, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2756.5078091010796, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 126.73272767497978, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3197.349485288246, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 154.47555387593712, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3607.973454642879, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 213.0597134090529, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, 
{"label": "Memory Bandwidth 6", "value": 3925.314914910963, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 293.48112660476045, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 104.57782310281735, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.873834118675967, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 129.5117553518436, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.407159402934873, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 142.08007511017124, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.930090749895689, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 157.0629031829932, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.918041427401283, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 188.6427038678885, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.828269431125875, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 200.60322195597215, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.338879356636095, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1491.980189873357, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 71.9836340794669, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1794.0628090299717, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 14.307364673980224, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2192.3591192326044, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 114.60420372385168, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 
3."}, {"label": "Throughput 4", "value": 2422.202702788314, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 119.26859163162072, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2770.8727103546726, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 195.12079821799085, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2951.282362921916, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 128.2254379990313, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3039.27661040724, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.6539091592498, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3578.211797262128, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 159.14128724739464, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4128.29686489867, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 223.4100922139098, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4848.219925955905, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 77.93231029690887, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5070.191606088231, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 69.94019467972001, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5966.489310951252, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 336.7173682128105, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 254.57850713986198, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.385164783606097, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 
304.8091397808394, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.103188082400504, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 350.1613069208256, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.345582528912242, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 411.1456865029576, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.86244360659498, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 426.04740645126986, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.597587190328635, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 545.743901896845, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 8.94286171044266, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T11:57:27.051913+00:00"}, +{"results": [{"label": "Memory Bandwidth 1", "value": 1993.661134316776, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 123.85525126992296, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2301.0905948917325, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 133.48673687735095, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2873.4628362191897, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 162.61249284171058, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3238.735403505523, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 56.51716037758475, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3728.4508889231124, "command": ["test", 
"--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 118.24607483750995, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4034.9082581910916, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 74.76961240079906, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 100.88113187316719, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.905008641590433, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 121.61102013493655, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.792042693243397, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 140.99528044475127, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.222627363561376, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 163.077114107551, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.17919680914877, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 188.59968240327134, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.466938787214904, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 198.73690996443867, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.07228063106639, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1456.8721146219054, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 97.05357208107213, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1760.0202375360182, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 113.83470167982718, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2033.3289371002388, 
"command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 131.96155202489578, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2408.2974437457224, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.38445697767614, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2693.2667748312374, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 147.88552510962938, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2991.3045632907692, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 36.616739773559836, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3006.5513639744195, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.20153435546402, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3946.7240883975173, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 24.834845762711534, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4471.79595749108, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 222.54023025674027, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4746.352137751869, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 299.0771752770653, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5465.286069604949, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 348.6918957133431, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5823.519621687581, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 294.3249644414966, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 249.32918263045667, "command": ["test", "--arg1", "foo"], "env": 
{"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.03544118455393, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 288.1546272324227, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.7727205750953, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 363.3503259942238, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.098142551778466, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 392.91985489944227, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.846918288877376, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 456.7540443475017, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.728347618091988, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 499.13159330438293, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.2322764193576, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline2", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T10:48:34.707858+00:00"}, +{"results": [{"label": "Memory Bandwidth 1", "value": 2038.9496500003788, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 117.27052133056621, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2294.3238192937456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.05216178962178, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2816.7462067242177, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 120.10657812200931, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3330.947955167447, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", 
"passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 165.07867992457224, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3427.804220062, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 62.398802753262366, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3931.7861541695424, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 259.7643410153898, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 101.89870179257153, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 9.924103694663449, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 124.9849961475332, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.073706451113821, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 150.17912140564707, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.831834198448414, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 165.06404530951897, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.098638603407267, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 189.4271367424946, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.049029334825786, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 211.70091863399844, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.393712112471537, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1534.395057650628, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 81.6427334392383, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1778.474541262558, "command": ["test", "--arg1", "foo"], "env": {"A": 
"B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 42.56143420705744, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2133.7461366070925, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 116.35913144113613, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2459.5790315346367, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 96.71322011411286, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2637.4334475618302, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 183.30427116704686, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2944.098595726341, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 170.72289928237976, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2907.9632013559226, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.53757173689922, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3509.107421580347, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 236.8620853533764, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4200.093284524192, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 239.58028996799285, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4713.504209113087, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 227.25719976419228, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5049.944494674869, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 96.03307008996549, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6191.498973826217, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, 
"unit": "ms", "explicit_group": "Foo Group", "stddev": 317.5921715209765, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 248.80616580373456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.592467485447356, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.08520837227366, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.677266179208607, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 357.6038589068661, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 5.454584817104773, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 385.0134083066721, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.301075636602707, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 444.0720671004903, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.366607976819555, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 544.9286314848067, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 5.8252101632892845, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T10:43:24.047048+00:00"}, +{"results": [{"label": "Memory Bandwidth 1", "value": 2021.1035365873993, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 69.72840561483144, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2338.909416436906, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 140.64663652969023, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2858.077160911349, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 
192.0675550591675, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3306.833623604521, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 56.99029424270755, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3627.5542312476477, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 124.9433053351406, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3950.086638208113, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 226.7800326425516, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 96.47479639005672, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.581115036930171, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 112.93833387666766, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.456175417231416, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 127.96521280400299, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.881167162370817, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 164.06646826051218, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.400563021933642, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 172.50207971758653, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.59514547087479, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 206.57752612959177, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.6206498096027, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1450.762861653755, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": 
"ms", "explicit_group": "Foo Group", "stddev": 62.85051722934544, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1744.8736145848297, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 28.4724370062761, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2137.935073637293, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 133.15696927062444, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2405.7909943176865, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 138.83795715557775, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2660.942840886126, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 160.5879766560021, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3070.783714494726, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 225.80178015382134, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3021.0961116313642, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 63.199028430669784, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3562.444757764406, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 233.25324926372082, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4147.683102448584, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 267.47351186248994, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4681.79862307404, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 201.00316493809274, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5257.332484362561, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo 
Group", "stddev": 324.82272792943763, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5860.230588756176, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 370.86153080312647, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 245.42900602601247, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.361128649495964, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 300.16320013554315, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.935265770560466, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 345.53233993081176, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.5441134792233, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 397.50592062832635, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.267205299179718, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 426.56360681512984, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 28.587460065910978, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 493.39520093238633, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.049730400867045, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T10:40:45.136466+00:00"} +]; + defaultCompareNames = []; diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py index 35fbc2ffb122a..53dd4b1e8f968 100644 --- a/devops/scripts/benchmarks/output_html.py +++ b/devops/scripts/benchmarks/output_html.py @@ -11,6 +11,7 @@ def generate_html(benchmark_runs: list, compare_names: list[str]): # create path to data.js in html folder html_path = os.path.join(os.path.dirname(__file__), "html") + benchmark_runs.sort(key=lambda run: run.date, reverse=True) if options.output_html == "local": data_path = os.path.join(html_path, "data.js") From 6c28d333dadab0eccd40a80f2f84aa50107e3b93 Mon Sep 17 00:00:00 2001 From: Piotr 
Balcer Date: Mon, 10 Mar 2025 11:21:53 +0000 Subject: [PATCH 018/114] simplify presets, remove suites if all set --- devops/scripts/benchmarks/html/scripts.js | 4 +- devops/scripts/benchmarks/main.py | 10 +-- devops/scripts/benchmarks/options.py | 4 +- devops/scripts/benchmarks/presets.py | 91 ++++++++--------------- 4 files changed, 41 insertions(+), 68 deletions(-) diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js index 7b8b4d742cca2..7ba00738e727a 100644 --- a/devops/scripts/benchmarks/html/scripts.js +++ b/devops/scripts/benchmarks/html/scripts.js @@ -6,6 +6,7 @@ // Core state let activeRuns = new Set(defaultCompareNames); let chartInstances = new Map(); +let suiteNames = new Set(); let timeseriesData, barChartsData, allRunNames; // DOM Elements @@ -306,7 +307,7 @@ function updateURL() { url.searchParams.delete('regex'); } - if (activeSuites.length > 0) { + if (activeSuites.length > 0 && activeSuites.length != suiteNames.size) { url.searchParams.set('suites', activeSuites.join(',')); } else { url.searchParams.delete('suites'); @@ -444,7 +445,6 @@ function setupRunSelector() { function setupSuiteFilters() { suiteFiltersContainer = document.getElementById('suite-filters'); - const suiteNames = new Set(); benchmarkRuns.forEach(run => { run.results.forEach(result => { suiteNames.add(result.suite); diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 43e0bdf4832b1..91f84917f8698 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -17,7 +17,7 @@ from history import BenchmarkHistory from utils.utils import prepare_workdir from utils.compute_runtime import * -from presets import preset_get_by_name, presets +from presets import enabled_suites, presets import argparse import re @@ -164,7 +164,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): failures = {} for s in suites: - if s.name() not in options.preset.suites(): + if s.name() not in enabled_suites(options.preset): continue suite_benchmarks = s.benchmarks() @@ -443,9 +443,9 @@ def validate_and_parse_env_args(env_args): parser.add_argument( "--preset", type=str, - choices=[p.name() for p in presets], + choices=[p for p in presets.keys()], help="Benchmark preset to run.", - default=options.preset.name(), + default=options.preset, ) parser.add_argument( "--results-dir", @@ -478,7 +478,7 @@ def validate_and_parse_env_args(env_args): options.current_run_name = args.relative_perf options.cudnn_directory = args.cudnn_directory options.cublas_directory = args.cublas_directory - options.preset = preset_get_by_name(args.preset) + options.preset = args.preset options.custom_results_dir = args.results_dir if args.build_igc and args.compute_runtime is None: diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index c567a4a2bda53..7600942acd1e5 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -1,7 +1,7 @@ from dataclasses import dataclass, field from enum import Enum -from presets import Preset, presets +from presets import presets class Compare(Enum): LATEST = "latest" @@ -40,7 +40,7 @@ class Options: compute_runtime_tag: str = "25.05.32567.18" build_igc: bool = False current_run_name: str = "This PR" - preset: Preset = presets[0] + preset: str = "Full" custom_results_dir = None diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py index 5d8e187ac0115..7f5dc8d78460a 100644 --- 
a/devops/scripts/benchmarks/presets.py +++ b/devops/scripts/benchmarks/presets.py @@ -3,63 +3,36 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -from typing import List, Type +presets: dict[str, list[str]] = { + "Full": [ + "Compute Benchmarks", + "llama.cpp bench", + "SYCL-Bench", + "Velocity Bench", + "UMF", + ], + "SYCL": [ + "Compute Benchmarks", + "llama.cpp bench", + "SYCL-Bench", + "Velocity Bench", + ], + "Minimal": [ + "Compute Benchmarks", + ], + "Normal": [ + "Compute Benchmarks", + "llama.cpp bench", + "Velocity Bench", + ], + "Test": [ + "Test Suite", + ], +} + +def enabled_suites(preset: str) -> list[str]: + try: + return presets[preset] + except KeyError: + raise ValueError(f"Preset '{preset}' not found.") -class Preset: - def description(self) -> str: - raise NotImplementedError - - def name(self) -> str: - return self.__class__.__name__ - - def suites(self) -> List[str]: - raise NotImplementedError - -class Full(Preset): - def description(self) -> str: - return "All available benchmarks." - - def suites(self) -> List[str]: - return [ - "Compute Benchmarks", - "llama.cpp bench", - "SYCL-Bench", - "Velocity Bench", - "UMF", - ] - -class SYCL(Preset): - def description(self) -> str: - return "All available benchmarks related to SYCL." - - def suites(self) -> List[str]: - return ["Compute Benchmarks", "llama.cpp bench", "SYCL-Bench", "Velocity Bench"] - -class Minimal(Preset): - def description(self) -> str: - return "Short microbenchmarks." - - def suites(self) -> List[str]: - return ["Compute Benchmarks"] - -class Normal(Preset): - def description(self) -> str: - return "Comprehensive mix of microbenchmarks and real applications." - - def suites(self) -> List[str]: - return ["Compute Benchmarks", "llama.cpp bench", "Velocity Bench"] - -class Test(Preset): - def description(self) -> str: - return "Noop benchmarks for framework testing." 
- - def suites(self) -> List[str]: - return ["Test Suite"] - -presets = [Full(), SYCL(), Minimal(), Normal(), Test()] - -def preset_get_by_name(name: str) -> Preset: - for p in presets: - if p.name() == name: - return p - raise ValueError(f"Preset '{name}' not found.") From e15b94ffa6a0bc297b437f35f8afa3885befdb57 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Mon, 10 Mar 2025 11:25:23 +0000 Subject: [PATCH 019/114] [benchmarks] use python venv for scripts --- .github/workflows/ur-benchmarks-reusable.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ur-benchmarks-reusable.yml b/.github/workflows/ur-benchmarks-reusable.yml index 6e8a4ea535d15..e3a754753ecf8 100644 --- a/.github/workflows/ur-benchmarks-reusable.yml +++ b/.github/workflows/ur-benchmarks-reusable.yml @@ -80,9 +80,13 @@ jobs: git checkout origin/pr/${{ inputs.pr_no }}/merge git rev-parse origin/pr/${{ inputs.pr_no }}/merge - - name: Install pip packages + - name: Create virtual environment + run: python -m venv .venv + + - name: Activate virtual environment and install pip packages run: | - pip install --force-reinstall -r ${{github.workspace}}/sycl-repo/devops/scripts/benchmarks/requirements.txt + source .venv/bin/activate + pip install -r ${{github.workspace}}/sycl-repo/devops/scripts/benchmarks/requirements.txt - name: Configure SYCL run: > @@ -139,6 +143,7 @@ jobs: working-directory: ${{ github.workspace }} id: benchmarks run: > + source .venv/bin/activate && taskset -c "${{ env.CORES }}" ${{ github.workspace }}/sycl-repo/devops/scripts/benchmarks/main.py ~/llvm_bench_workdir --sycl ${{ github.workspace }}/sycl_build From 78fd037de376d59a404965817d464edd31bb6890 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 10 Mar 2025 07:53:36 -0700 Subject: [PATCH 020/114] Run apt with sudo --- devops/actions/run-tests/benchmark/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 88f2e75942c4d..5c343f2ff8e26 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -90,7 +90,7 @@ runs: echo "-----" sycl-ls echo "-----" - apt install -y python3-venv + sudo apt install -y python3-venv python3 -m venv .venv . .venv/bin/activate pip install -r ./devops/scripts/benchmarks/requirements.txt From 82b6e55be0f627a301117da05de16bc2ed723b70 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 10 Mar 2025 08:16:58 -0700 Subject: [PATCH 021/114] Ignore "missing" apt packages in workflow --- devops/actions/run-tests/benchmark/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 5c343f2ff8e26..87a629dc60fd6 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -90,7 +90,7 @@ runs: echo "-----" sycl-ls echo "-----" - sudo apt install -y python3-venv + sudo apt install -y --ignore-missing python3-venv python3 -m venv .venv . 
.venv/bin/activate pip install -r ./devops/scripts/benchmarks/requirements.txt From 162cba01ca234ab7645cf59c9d7b82d512870c69 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 10 Mar 2025 09:48:32 -0700 Subject: [PATCH 022/114] Change pip to install to user --- devops/actions/run-tests/benchmark/action.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 87a629dc60fd6..34fdf178afe0e 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -90,10 +90,7 @@ runs: echo "-----" sycl-ls echo "-----" - sudo apt install -y --ignore-missing python3-venv - python3 -m venv .venv - . .venv/bin/activate - pip install -r ./devops/scripts/benchmarks/requirements.txt + pip install --user -r ./devops/scripts/benchmarks/requirements.txt taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl ./toolchain --save baseline echo "-----" ls From 848f7410b0e7823eb94d288d549474785a339a30 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 10 Mar 2025 09:51:09 -0700 Subject: [PATCH 023/114] Ignore system controlled python env --- devops/actions/run-tests/benchmark/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 34fdf178afe0e..79cb2bf4aea5b 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -90,7 +90,7 @@ runs: echo "-----" sycl-ls echo "-----" - pip install --user -r ./devops/scripts/benchmarks/requirements.txt + pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl ./toolchain --save baseline echo "-----" ls From 918604ebd0a22f51be67055b2eea7c877e84a943 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 10 Mar 2025 13:08:09 -0700 Subject: [PATCH 024/114] [CI] use realpaths when referring to SYCL --- devops/actions/run-tests/benchmark/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 79cb2bf4aea5b..92c948ffd7168 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -91,7 +91,7 @@ runs: sycl-ls echo "-----" pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt - taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl ./toolchain --save baseline + taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl "$(realpath ./toolchain)" --save baseline echo "-----" ls # - name: Push compute-benchmarks results From 72d873034ae844678090c0f4c7082a2a8d893b99 Mon Sep 17 00:00:00 2001 From: Ian Li Date: Mon, 10 Mar 2025 18:10:26 -0400 Subject: [PATCH 025/114] [CI] use minimal preset when running benchmarks --- devops/actions/run-tests/benchmark/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 92c948ffd7168..c10a163261c13 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -91,7 +91,7 @@ runs: sycl-ls echo 
"-----" pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt - taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl "$(realpath ./toolchain)" --save baseline + taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl "$(realpath ./toolchain)" --save baseline --preset Minimal echo "-----" ls # - name: Push compute-benchmarks results From 066f5a60c102669878188cc76532c0f57e2e55bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Wed, 12 Mar 2025 11:33:44 +0100 Subject: [PATCH 026/114] [CI] Allow 2 bench scripts locations (#17394) On PRs based on main, the scripts location is "old" and not accesible. Pick location based on the dir existance. Step 'gather info' is in a 'weird' location, so solve it with 2 tries to execute the script. --- .github/workflows/ur-benchmarks-reusable.yml | 23 +++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ur-benchmarks-reusable.yml b/.github/workflows/ur-benchmarks-reusable.yml index e3a754753ecf8..d7c32edfdfc2a 100644 --- a/.github/workflows/ur-benchmarks-reusable.yml +++ b/.github/workflows/ur-benchmarks-reusable.yml @@ -80,13 +80,27 @@ jobs: git checkout origin/pr/${{ inputs.pr_no }}/merge git rev-parse origin/pr/${{ inputs.pr_no }}/merge + # TODO: As long as we didn't merge this workflow into main, we should allow both scripts location + - name: Establish bench scripts location + run: | + if [ -d "${{github.workspace}}/sycl-repo/devops/scripts/benchmarks" ]; then + echo "Bench scripts are in devops/scripts" + echo "BENCH_SCRIPTS_DIR=${{github.workspace}}/sycl-repo/devops/scripts/benchmarks" >> $GITHUB_ENV + elif [ -d "${{github.workspace}}/sycl-repo/unified-runtime/scripts/benchmarks" ]; then + echo "Bench scripts are in unified-runtime/scripts" + echo "BENCH_SCRIPTS_DIR=${{github.workspace}}/sycl-repo/unified-runtime/scripts/benchmarks" >> $GITHUB_ENV + else + echo "Bench scripts are absent...?" 
+ exit 1 + fi + - name: Create virtual environment run: python -m venv .venv - name: Activate virtual environment and install pip packages run: | source .venv/bin/activate - pip install -r ${{github.workspace}}/sycl-repo/devops/scripts/benchmarks/requirements.txt + pip install -r ${BENCH_SCRIPTS_DIR}/requirements.txt - name: Configure SYCL run: > @@ -144,7 +158,7 @@ jobs: id: benchmarks run: > source .venv/bin/activate && - taskset -c "${{ env.CORES }}" ${{ github.workspace }}/sycl-repo/devops/scripts/benchmarks/main.py + taskset -c "${{ env.CORES }}" ${BENCH_SCRIPTS_DIR}/main.py ~/llvm_bench_workdir --sycl ${{ github.workspace }}/sycl_build --ur ${{ github.workspace }}/ur_install @@ -198,6 +212,9 @@ jobs: path: benchmark_results_${{ inputs.pr_no }}.html key: benchmark-results-${{ inputs.pr_no }}-${{ matrix.adapter.str_name }}-${{ github.run_id }} + # TODO: As long as we didn't merge this workflow into main, we should allow both scripts location - name: Get information about platform if: ${{ always() }} - run: ${{github.workspace}}/sycl-repo/devops/scripts/get_system_info.sh + run: | + ${{github.workspace}}/sycl-repo/devops/scripts/get_system_info.sh || true + ${{github.workspace}}/sycl-repo/unified-runtime/.github/scripts/get_system_info.sh || true From 18e5291a405ce1c912d0df3ac02aa5446c099ef8 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Wed, 12 Mar 2025 14:28:08 +0000 Subject: [PATCH 027/114] add ulls compute benchmarks --- devops/scripts/benchmarks/benches/compute.py | 62 ++++++++++++++++++++ devops/scripts/benchmarks/html/data.js | 15 +---- 2 files changed, 63 insertions(+), 14 deletions(-) diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index d35a8e2791648..92818cc00fad2 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -81,6 +81,10 @@ def benchmarks(self) -> list[Benchmark]: GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 5), GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 0, 100), GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 100), + UllsEmptyKernel(self, RUNTIMES.SYCL, 1000, 256), + UllsEmptyKernel(self, RUNTIMES.LEVEL_ZERO, 1000, 256), + UllsKernelSwitch(self, RUNTIMES.SYCL, 8, 200, 0, 0, 1, 1), + UllsKernelSwitch(self, RUNTIMES.LEVEL_ZERO, 8, 200, 0, 0, 1, 1), ] if options.ur is not None: @@ -531,3 +535,61 @@ def bin_args(self) -> list[str]: "--withCopyOffload=1", "--immediateAppendCmdList=0", ] + +class UllsEmptyKernel(ComputeBenchmark): + def __init__(self, bench, runtime: RUNTIMES, wgc, wgs): + self.wgc = wgc + self.wgs = wgs + self.runtime = runtime + super().__init__( + bench, f"ulls_benchmark_{runtime.value}", "EmptyKernel" + ) + + def explicit_group(self): + return f"EmptyKernel {self.wgc} {self.wgs}" + + def description(self) -> str: + return "" + + def name(self): + return f"ulls_benchmark_{self.runtime.value} EmptyKernel wgc:{self.wgc}, wgs:{self.wgs}" + + def bin_args(self) -> list[str]: + return [ + "--iterations=10000", + f"--wgs={self.wgs}", + f"--wgc={self.wgc}", + ] + +class UllsKernelSwitch(ComputeBenchmark): + def __init__(self, bench, runtime: RUNTIMES, count, kernelTime, barrier, hostVisible, ioq, ctrBasedEvents): + self.count = count + self.kernelTime = kernelTime + self.barrier = barrier + self.hostVisible = hostVisible + self.ctrBasedEvents = ctrBasedEvents + self.runtime = runtime + self.ioq = ioq + super().__init__( + bench, f"ulls_benchmark_{runtime.value}", "KernelSwitch" + ) + + def explicit_group(self): + return
f"KernelSwitch {self.count} {self.kernelTime}" + + def description(self) -> str: + return "" + + def name(self): + return f"ulls_benchmark_{self.runtime.value} KernelSwitch count {self.count} kernelTime {self.kernelTime}" + + def bin_args(self) -> list[str]: + return [ + "--iterations=1000", + f"--count={self.count}", + f"--kernelTime={self.kernelTime}", + f"--barrier={self.barrier}", + f"--hostVisible={self.hostVisible}", + f"--ioq={self.ioq}", + f"--ctrBasedEvents={self.ctrBasedEvents}", + ] diff --git a/devops/scripts/benchmarks/html/data.js b/devops/scripts/benchmarks/html/data.js index bd2a4bb9c6f36..a5b96c72834ba 100644 --- a/devops/scripts/benchmarks/html/data.js +++ b/devops/scripts/benchmarks/html/data.js @@ -1,16 +1,3 @@ -benchmarkRuns = [ -{"results": [{"label": "Memory Bandwidth 1", "value": 2040.8882991390067, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 34.457610431783294, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2529.3774380653363, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 135.81200692232412, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2719.8110231537125, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 162.32053564116694, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3227.632839523546, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 165.72010893383725, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3514.4167999909496, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.05909225714902, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4012.1042760150494, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 213.80137392913923, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 103.58153862508325, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.155836817249414, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 125.92477357063481, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Bar Group", "stddev": 13.26567067278589, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 133.83240260210536, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.763812811796768, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 156.26773548103202, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.861842969825087, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 167.3255955272463, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.48929969639468, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 220.49290675578928, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.900958177754223, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1480.3642886335488, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 97.14840825777334, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1757.3646882744213, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 94.97795059309506, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2141.760057641498, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 32.20444501013399, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2465.113025920638, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 142.56485787432257, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2646.9736547641232, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 165.21303041397977, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2797.023188351585, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Foo Group", "stddev": 49.789332852672736, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3072.2144224296385, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 100.0435838937749, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3645.5868819428038, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 186.63713430054412, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4365.696214338321, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 70.80581668642078, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4712.424975602965, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 237.2219789185776, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5490.717140126425, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 102.98496803461086, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5899.69529717778, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 365.8281107263356, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 249.0033673842501, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.641649890532847, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 307.2248975403931, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.106532892713558, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 364.94516101524755, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.487184395370704, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 415.1825140704191, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Bar Group", "stddev": 4.837117436872584, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 440.50926932373267, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.400527065008065, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 513.2345717731824, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.92653205921289, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "13462f5f6", "github_repo": "pbalcer/llvm", "date": "2025-03-07T14:04:12.881983+00:00"}, -{"results": [{"label": "Memory Bandwidth 1", "value": 2061.891541779758, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 45.43418752146129, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2418.370570307403, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 23.41390025375235, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2759.548256219084, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 140.04750469338484, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3268.9851244693905, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 179.65245219605663, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3573.980571932074, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.27214661339116, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3913.178724155857, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 187.41955301323392, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 96.66099349103821, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 
9.949437203365676, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 116.94033117978861, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.670085238288802, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 141.8516673102208, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.49397378099331, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 154.47973126513787, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.7581068444608, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 194.47100906915202, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.603348605481727, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 189.26766261792042, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.80270435298115, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1548.0366148601304, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 22.556620202365167, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1804.0612981627564, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 130.9251933818919, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2117.020524938414, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 124.18576268885376, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2340.6226309817375, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 45.23157229205414, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2657.435335624127, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 
178.93395582367347, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3100.1660243239976, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 59.26661177659249, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2973.0427624231074, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 133.47659228805884, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3499.50915562217, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 202.92584935080856, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 3906.063346066898, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 58.67588644266499, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4776.315860317371, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 337.294287649651, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5294.515316259128, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 310.6460231086305, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5883.364679907042, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 433.9862905464425, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 247.81458542543336, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.259893742055365, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.324345463754, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.537217356717523, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 350.317230088579, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.694135619195492, 
"name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 404.94767826325585, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.03967001195265, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 448.68781789313334, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 37.68940635002855, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 479.7145913704619, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 29.819332357308436, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "52dba2a69", "github_repo": "pbalcer/llvm", "date": "2025-03-07T13:48:42.727410+00:00"}, -{"results": [{"label": "Memory Bandwidth 1", "value": 1944.712475358489, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.3517754822544, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2494.968647183357, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 144.62096222735542, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2827.96959627778, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 161.09215987917975, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3246.4235207906368, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 194.8841813593721, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3415.497030173447, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 207.51586434688852, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3947.173405699456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.35155081978226, "name": "Memory Bandwidth 6", 
"lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 96.27501062264594, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.62997659996243, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 129.58001802257706, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.223861407928204, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 152.60658050771121, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.644344734962786, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 157.8365309090243, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 1.9279203474927489, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 179.69325992783263, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.567971182588, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 190.29777300705297, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.545022416801082, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1520.7774888153917, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 69.44363449416652, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1841.9402998174073, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 36.99472050334539, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2063.573372718332, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 103.76799421011498, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2411.1299338593512, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.55096124823987, "name": "Throughput 4", 
"lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2636.4186072468115, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 136.15002376636508, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3012.5429889405455, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 220.10345804333795, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2912.3694681990496, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.24541212948046, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3634.840665141933, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 205.90393111568957, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4221.70291649172, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 245.0992536434908, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4563.9141528786395, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 148.15450755100105, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5449.735755715656, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 283.67446282594074, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6103.288896553245, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 497.0264510256128, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 247.1162346822855, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.349695364944424, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.0848370650819, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.091832690685845, "name": "Cache Miss Rate 2", "lower_is_better": true, 
"suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 368.2173261284879, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.911533458328602, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 400.932628864893, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.298171550718916, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 465.45774333645085, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.008461742975705, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 494.19807030391513, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 31.290996975880688, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "a15019b41", "github_repo": "pbalcer/llvm", "date": "2025-03-07T13:42:53.963514+00:00"}, -{"results": [{"label": "Memory Bandwidth 1", "value": 1971.9235866578244, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 107.4119769093561, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2381.359513168276, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.1820922785026, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2816.164331241929, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 152.82523354152792, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3207.788500404049, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.98152700892044, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3612.0807949868076, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 238.29524372895352, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": 
"This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4041.187128183399, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 244.78707963276804, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 110.17204676929632, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.7488792731298, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 110.04874446073308, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.111000761355566, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 139.80726599267632, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.761524761674202, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 167.65946901880108, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.961270297928603, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 175.07359940308456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.654053542209933, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 188.92280945420617, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.32935674842163, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1498.3892879578825, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 72.76968286004643, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1802.449855059067, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 117.35877323708975, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2141.6873668536814, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 109.1211656598374, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", 
"description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2481.234320462784, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 142.29288921121633, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2592.315439130817, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 171.50618527958042, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2986.630322110839, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 134.14155338256344, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3023.0069882524413, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.0861804957972, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3491.2685416445424, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.82885721897767, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4267.684357012167, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 258.535523100285, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4833.943488351638, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 288.5816839229039, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5460.197706764911, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 294.3526928188145, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6211.479518188777, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 448.53753098503586, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 248.60974821168077, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.966964309950376, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for 
Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 299.08129766722294, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.458275817843905, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 345.13218478336375, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.88260705972654, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 368.43448345001804, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.0293359056239115, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 462.81719243303485, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 29.16929631101137, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 498.84520836251704, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.943372517547482, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "461343280", "github_repo": "pbalcer/llvm", "date": "2025-03-07T13:37:14.849756+00:00"}, -{"results": [{"label": "Memory Bandwidth 1", "value": 2013.395440288061, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 119.82142134259605, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2432.2596423503755, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 118.39327416892019, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2674.0160578165187, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 194.41545828080007, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3063.9534832147688, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 205.67379884852215, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": 
"Memory Bandwidth 5", "value": 3584.672342581568, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 181.67353531675607, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4125.180591214061, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 273.2758074594961, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 106.37633318466106, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.247008579218756, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 111.99312616915259, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.168574067720925, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 148.4561344088857, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.59295361046173, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 162.0852714518944, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.380760230770385, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 187.04637816265117, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.658051327117878, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 200.16012739025047, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.6645406941134, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1505.183607875215, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 93.57793481885791, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1786.864494698917, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 122.1347513455775, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 
2."}, {"label": "Throughput 3", "value": 2104.854088217566, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 128.42311038597916, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2373.3921231994896, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 140.26128420435194, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2680.62360254391, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 184.49504836547473, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2957.0424468763595, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.13611056356788, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3024.0197501043167, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 155.3618836169113, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3658.757514096598, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 149.8130576669698, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4336.791327103415, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 267.10403249537495, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4594.550884548686, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 339.1255595981214, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5619.202557626439, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 324.7429329550701, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6145.450470023206, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 397.2604324517752, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 
242.7598020860891, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 4.503364581661284, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 295.888600531132, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.878793912236713, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 333.6634181341022, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.945944118430873, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 386.559044229885, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.909652211845977, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 433.56985826314695, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.16786402230611, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 475.40739140041325, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.532574731353257, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "461343280", "github_repo": "pbalcer/llvm", "date": "2025-03-07T12:55:23.831147+00:00"}, -{"results": [{"label": "Memory Bandwidth 1", "value": 2036.879511822098, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 147.49123010982262, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2358.605120547564, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 148.31108709325747, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2782.758869742085, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.07850443580668, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3211.303768537726, "command": ["test", 
"--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 160.64603088602735, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3726.2788114170226, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 203.68455828387613, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4034.451298605878, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 214.04589132488434, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 97.81132147931729, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.4388910648024, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 123.47877514885052, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.850644538343035, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 138.3636972712076, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.453475343660529, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 159.0926504710019, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.406923335827646, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 177.58148765355367, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.719641698346496, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 213.78191902260386, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.56513730925096, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1508.4347909839335, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 96.90540186941426, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 
1765.9068352126365, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 83.00665769599348, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2079.3459975121978, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 129.25159465427944, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2370.0084472113276, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 110.2565848005119, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2598.252204318904, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 170.98495052891545, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2969.9956302642463, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.29990951898574, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2929.264699223759, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.51544383864362, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3605.747338045167, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 208.72266927612378, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4169.092383202888, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 221.65028734739832, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4342.400927657371, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 10.226688336643164, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5335.841345368252, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 322.69883423073804, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5891.394678938614, "command": ["test", 
"--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 442.78667173376004, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 253.57797655240805, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.797128115716593, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 300.17543480746747, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.95344804548685, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 353.0001179231053, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.30650858255822, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 393.61574583773006, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 29.460697740276498, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 411.7013399749935, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.8389196983489504, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 493.65540609194693, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 32.30948655635452, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "59d88dae7", "github_repo": "pbalcer/llvm", "date": "2025-03-07T12:49:15.115091+00:00"}, -{"results": [{"label": "Memory Bandwidth 1", "value": 2195.552651542308, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 40.940741416639945, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2207.459054225258, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 31.681573504875555, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2791.852261483982, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", 
"passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 145.62649882463464, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3134.2219672329984, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 168.02514783326134, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3767.7635130447607, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.24591155046014, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3942.521187753682, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 228.82977417585033, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 100.809622959215, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 11.473952358992248, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 123.83059821116996, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.60938099214386, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 140.93982647796008, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.29049957344098, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 157.82319101117525, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.247880470121356, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 177.31431566581708, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 21.811044444821867, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 217.37228664795157, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.08328831134193, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1549.1191711106521, "command": ["test", 
"--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 100.63323493526255, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1748.2566655197188, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 125.49717792070385, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2038.1492661325733, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 101.90033883093976, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2435.624131184369, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 158.4633804704484, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2625.115911806016, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 142.00862169479268, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3041.342229934156, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 168.4496950355338, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2937.258997841614, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 155.30016809201283, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3538.971007263721, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 226.88178732022945, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4063.7149977059134, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 317.4858199901966, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4911.07807577187, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 250.7864115701977, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5377.1846970238585, "command": ["test", "--arg1", "foo"], "env": {"A": 
"B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 306.0068346396366, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6245.575950509069, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 298.97595013407596, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 247.84781710540977, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.78683687151215, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 295.5304009113721, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.652016327478979, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 357.4112170450192, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.461446948742276, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 395.8114457367419, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.580352011562915, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 449.871031326954, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 30.053959147816688, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 504.6580132142422, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 29.41875628689506, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "PR1234", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T11:58:34.927820+00:00"}, -{"results": [{"label": "Memory Bandwidth 1", "value": 1958.784118312001, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 126.57484819538932, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2440.601149884664, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 
158.0533346583976, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2721.428822801097, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 249.6308268113163, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3177.0055972660625, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 146.92056751044575, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3549.5230383598678, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 234.94466209634086, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3978.0960993946674, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 188.9037213571779, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 103.09498391363023, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.02579026210347, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 109.08496102147217, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.749411126280116, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 161.69893522471634, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.4430257786783773, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 162.34529521039352, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.7714067922127894, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 170.86523239479655, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.608020176521034, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 181.05706010508592, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no 
output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.277369339946695, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1463.0649649228315, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 86.83848693136936, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1864.683141120113, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 86.4841206172361, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2130.758830413485, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 160.54699391922728, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2381.8935399566794, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 144.76036506870986, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2662.7577579295776, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 132.5724441198216, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3078.79130536842, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 17.097525165274803, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2955.7832223272444, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 171.2189444201398, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3688.781307878483, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 65.65926515650821, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4183.4728233450305, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 101.81987978181542, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4939.824132342117, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": 
true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 289.1390313704078, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5502.544756998508, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 379.9176358151893, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5664.321185867887, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 103.74897438065652, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 246.62407640713522, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.589667669507943, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.08780541388853, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.339251126835014, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 349.13408375848826, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 6.707215404345545, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 420.6620028708826, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.922885386248023, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 470.0593095392814, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.595229921387679, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 495.115546467953, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.928558698066297, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline2", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T11:57:43.925526+00:00"}, -{"results": [{"label": "Memory Bandwidth 1", "value": 2171.099861571096, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 19.23255817429395, "name": "Memory Bandwidth 1", "lower_is_better": 
true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2429.228219203666, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 181.04518738452575, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2756.5078091010796, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 126.73272767497978, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3197.349485288246, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 154.47555387593712, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3607.973454642879, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 213.0597134090529, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3925.314914910963, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 293.48112660476045, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 104.57782310281735, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.873834118675967, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 129.5117553518436, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 12.407159402934873, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 142.08007511017124, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.930090749895689, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 157.0629031829932, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.918041427401283, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 188.6427038678885, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Bar Group", "stddev": 19.828269431125875, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 200.60322195597215, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.338879356636095, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1491.980189873357, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 71.9836340794669, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1794.0628090299717, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 14.307364673980224, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2192.3591192326044, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 114.60420372385168, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2422.202702788314, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 119.26859163162072, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2770.8727103546726, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 195.12079821799085, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2951.282362921916, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 128.2254379990313, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3039.27661040724, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.6539091592498, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3578.211797262128, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 159.14128724739464, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4128.29686489867, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", 
"explicit_group": "Foo Group", "stddev": 223.4100922139098, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4848.219925955905, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 77.93231029690887, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5070.191606088231, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 69.94019467972001, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5966.489310951252, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 336.7173682128105, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 254.57850713986198, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.385164783606097, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 304.8091397808394, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.103188082400504, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 350.1613069208256, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.345582528912242, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 411.1456865029576, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.86244360659498, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 426.04740645126986, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.597587190328635, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 545.743901896845, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 8.94286171044266, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": 
"2025-03-07T11:57:27.051913+00:00"}, -{"results": [{"label": "Memory Bandwidth 1", "value": 1993.661134316776, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 123.85525126992296, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2301.0905948917325, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 133.48673687735095, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2873.4628362191897, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 162.61249284171058, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3238.735403505523, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 56.51716037758475, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3728.4508889231124, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 118.24607483750995, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 4034.9082581910916, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 74.76961240079906, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 100.88113187316719, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.905008641590433, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 121.61102013493655, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.792042693243397, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 140.99528044475127, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.222627363561376, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 163.077114107551, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar 
Group", "stddev": 18.17919680914877, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 188.59968240327134, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 16.466938787214904, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 198.73690996443867, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.07228063106639, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1456.8721146219054, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 97.05357208107213, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1760.0202375360182, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 113.83470167982718, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2033.3289371002388, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 131.96155202489578, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2408.2974437457224, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 157.38445697767614, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2693.2667748312374, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 147.88552510962938, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2991.3045632907692, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 36.616739773559836, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3006.5513639744195, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.20153435546402, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3946.7240883975173, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo 
Group", "stddev": 24.834845762711534, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4471.79595749108, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 222.54023025674027, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4746.352137751869, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 299.0771752770653, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5465.286069604949, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 348.6918957133431, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5823.519621687581, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 294.3249644414966, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 249.32918263045667, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.03544118455393, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 288.1546272324227, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.7727205750953, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 363.3503259942238, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.098142551778466, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 392.91985489944227, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.846918288877376, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 456.7540443475017, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.728347618091988, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 499.13159330438293, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": 
true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.2322764193576, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline2", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T10:48:34.707858+00:00"}, -{"results": [{"label": "Memory Bandwidth 1", "value": 2038.9496500003788, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 117.27052133056621, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2294.3238192937456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 137.05216178962178, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2816.7462067242177, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 120.10657812200931, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3330.947955167447, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 165.07867992457224, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3427.804220062, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 62.398802753262366, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3931.7861541695424, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 259.7643410153898, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 101.89870179257153, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 9.924103694663449, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 124.9849961475332, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 15.073706451113821, "name": "Latency 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 150.17912140564707, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 2.831834198448414, "name": 
"Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 165.06404530951897, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 19.098638603407267, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 189.4271367424946, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 17.049029334825786, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 211.70091863399844, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 24.393712112471537, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1534.395057650628, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 81.6427334392383, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1778.474541262558, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 42.56143420705744, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2133.7461366070925, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 116.35913144113613, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2459.5790315346367, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 96.71322011411286, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2637.4334475618302, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 183.30427116704686, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 2944.098595726341, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 170.72289928237976, "name": "Throughput 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 2907.9632013559226, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 174.53757173689922, "name": 
"FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3509.107421580347, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 236.8620853533764, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4200.093284524192, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 239.58028996799285, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4713.504209113087, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 227.25719976419228, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5049.944494674869, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 96.03307008996549, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 6191.498973826217, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 317.5921715209765, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 248.80616580373456, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.592467485447356, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 301.08520837227366, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 10.677266179208607, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 357.6038589068661, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 5.454584817104773, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 385.0134083066721, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 27.301075636602707, "name": "Cache Miss Rate 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 444.0720671004903, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 
23.366607976819555, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 544.9286314848067, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 5.8252101632892845, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T10:43:24.047048+00:00"}, -{"results": [{"label": "Memory Bandwidth 1", "value": 2021.1035365873993, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 69.72840561483144, "name": "Memory Bandwidth 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 1."}, {"label": "Memory Bandwidth 2", "value": 2338.909416436906, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 140.64663652969023, "name": "Memory Bandwidth 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 2."}, {"label": "Memory Bandwidth 3", "value": 2858.077160911349, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 192.0675550591675, "name": "Memory Bandwidth 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 3."}, {"label": "Memory Bandwidth 4", "value": 3306.833623604521, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 56.99029424270755, "name": "Memory Bandwidth 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 4."}, {"label": "Memory Bandwidth 5", "value": 3627.5542312476477, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 124.9433053351406, "name": "Memory Bandwidth 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 5."}, {"label": "Memory Bandwidth 6", "value": 3950.086638208113, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 226.7800326425516, "name": "Memory Bandwidth 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Memory Bandwidth 6."}, {"label": "Latency 1", "value": 96.47479639005672, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.581115036930171, "name": "Latency 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 1."}, {"label": "Latency 2", "value": 112.93833387666766, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.456175417231416, "name": "Latency 2", "lower_is_better": true, 
"suite": "Test Suite", "description": "This is a test benchmark for Latency 2."}, {"label": "Latency 3", "value": 127.96521280400299, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 7.881167162370817, "name": "Latency 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 3."}, {"label": "Latency 4", "value": 164.06646826051218, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 20.400563021933642, "name": "Latency 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 4."}, {"label": "Latency 5", "value": 172.50207971758653, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.59514547087479, "name": "Latency 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 5."}, {"label": "Latency 6", "value": 206.57752612959177, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 23.6206498096027, "name": "Latency 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Latency 6."}, {"label": "Throughput 1", "value": 1450.762861653755, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 62.85051722934544, "name": "Throughput 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 1."}, {"label": "Throughput 2", "value": 1744.8736145848297, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 28.4724370062761, "name": "Throughput 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 2."}, {"label": "Throughput 3", "value": 2137.935073637293, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 133.15696927062444, "name": "Throughput 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 3."}, {"label": "Throughput 4", "value": 2405.7909943176865, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 138.83795715557775, "name": "Throughput 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 4."}, {"label": "Throughput 5", "value": 2660.942840886126, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 160.5879766560021, "name": "Throughput 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Throughput 5."}, {"label": "Throughput 6", "value": 3070.783714494726, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 225.80178015382134, "name": "Throughput 6", "lower_is_better": true, 
"suite": "Test Suite", "description": "This is a test benchmark for Throughput 6."}, {"label": "FLOPS 1", "value": 3021.0961116313642, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 63.199028430669784, "name": "FLOPS 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 1."}, {"label": "FLOPS 2", "value": 3562.444757764406, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 233.25324926372082, "name": "FLOPS 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 2."}, {"label": "FLOPS 3", "value": 4147.683102448584, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 267.47351186248994, "name": "FLOPS 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 3."}, {"label": "FLOPS 4", "value": 4681.79862307404, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 201.00316493809274, "name": "FLOPS 4", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 4."}, {"label": "FLOPS 5", "value": 5257.332484362561, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 324.82272792943763, "name": "FLOPS 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 5."}, {"label": "FLOPS 6", "value": 5860.230588756176, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Foo Group", "stddev": 370.86153080312647, "name": "FLOPS 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for FLOPS 6."}, {"label": "Cache Miss Rate 1", "value": 245.42900602601247, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 13.361128649495964, "name": "Cache Miss Rate 1", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 1."}, {"label": "Cache Miss Rate 2", "value": 300.16320013554315, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 18.935265770560466, "name": "Cache Miss Rate 2", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 2."}, {"label": "Cache Miss Rate 3", "value": 345.53233993081176, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 14.5441134792233, "name": "Cache Miss Rate 3", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 3."}, {"label": "Cache Miss Rate 4", "value": 397.50592062832635, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 22.267205299179718, "name": "Cache Miss Rate 4", "lower_is_better": 
true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 4."}, {"label": "Cache Miss Rate 5", "value": 426.56360681512984, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 28.587460065910978, "name": "Cache Miss Rate 5", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 5."}, {"label": "Cache Miss Rate 6", "value": 493.39520093238633, "command": ["test", "--arg1", "foo"], "env": {"A": "B"}, "stdout": "no output", "passed": true, "unit": "ms", "explicit_group": "Bar Group", "stddev": 26.049730400867045, "name": "Cache Miss Rate 6", "lower_is_better": true, "suite": "Test Suite", "description": "This is a test benchmark for Cache Miss Rate 6."}], "name": "baseline", "git_hash": "ce45ac543", "github_repo": "pbalcer/llvm", "date": "2025-03-07T10:40:45.136466+00:00"} -]; +benchmarkRuns = []; defaultCompareNames = []; From 237750e9dc03ce8534d373c984e9fd8c56a72d4f Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Tue, 11 Mar 2025 15:07:14 -0700 Subject: [PATCH 028/114] [CI][Benchmark] Decouple results from existing file structure, fetch results from git instead of local --- devops/actions/run-tests/benchmark/action.yml | 112 +++++++++--------- devops/scripts/benchmarks/main.py | 23 +++- devops/scripts/benchmarks/options.py | 1 + devops/scripts/benchmarks/output_html.py | 4 +- 4 files changed, 81 insertions(+), 59 deletions(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index c10a163261c13..f90808f730787 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -27,16 +27,25 @@ runs: shell: bash env: TARGET_DEVICE: ${{ inputs.target_devices }} + RUNNER_NAME: ${{ runner.name }} run: | case "$RUNNER_TAG" in - '["Linux", "gen12"]' | '["Linux", "pvc"]') ;; + '["PVC_PERF"]' ) ;; *) echo "#" - echo "# WARNING: Only gen12/pvc on Linux is fully supported." + echo "# WARNING: Only specific tuned runners are fully supported." echo "# This workflow is not guaranteed to work with other runners." echo "#" ;; esac + # Ensure runner name has nothing injected + # TODO: in terms of security, is this overkill? + if [ -z "$(printf '%s' "$RUNNER_NAME" | grep -oE '^[a-zA-Z0-9_-]+$')" ]; then + echo "Bad runner name, please ensure runner name is [a-zA-Z0-9_-]." + exit 1 + fi + echo "RUNNER_NAME=$RUNNER_NAME" >> $GITHUB_ENV + # input.target_devices is not directly used, as this allows code injection case "$TARGET_DEVICE" in level_zero:*) ;; @@ -46,11 +55,11 @@ runs: echo "# This workflow is not guaranteed to work with other backends." echo "#" ;; esac + echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV + - name: Compute CPU core range to run benchmarks on shell: bash run: | - # Taken from ur-benchmark-reusable.yml: - # Compute the core range for the first NUMA node; second node is used by # UMF. Skip the first 4 cores as the kernel is likely to schedule more # work on these. 
@@ -67,65 +76,62 @@ runs: ZE_AFFINITY_MASK=0 echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV + - name: Checkout results repo + shell: bash + run: | + git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results - name: Run compute-benchmarks shell: bash run: | - cat << EOF - # - # NOTE TO DEVELOPERS: - # - - Check latter steps of the workflow: This job produces an artifact with: - - benchmark results from passing/failing tests - - log containing all failing (too slow) benchmarks - - log containing all erroring benchmarks - - While this step in the workflow provides debugging output describing this - information, it might be easier to inspect the logs from the artifact - instead. - - EOF - export ONEAPI_DEVICE_SELECTOR="${{ inputs.target_devices }}" + # TODO generate summary + display helpful message here export CMPLR_ROOT=./toolchain echo "-----" sycl-ls echo "-----" pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt - taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$(realpath ./llvm_test_workdir)" --sycl "$(realpath ./toolchain)" --save baseline --preset Minimal + echo "-----" + mkdir -p "./llvm-ci-perf-results/$RUNNER_NAME" + taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \ + "$(realpath ./llvm_test_workdir)" \ + --sycl "$(realpath ./toolchain)" \ + --save baseline \ + --output-html remote \ + --results-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ + --output-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ + --preset Minimal echo "-----" ls -# - name: Push compute-benchmarks results -# if: always() -# shell: bash -# run: | -# # TODO -- waiting on security clearance -# # Load configuration values -# $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) -# -# cd "./llvm-ci-perf-results" -# git config user.name "SYCL Benchmarking Bot" -# git config user.email "sys_sycl_benchmarks@intel.com" -# git pull -# git add . -# # Make sure changes have been made -# if git diff --quiet && git diff --cached --quiet; then -# echo "No new results added, skipping push." -# else -# git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" -# git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH" -# fi - - name: Find benchmark result artifact here + - name: Push compute-benchmarks results if: always() shell: bash run: | - cat << EOF - # - # Artifact link for benchmark results here: - # - EOF - - name: Archive compute-benchmark results - if: always() - uses: actions/upload-artifact@v4 - with: - name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }}) - path: ./artifact + # TODO redo configuration + # $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) + + cd "./llvm-ci-perf-results" + git config user.name "SYCL Benchmarking Bot" + git config user.email "sys_sycl_benchmarks@intel.com" + git pull + git add . + # Make sure changes have been made + if git diff --quiet && git diff --cached --quiet; then + echo "No new results added, skipping push." 
+ else + git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" + git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" unify-ci + fi +# - name: Find benchmark result artifact here +# if: always() +# shell: bash +# run: | +# cat << EOF +# # +# # Artifact link for benchmark results here: +# # +# EOF +# - name: Archive compute-benchmark results +# if: always() +# uses: actions/upload-artifact@v4 +# with: +# name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }}) +# path: ./artifact diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 91f84917f8698..1a15e5407daf3 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -265,11 +265,15 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): this_name, chart_data, failures, options.output_markdown ) - with open("benchmark_results.md", "w") as file: + md_path = options.output_directory + if options.output_directory is None: + md_path = os.getcwd() + + with open(os.path.join(md_path, "benchmark_results.md"), "w") as file: file.write(markdown_content) print( - f"Markdown with benchmark results has been written to {os.getcwd()}/benchmark_results.md" + f"Markdown with benchmark results has been written to {md_path}/benchmark_results.md" ) saved_name = save_name if save_name is not None else this_name @@ -283,7 +287,10 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): compare_names.append(saved_name) if options.output_html: - generate_html(history.runs, compare_names) + html_path = options.output_directory + if options.output_directory is None: + html_path = os.path.join(os.path.dirname(__file__), "html") + generate_html(history.runs, compare_names, html_path) def validate_and_parse_env_args(env_args): @@ -398,6 +405,12 @@ def validate_and_parse_env_args(env_args): const=options.output_html, choices=["local", "remote"], ) + parser.add_argument( + "--output-dir", + type=str, + help="Location for output files, if --output-html or --output_markdown was specified.", + default=None + ) parser.add_argument( "--dry-run", help="Do not run any actual benchmarks", @@ -486,6 +499,10 @@ def validate_and_parse_env_args(env_args): if args.compute_runtime is not None: options.build_compute_runtime = True options.compute_runtime_tag = args.compute_runtime + if args.output_dir is not None: + if not os.path.isdir(args.output_dir): + parser.error("Specified --output-dir is not a valid path") + options.output_directory = os.path.abspath(args.output_dir) benchmark_filter = re.compile(args.filter) if args.filter else None diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index 7600942acd1e5..332d1615bc78d 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -31,6 +31,7 @@ class Options: compare_max: int = 10 # average/median over how many results output_markdown: MarkdownSize = MarkdownSize.SHORT output_html: str = "local" + output_directory: str = None dry_run: bool = False stddev_threshold: float = 0.02 iterations_stddev: int = 5 diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py index 53dd4b1e8f968..49b4d1d84a214 100644 --- a/devops/scripts/benchmarks/output_html.py +++ b/devops/scripts/benchmarks/output_html.py @@ -8,9 +8,7 @@ from options import options -def 
generate_html(benchmark_runs: list, compare_names: list[str]): - # create path to data.js in html folder - html_path = os.path.join(os.path.dirname(__file__), "html") +def generate_html(benchmark_runs: list, compare_names: list[str], html_path: str): benchmark_runs.sort(key=lambda run: run.date, reverse=True) if options.output_html == "local": From ba1297fe66693ef025b2aa6c14ebfc17bf2c3651 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 12 Mar 2025 09:00:38 -0700 Subject: [PATCH 029/114] [benchmark] Disabling UR test suites --- .github/workflows/ur-benchmarks-reusable.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ur-benchmarks-reusable.yml b/.github/workflows/ur-benchmarks-reusable.yml index d7c32edfdfc2a..0aecbffc20fe7 100644 --- a/.github/workflows/ur-benchmarks-reusable.yml +++ b/.github/workflows/ur-benchmarks-reusable.yml @@ -161,7 +161,6 @@ jobs: taskset -c "${{ env.CORES }}" ${BENCH_SCRIPTS_DIR}/main.py ~/llvm_bench_workdir --sycl ${{ github.workspace }}/sycl_build - --ur ${{ github.workspace }}/ur_install --adapter ${{ matrix.adapter.str_name }} --compare baseline --compute-runtime ${{ inputs.compute_runtime_commit }} @@ -169,6 +168,9 @@ jobs: ${{ inputs.upload_report && '--output-html' || '' }} ${{ inputs.pr_no != 0 && '--output-markdown' || '' }} ${{ inputs.bench_script_params }} + # Temporarily disabled due to build faiures + # https://github.com/intel/llvm/actions/runs/13814877162/job/38645384849#step:14:849 + # --ur ${{ github.workspace }}/ur_install - name: Print benchmark results run: | From cd6097fdef7d77213b7a21658ca2e040fd9cf825 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Thu, 13 Mar 2025 11:42:00 +0000 Subject: [PATCH 030/114] update compute benchmarks and fix requirements --- devops/scripts/benchmarks/benches/compute.py | 2 +- devops/scripts/benchmarks/requirements.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index 92818cc00fad2..4b48f16b5fc6b 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -28,7 +28,7 @@ def setup(self): self.directory, "compute-benchmarks-repo", "https://github.com/intel/compute-benchmarks.git", - "9369275026229b182bc4a555b73c2ec995a9e2b7", + "dfdbf2ff9437ee159627cc2cd9159c289da1a7ba", ) build_path = create_build_path(self.directory, "compute-benchmarks-build") diff --git a/devops/scripts/benchmarks/requirements.txt b/devops/scripts/benchmarks/requirements.txt index 99ba0caab55c2..9f0381ceef6c2 100644 --- a/devops/scripts/benchmarks/requirements.txt +++ b/devops/scripts/benchmarks/requirements.txt @@ -2,3 +2,4 @@ matplotlib==3.9.2 mpld3==0.5.10 dataclasses-json==0.6.7 PyYAML==6.0.1 +Mako==1.3.9 From c4e92c6ac7a64ae26f9c15ea383473b71637c1e2 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Thu, 13 Mar 2025 12:09:48 +0000 Subject: [PATCH 031/114] fix url updates --- devops/scripts/benchmarks/html/scripts.js | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js index 7ba00738e727a..2bd52a70b07c8 100644 --- a/devops/scripts/benchmarks/html/scripts.js +++ b/devops/scripts/benchmarks/html/scripts.js @@ -13,12 +13,13 @@ let timeseriesData, barChartsData, allRunNames; let runSelect, selectedRunsDiv, suiteFiltersContainer; // Run selector functions -function updateSelectedRuns() { 
+function updateSelectedRuns(forceUpdate = true) { selectedRunsDiv.innerHTML = ''; activeRuns.forEach(name => { selectedRunsDiv.appendChild(createRunElement(name)); }); - updateCharts(); + if (forceUpdate) + updateCharts(); } function createRunElement(name) { @@ -439,7 +440,7 @@ function setupRunSelector() { runSelect.appendChild(option); }); - updateSelectedRuns(); + updateSelectedRuns(false); } function setupSuiteFilters() { From ed8eecce3d20e19f471ec65bb59b851bd215b486 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Thu, 13 Mar 2025 12:24:41 +0000 Subject: [PATCH 032/114] use timestamps in result file names --- devops/scripts/benchmarks/history.py | 26 +++++++++-------------- devops/scripts/benchmarks/utils/result.py | 1 + 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index 2b7002ed7faa9..2ed63d129d140 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -13,7 +13,6 @@ class BenchmarkHistory: - benchmark_run_index_max = 0 runs = [] def __init__(self, dir): @@ -35,28 +34,22 @@ def load(self, n: int): # Get all JSON files in the results directory benchmark_files = list(results_dir.glob("*.json")) - # Extract index numbers and sort files by index number - def extract_index(file_path: Path) -> int: + # Extract timestamp and sort files by it + def extract_timestamp(file_path: Path) -> str: try: - return int(file_path.stem.split("_")[0]) - except (IndexError, ValueError): - return -1 + return file_path.stem.split("_")[-1] + except IndexError: + return "" - benchmark_files = [ - file for file in benchmark_files if extract_index(file) != -1 - ] - benchmark_files.sort(key=extract_index) + benchmark_files.sort(key=extract_timestamp, reverse=True) # Load the first n benchmark files benchmark_runs = [] - for file_path in benchmark_files[n::-1]: + for file_path in benchmark_files[:n]: benchmark_run = self.load_result(file_path) if benchmark_run: benchmark_runs.append(benchmark_run) - if benchmark_files: - self.benchmark_run_index_max = extract_index(benchmark_files[-1]) - self.runs = benchmark_runs def create_run(self, name: str, results: list[Result]) -> BenchmarkRun: @@ -102,10 +95,11 @@ def save(self, save_name, results: list[Result], to_file=True): results_dir = Path(os.path.join(self.dir, "results")) os.makedirs(results_dir, exist_ok=True) - self.benchmark_run_index_max += 1 + # Use formatted timestamp for the filename + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") file_path = Path( os.path.join( - results_dir, f"{self.benchmark_run_index_max}_{save_name}.json" + results_dir, f"{save_name}_{timestamp}.json" ) ) with file_path.open("w") as file: diff --git a/devops/scripts/benchmarks/utils/result.py b/devops/scripts/benchmarks/utils/result.py index 4e65a3b8aa582..7d82d9e488edf 100644 --- a/devops/scripts/benchmarks/utils/result.py +++ b/devops/scripts/benchmarks/utils/result.py @@ -35,6 +35,7 @@ class Result: class BenchmarkRun: results: list[Result] name: str = "This PR" + hostname: str = "Unknown" git_hash: str = "" github_repo: str = None date: datetime = field( From 130212d2a2e0b1045605033a09412f430d13721a Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Thu, 13 Mar 2025 12:29:36 +0000 Subject: [PATCH 033/114] add hostname to benchmark run --- devops/scripts/benchmarks/history.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index 2ed63d129d140..d1bdc3bfdb940 
100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -6,6 +6,7 @@ import os import json from pathlib import Path +import socket from utils.result import Result, BenchmarkRun from options import Compare, options from datetime import datetime, timezone @@ -82,6 +83,7 @@ def create_run(self, name: str, results: list[Result]) -> BenchmarkRun: github_repo=github_repo, date=datetime.now(tz=timezone.utc), results=results, + hostname=socket.gethostname() ) def save(self, save_name, results: list[Result], to_file=True): @@ -132,6 +134,7 @@ def compute_average(self, data: list[BenchmarkRun]): name=first_run.name, git_hash="average", date=first_run.date, # should this be different? + hostname=first_run.hostname ) return average_benchmark_run From 5323386c59d2457d79d1fee27b55dffc93be74a3 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Thu, 13 Mar 2025 17:00:23 +0000 Subject: [PATCH 034/114] add SubmitGraph benchmark ... and apply black formatting. --- devops/scripts/benchmarks/benches/compute.py | 70 +++++++++++++++++--- devops/scripts/benchmarks/history.py | 10 +-- devops/scripts/benchmarks/main.py | 2 +- devops/scripts/benchmarks/options.py | 1 + devops/scripts/benchmarks/presets.py | 2 +- 5 files changed, 68 insertions(+), 17 deletions(-) diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index 1f335cd8838ec..bc9d1d9d80d8a 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -28,7 +28,7 @@ def setup(self): self.directory, "compute-benchmarks-repo", "https://github.com/intel/compute-benchmarks.git", - "dfdbf2ff9437ee159627cc2cd9159c289da1a7ba", + "b5cc46acf61766ab00da04e85bd4da4f7591eb21", ) build_path = create_build_path(self.directory, "compute-benchmarks-build") @@ -87,6 +87,19 @@ def benchmarks(self) -> list[Benchmark]: UllsKernelSwitch(self, RUNTIMES.LEVEL_ZERO, 8, 200, 0, 0, 1, 1), ] + for in_order_queue in [0, 1]: + for num_kernels in [4, 32]: + for measure_completion_time in [0, 1]: + benches.append( + GraphApiSubmitGraph( + self, + RUNTIMES.SYCL, + in_order_queue, + num_kernels, + measure_completion_time, + ) + ) + if options.ur is not None: benches += [ SubmitKernelUR(self, 0, 0), @@ -536,14 +549,46 @@ def bin_args(self) -> list[str]: "--immediateAppendCmdList=0", ] + +class GraphApiSubmitGraph(ComputeBenchmark): + def __init__( + self, bench, runtime: RUNTIMES, inOrderQueue, numKernels, measureCompletionTime + ): + self.inOrderQueue = inOrderQueue + self.numKernels = numKernels + self.runtime = runtime + self.measureCompletionTime = measureCompletionTime + super().__init__(bench, f"graph_api_benchmark_{runtime.value}", "SubmitGraph") + + def explicit_group(self): + return f"SubmitGraph {self.numKernels}" + + def description(self) -> str: + return ( + f"Measures {self.runtime.value.upper()} performance when executing {self.numKernels} " + f"trivial kernels using graphs. Tests overhead and benefits of graph-based execution." 
+ ) + + def name(self): + return f"graph_api_benchmark_{self.runtime.value} SubmitGraph numKernels:{self.numKernels} ioq {self.inOrderQueue} measureCompletion {self.measureCompletionTime}" + + def bin_args(self) -> list[str]: + return [ + "--iterations=10000", + f"--NumKernels={self.numKernels}", + f"--MeasureCompletionTime={self.measureCompletionTime}", + f"--InOrderQueue={self.inOrderQueue}", + "--Profiling=0", + "--KernelExecutionTime=1", + ] + + class UllsEmptyKernel(ComputeBenchmark): def __init__(self, bench, runtime: RUNTIMES, wgc, wgs): self.wgc = wgc self.wgs = wgs self.runtime = runtime - super().__init__( - bench, f"ulls_benchmark_{runtime.value}", "EmptyKernel" - ) + super().__init__(bench, f"ulls_benchmark_{runtime.value}", "EmptyKernel") def explicit_group(self): return f"EmptyKernel {self.wgc} {self.wgs}" @@ -561,8 +606,19 @@ def bin_args(self) -> list[str]: f"--wgc={self.wgs}", ] + class UllsKernelSwitch(ComputeBenchmark): - def __init__(self, bench, runtime: RUNTIMES, count, kernelTime, barrier, hostVisible, ioq, ctrBasedEvents): + def __init__( + self, + bench, + runtime: RUNTIMES, + count, + kernelTime, + barrier, + hostVisible, + ioq, + ctrBasedEvents, + ): self.count = count self.kernelTime = kernelTime self.barrier = barrier @@ -570,9 +626,7 @@ def __init__(self, bench, runtime: RUNTIMES, count, kernelTime, barrier, hostVis self.ctrBasedEvents = ctrBasedEvents self.runtime = runtime self.ioq = ioq - super().__init__( - bench, f"ulls_benchmark_{runtime.value}", "KernelSwitch" - ) + super().__init__(bench, f"ulls_benchmark_{runtime.value}", "KernelSwitch") def explicit_group(self): return f"KernelSwitch {self.count} {self.kernelTime}" diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index d1bdc3bfdb940..f05e0192d26ee 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -83,7 +83,7 @@ def create_run(self, name: str, results: list[Result]) -> BenchmarkRun: github_repo=github_repo, date=datetime.now(tz=timezone.utc), results=results, - hostname=socket.gethostname() + hostname=socket.gethostname(), ) def save(self, save_name, results: list[Result], to_file=True): @@ -99,11 +99,7 @@ def save(self, save_name, results: list[Result], to_file=True): # Use formatted timestamp for the filename timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - file_path = Path( - os.path.join( - results_dir, f"{save_name}_{timestamp}.json" - ) - ) + file_path = Path(os.path.join(results_dir, f"{save_name}_{timestamp}.json")) with file_path.open("w") as file: json.dump(serialized, file, indent=4) print(f"Benchmark results saved to {file_path}") @@ -134,7 +130,7 @@ def compute_average(self, data: list[BenchmarkRun]): name=first_run.name, git_hash="average", date=first_run.date, # should this be different? 
- hostname=first_run.hostname + hostname=first_run.hostname, ) return average_benchmark_run diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 1a15e5407daf3..1d7304ea5e212 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -409,7 +409,7 @@ def validate_and_parse_env_args(env_args): "--output-dir", type=str, help="Location for output files, if --output-html or --output_markdown was specified.", - default=None + default=None, ) parser.add_argument( "--dry-run", diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index 332d1615bc78d..ced76a5d692f2 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -3,6 +3,7 @@ from presets import presets + class Compare(Enum): LATEST = "latest" AVERAGE = "average" diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py index 7f5dc8d78460a..e394a8b4b622e 100644 --- a/devops/scripts/benchmarks/presets.py +++ b/devops/scripts/benchmarks/presets.py @@ -30,9 +30,9 @@ ], } + def enabled_suites(preset: str) -> list[str]: try: return presets[preset] except KeyError: raise ValueError(f"Preset '{preset}' not found.") - From 5bd1d568a4371041dab01e071349a2d392c409ba Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 13 Mar 2025 12:07:11 -0700 Subject: [PATCH 035/114] Restore sycl-linux-run-tests benchmarking action --- devops/actions/run-tests/benchmark/action.yml | 106 +++++++++++------- 1 file changed, 68 insertions(+), 38 deletions(-) diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 7f69fdf832982..f90808f730787 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -27,16 +27,25 @@ runs: shell: bash env: TARGET_DEVICE: ${{ inputs.target_devices }} + RUNNER_NAME: ${{ runner.name }} run: | case "$RUNNER_TAG" in - '["Linux", "gen12"]' | '["Linux", "pvc"]') ;; + '["PVC_PERF"]' ) ;; *) echo "#" - echo "# WARNING: Only gen12/pvc on Linux is fully supported." + echo "# WARNING: Only specific tuned runners are fully supported." echo "# This workflow is not guaranteed to work with other runners." echo "#" ;; esac + # Ensure runner name has nothing injected + # TODO: in terms of security, is this overkill? + if [ -z "$(printf '%s' "$RUNNER_NAME" | grep -oE '^[a-zA-Z0-9_-]+$')" ]; then + echo "Bad runner name, please ensure runner name is [a-zA-Z0-9_-]." + exit 1 + fi + echo "RUNNER_NAME=$RUNNER_NAME" >> $GITHUB_ENV + # input.target_devices is not directly used, as this allows code injection case "$TARGET_DEVICE" in level_zero:*) ;; @@ -46,37 +55,58 @@ runs: echo "# This workflow is not guaranteed to work with other backends." echo "#" ;; esac - - name: Run compute-benchmarks + echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV + + - name: Compute CPU core range to run benchmarks on shell: bash run: | - cat << EOF - # - # NOTE TO DEVELOPERS: - # - - Check latter steps of the workflow: This job produces an artifact with: - - benchmark results from passing/failing tests - - log containing all failing (too slow) benchmarks - - log containing all erroring benchmarks + # Compute the core range for the first NUMA node; second node is used by + # UMF. Skip the first 4 cores as the kernel is likely to schedule more + # work on these. 
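+        #
+        # The awk script below keeps the last matching "NUMA node0 CPU(s)" (or
+        # "On-line CPU(s)") line reported by lscpu, takes the first
+        # comma-separated range of that CPU list and rewrites a leading 0 to 4,
+        # so cores 0-3 stay free for the kernel.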
+ CORES="$(lscpu | awk ' + /NUMA node0 CPU|On-line CPU/ {line=$0} + END { + split(line, a, " ") + split(a[4], b, ",") + sub(/^0/, "4", b[1]) + print b[1] + }')" + echo "CPU core range to use: $CORES" + echo "CORES=$CORES" >> $GITHUB_ENV - While this step in the workflow provides debugging output describing this - information, it might be easier to inspect the logs from the artifact - instead. - - EOF - export ONEAPI_DEVICE_SELECTOR="${{ inputs.target_devices }}" + ZE_AFFINITY_MASK=0 + echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV + - name: Checkout results repo + shell: bash + run: | + git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results + - name: Run compute-benchmarks + shell: bash + run: | + # TODO generate summary + display helpful message here export CMPLR_ROOT=./toolchain echo "-----" sycl-ls echo "-----" - ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s || exit 1 + pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt + echo "-----" + mkdir -p "./llvm-ci-perf-results/$RUNNER_NAME" + taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \ + "$(realpath ./llvm_test_workdir)" \ + --sycl "$(realpath ./toolchain)" \ + --save baseline \ + --output-html remote \ + --results-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ + --output-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ + --preset Minimal + echo "-----" + ls - name: Push compute-benchmarks results if: always() shell: bash run: | - # TODO -- waiting on security clearance - # Load configuration values - $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) + # TODO redo configuration + # $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) cd "./llvm-ci-perf-results" git config user.name "SYCL Benchmarking Bot" @@ -88,20 +118,20 @@ runs: echo "No new results added, skipping push." 
else git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" - git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH" + git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" unify-ci fi - - name: Find benchmark result artifact here - if: always() - shell: bash - run: | - cat << EOF - # - # Artifact link for benchmark results here: - # - EOF - - name: Archive compute-benchmark results - if: always() - uses: actions/upload-artifact@v4 - with: - name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }}) - path: ./artifact +# - name: Find benchmark result artifact here +# if: always() +# shell: bash +# run: | +# cat << EOF +# # +# # Artifact link for benchmark results here: +# # +# EOF +# - name: Archive compute-benchmark results +# if: always() +# uses: actions/upload-artifact@v4 +# with: +# name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }}) +# path: ./artifact From e9b1375dd0075dd053839370d5fc3bcf95cc4390 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 13 Mar 2025 12:36:55 -0700 Subject: [PATCH 036/114] Restore old SYCL benchmarking CI --- .github/workflows/sycl-linux-run-tests.yml | 10 ++ devops/actions/run-tests/benchmark/action.yml | 88 +++++------- .../actions/run-tests/benchmark_v2/action.yml | 134 ++++++++++++++++++ 3 files changed, 183 insertions(+), 49 deletions(-) create mode 100644 devops/actions/run-tests/benchmark_v2/action.yml diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index c30c5eccbcb62..f5b243cb7fc05 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -155,6 +155,7 @@ on: - e2e - cts - compute-benchmarks + - benchmark_v2 env: description: | @@ -330,3 +331,12 @@ jobs: env: RUNNER_TAG: ${{ inputs.runner }} GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} + + - name: Run benchmarks + if: inputs.tests_selector == 'benchmark_v2' + uses: ./devops/actions/run-tests/benchmark_v2 + with: + target_devices: ${{ inputs.target_devices }} + env: + RUNNER_TAG: ${{ inputs.runner }} + GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} \ No newline at end of file diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index f90808f730787..03b7d4ad776fd 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -27,25 +27,16 @@ runs: shell: bash env: TARGET_DEVICE: ${{ inputs.target_devices }} - RUNNER_NAME: ${{ runner.name }} run: | case "$RUNNER_TAG" in - '["PVC_PERF"]' ) ;; + '["Linux", "gen12"]' | '["Linux", "pvc"]') ;; *) echo "#" - echo "# WARNING: Only specific tuned runners are fully supported." + echo "# WARNING: Only gen12/pvc on Linux is fully supported." echo "# This workflow is not guaranteed to work with other runners." echo "#" ;; esac - # Ensure runner name has nothing injected - # TODO: in terms of security, is this overkill? - if [ -z "$(printf '%s' "$RUNNER_NAME" | grep -oE '^[a-zA-Z0-9_-]+$')" ]; then - echo "Bad runner name, please ensure runner name is [a-zA-Z0-9_-]." 
- exit 1 - fi - echo "RUNNER_NAME=$RUNNER_NAME" >> $GITHUB_ENV - # input.target_devices is not directly used, as this allows code injection case "$TARGET_DEVICE" in level_zero:*) ;; @@ -55,11 +46,11 @@ runs: echo "# This workflow is not guaranteed to work with other backends." echo "#" ;; esac - echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV - - name: Compute CPU core range to run benchmarks on shell: bash run: | + # Taken from ur-benchmark-reusable.yml: + # Compute the core range for the first NUMA node; second node is used by # UMF. Skip the first 4 cores as the kernel is likely to schedule more # work on these. @@ -76,37 +67,36 @@ runs: ZE_AFFINITY_MASK=0 echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV - - name: Checkout results repo - shell: bash - run: | - git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results - name: Run compute-benchmarks shell: bash run: | - # TODO generate summary + display helpful message here + cat << EOF + # + # NOTE TO DEVELOPERS: + # + + Check latter steps of the workflow: This job produces an artifact with: + - benchmark results from passing/failing tests + - log containing all failing (too slow) benchmarks + - log containing all erroring benchmarks + + While this step in the workflow provides debugging output describing this + information, it might be easier to inspect the logs from the artifact + instead. + + EOF + export ONEAPI_DEVICE_SELECTOR="${{ inputs.target_devices }}" export CMPLR_ROOT=./toolchain echo "-----" sycl-ls echo "-----" - pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt - echo "-----" - mkdir -p "./llvm-ci-perf-results/$RUNNER_NAME" - taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \ - "$(realpath ./llvm_test_workdir)" \ - --sycl "$(realpath ./toolchain)" \ - --save baseline \ - --output-html remote \ - --results-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ - --output-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ - --preset Minimal - echo "-----" - ls + taskset -c "$CORES" ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s || exit 1 - name: Push compute-benchmarks results if: always() shell: bash run: | - # TODO redo configuration - # $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) + # Load configuration values + $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) cd "./llvm-ci-perf-results" git config user.name "SYCL Benchmarking Bot" @@ -118,20 +108,20 @@ runs: echo "No new results added, skipping push." 
else git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" - git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" unify-ci + git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH" fi -# - name: Find benchmark result artifact here -# if: always() -# shell: bash -# run: | -# cat << EOF -# # -# # Artifact link for benchmark results here: -# # -# EOF -# - name: Archive compute-benchmark results -# if: always() -# uses: actions/upload-artifact@v4 -# with: -# name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }}) -# path: ./artifact + - name: Find benchmark result artifact here + if: always() + shell: bash + run: | + cat << EOF + # + # Artifact link for benchmark results here: + # + EOF + - name: Archive compute-benchmark results + if: always() + uses: actions/upload-artifact@v4 + with: + name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }}) + path: ./artifact diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml new file mode 100644 index 0000000000000..375bc20faf857 --- /dev/null +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -0,0 +1,134 @@ +name: 'Run benchmarks' + +# This action assumes the following prerequisites: +# +# - SYCL is placed in ./toolchain -- TODO change this +# - /devops has been checked out in ./devops. +# - env.GITHUB_TOKEN was properly set, because according to Github, that's +# apparently the recommended way to pass a secret into a github action: + +# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets +# +# - env.RUNNER_TAG set to the runner tag used to run this workflow: Currently, +# only specific runners are fully supported. + +inputs: + target_devices: + type: string + required: True + +runs: + using: "composite" + steps: + - name: Check specified runner type / target backend + shell: bash + env: + TARGET_DEVICE: ${{ inputs.target_devices }} + RUNNER_NAME: ${{ runner.name }} + run: | + case "$RUNNER_TAG" in + '["PVC_PERF"]' ) ;; + *) + echo "#" + echo "# WARNING: Only specific tuned runners are fully supported." + echo "# This workflow is not guaranteed to work with other runners." + echo "#" ;; + esac + + # Ensure runner name has nothing injected + # TODO: in terms of security, is this overkill? + if [ -z "$(printf '%s' "$RUNNER_NAME" | grep -oE '^[a-zA-Z0-9_-]+$')" ]; then + echo "Bad runner name, please ensure runner name is [a-zA-Z0-9_-]." + exit 1 + fi + echo "RUNNER_NAME=$RUNNER_NAME" >> $GITHUB_ENV + + # input.target_devices is not directly used, as this allows code injection + case "$TARGET_DEVICE" in + level_zero:*) ;; + *) + echo "#" + echo "# WARNING: Only level_zero backend is fully supported." + echo "# This workflow is not guaranteed to work with other backends." + echo "#" ;; + esac + echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV + + - name: Compute CPU core range to run benchmarks on + shell: bash + run: | + # Compute the core range for the first NUMA node; second node is used by + # UMF. Skip the first 4 cores as the kernel is likely to schedule more + # work on these. 
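+        #
+        # The awk below prints the first range of NUMA node 0's CPU list with a
+        # leading 0 bumped to 4; e.g. a hypothetical "0-55,112-167" yields "4-55".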
+ CORES="$(lscpu | awk ' + /NUMA node0 CPU|On-line CPU/ {line=$0} + END { + split(line, a, " ") + split(a[4], b, ",") + sub(/^0/, "4", b[1]) + print b[1] + }')" + echo "CPU core range to use: $CORES" + echo "CORES=$CORES" >> $GITHUB_ENV + + ZE_AFFINITY_MASK=0 + echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV + - name: Checkout results repo + shell: bash + run: | + git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results + - name: Run compute-benchmarks + shell: bash + run: | + # TODO generate summary + display helpful message here + export CMPLR_ROOT=./toolchain + echo "-----" + sycl-ls + echo "-----" + pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt + echo "-----" + mkdir -p "./llvm-ci-perf-results/$RUNNER_NAME" + taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \ + "$(realpath ./llvm_test_workdir)" \ + --sycl "$(realpath ./toolchain)" \ + --save baseline \ + --output-html remote \ + --results-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ + --output-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ + --preset Minimal + echo "-----" + ls + - name: Push compute-benchmarks results + if: always() + shell: bash + run: | + # TODO redo configuration + # $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) + + cd "./llvm-ci-perf-results" + git config user.name "SYCL Benchmarking Bot" + git config user.email "sys_sycl_benchmarks@intel.com" + git pull + git add . + # Make sure changes have been made + if git diff --quiet && git diff --cached --quiet; then + echo "No new results added, skipping push." + else + git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" + git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" unify-ci + fi +# - name: Find benchmark result artifact here +# if: always() +# shell: bash +# run: | +# cat << EOF +# # +# # Artifact link for benchmark results here: +# # +# EOF +# - name: Archive compute-benchmark results +# if: always() +# uses: actions/upload-artifact@v4 +# with: +# name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }}) +# path: ./artifact From a3edf7aff115c3ebb64c90afe042a177ad4ea2c4 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 13 Mar 2025 12:47:26 -0700 Subject: [PATCH 037/114] Add benchmarking results to sycl-docs.yml --- .github/workflows/sycl-docs.yml | 1 + devops/scripts/benchmarks/html/config.js | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/sycl-docs.yml b/.github/workflows/sycl-docs.yml index 5c1e8e425111b..a45c56bdd869c 100644 --- a/.github/workflows/sycl-docs.yml +++ b/.github/workflows/sycl-docs.yml @@ -49,6 +49,7 @@ jobs: mkdir clang mv $GITHUB_WORKSPACE/build/tools/sycl/doc/html/* . mv $GITHUB_WORKSPACE/build/tools/clang/docs/html/* clang/ + cp -r $GITHUB_WORKSPACE/repo/devops/scripts/benchmarks/html benchmarks touch .nojekyll # Upload the generated docs as an artifact and deploy to GitHub Pages. 
- name: Upload artifact diff --git a/devops/scripts/benchmarks/html/config.js b/devops/scripts/benchmarks/html/config.js index 3e67ae1dce8e5..0a8551c5de152 100644 --- a/devops/scripts/benchmarks/html/config.js +++ b/devops/scripts/benchmarks/html/config.js @@ -1,2 +1,2 @@ -//remoteDataUrl = 'https://example.com/data.json'; +remoteDataUrl = 'https://raw.githubusercontent.com/intel/llvm-ci-perf-results/refs/heads/unify-ci/UR_DNP_INTEL_06_03/data.json'; //defaultCompareNames = ['baseline']; From 6620e4a889664a031414af2107e423f9b7e60169 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Thu, 13 Mar 2025 21:20:44 +0100 Subject: [PATCH 038/114] [CI] Bump compute bench (#17431) - [x] remove the second, test commit --- .github/workflows/ur-benchmarks-reusable.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/ur-benchmarks-reusable.yml b/.github/workflows/ur-benchmarks-reusable.yml index 0aecbffc20fe7..d7c32edfdfc2a 100644 --- a/.github/workflows/ur-benchmarks-reusable.yml +++ b/.github/workflows/ur-benchmarks-reusable.yml @@ -161,6 +161,7 @@ jobs: taskset -c "${{ env.CORES }}" ${BENCH_SCRIPTS_DIR}/main.py ~/llvm_bench_workdir --sycl ${{ github.workspace }}/sycl_build + --ur ${{ github.workspace }}/ur_install --adapter ${{ matrix.adapter.str_name }} --compare baseline --compute-runtime ${{ inputs.compute_runtime_commit }} @@ -168,9 +169,6 @@ jobs: ${{ inputs.upload_report && '--output-html' || '' }} ${{ inputs.pr_no != 0 && '--output-markdown' || '' }} ${{ inputs.bench_script_params }} - # Temporarily disabled due to build faiures - # https://github.com/intel/llvm/actions/runs/13814877162/job/38645384849#step:14:849 - # --ur ${{ github.workspace }}/ur_install - name: Print benchmark results run: | From f4a2e39ad21e498d090fcacf62e519574a3cc0b6 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 13 Mar 2025 16:03:50 -0700 Subject: [PATCH 039/114] Initial implementation of unified benchmark workflow --- .github/workflows/benchmark.yml | 122 ++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 .github/workflows/benchmark.yml diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 0000000000000..3837b119a10e3 --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,122 @@ +name: Run Benchmarks + +on: + schedule: + - cron: '0 1 * * *' # 2 hrs earlier than sycl-nightly.yml + workflow_call: + inputs: + pr_no: + type: number + required: false + upload_results: + type: bool + required: true + runner: + type: string + required: true + backend: + type: string + required: true + reset_intel_gpu: + type: bool + required: true + default: true + + workflow_dispatch: + inputs: + pr_no: + description: Specific PR no. 
to build + type: number + required: false + upload_results: + description: 'Save and upload results' + type: choice + options: + - false + - true + default: true + runner: + type: choice + options: + - '["PVC_PERF"]' + backend: + description: Backend to use + type: choice + options: + - 'level_zero:gpu' + # TODO L0 V2 support + reset_intel_gpu: + description: Reset Intel GPUs + type: choice + options: + - false + - true + default: true + +permissions: + contents: read + packages: read + +jobs: + build_sycl: + name: Build SYCL from PR + if: inputs.pr_no != null + uses: ./.github/workflows/sycl-linux-build.yml + with: + build_ref: "origin/pr/${{ inputs.pr_no }}/merge" + build_cache_root: "/__w/" + build_artifact_suffix: "default" + build_cache_suffix: "default" + # Docker image has last nightly pre-installed and added to the PATH + build_image: "ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest" + cc: clang + cxx: clang++ + + run_benchmarks_build: + name: Run Benchmarks (on PR Build) + needs: [ build_sycl ] + if: inputs.pr_no != null + strategy: + matrix: + # Set default values if not specified: + include: + - runner: ${{ inputs.runner || '["PVC_PERF"]' }} + backend: ${{ inputs.backend || 'level_zero:gpu' }} + reset_intel_gpu: ${{ inputs.reset_intel_gpu || true }} + ref: origin/pr/${{ inputs.pr_no }}/merge + uses: ./.github/workflows/sycl-linux-run-tests.yml + secrets: inherit + with: + # TODO support other benchmarks + name: Run compute-benchmarks (${{ matrix.runner }}, ${{ matrix.backend }}) + runner: ${{ matrix.runner }} + image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest + image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN + target_devices: ${{ matrix.backend }} + reset_intel_gpu: ${{ matrix.reset_intel_gpu }} + tests_selector: benchmark_v2 + repo_ref: ${{ matrix.ref }} + sycl_toolchain_artifact: sycl_linux_default + sycl_toolchain_archive: ${{ needs.build_sycl.outputs.artifact_archive_name }} + sycl_toolchain_decompress_command: ${{ needs.build_sycl.outputs.artifact_decompress_command }} + + run_benchmarks_nightly: + name: Run Benchmarks (on Nightly Build) + if: inputs.pr_no == 0 + strategy: + matrix: + # Set default values if not specified: + include: + - runner: ${{ inputs.runner || '["PVC_PERF"]' }} + backend: ${{ inputs.backend || 'level_zero:gpu' }} + reset_intel_gpu: ${{ inputs.reset_intel_gpu || true }} + uses: ./.github/workflows/sycl-linux-run-tests.yml + with: + # TODO support other benchmarks + name: Run compute-benchmarks (${{ matrix.runner }}, ${{ matrix.backend }}) + runner: ${{ matrix.runner }} + image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest + image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN + target_devices: ${{ matrix.backend }} + reset_intel_gpu: ${{ matrix.reset_intel_gpu }} + tests_selector: benchmark_v2 \ No newline at end of file From 38394bb5bff746d9b6e57da0f99d91d530412641 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 13 Mar 2025 16:11:56 -0700 Subject: [PATCH 040/114] [CI] Use commit hash instead, fix issues with run --- .github/workflows/benchmark.yml | 39 ++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 3837b119a10e3..f044cbb066757 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -5,11 +5,11 @@ on: - cron: '0 1 * * *' # 2 hrs earlier than sycl-nightly.yml 
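   # workflow_call / workflow_dispatch may pass commit_hash to build and
   # benchmark a specific intel/llvm revision; the scheduled nightly run leaves
   # it empty and benchmarks the pre-installed nightly toolchain instead.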
workflow_call: inputs: - pr_no: - type: number + commit_hash: + type: string required: false upload_results: - type: bool + type: string # true/false: workflow_dispatch does not support booleans required: true runner: type: string @@ -18,16 +18,17 @@ on: type: string required: true reset_intel_gpu: - type: bool + type: string # true/false: workflow_dispatch does not support booleans required: true default: true workflow_dispatch: inputs: - pr_no: - description: Specific PR no. to build - type: number + commit_hash: + description: Commit hash to build intel/llvm from + type: string required: false + default: '' upload_results: description: 'Save and upload results' type: choice @@ -53,17 +54,15 @@ on: - true default: true -permissions: - contents: read - packages: read +permissions: read-all jobs: build_sycl: name: Build SYCL from PR - if: inputs.pr_no != null + if: inputs.commit_hash != '' uses: ./.github/workflows/sycl-linux-build.yml with: - build_ref: "origin/pr/${{ inputs.pr_no }}/merge" + build_ref: ${{ inputs.commit_hash }} build_cache_root: "/__w/" build_artifact_suffix: "default" build_cache_suffix: "default" @@ -71,19 +70,20 @@ jobs: build_image: "ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest" cc: clang cxx: clang++ + changes: '[]' run_benchmarks_build: name: Run Benchmarks (on PR Build) needs: [ build_sycl ] - if: inputs.pr_no != null + if: inputs.commit_hash != '' strategy: matrix: # Set default values if not specified: include: - runner: ${{ inputs.runner || '["PVC_PERF"]' }} backend: ${{ inputs.backend || 'level_zero:gpu' }} - reset_intel_gpu: ${{ inputs.reset_intel_gpu || true }} - ref: origin/pr/${{ inputs.pr_no }}/merge + reset_intel_gpu: ${{ inputs.reset_intel_gpu || 'true' }} + ref: ${{ inputs.commit_hash }} uses: ./.github/workflows/sycl-linux-run-tests.yml secrets: inherit with: @@ -96,21 +96,23 @@ jobs: reset_intel_gpu: ${{ matrix.reset_intel_gpu }} tests_selector: benchmark_v2 repo_ref: ${{ matrix.ref }} + devops_ref: ${{ github.ref }} sycl_toolchain_artifact: sycl_linux_default sycl_toolchain_archive: ${{ needs.build_sycl.outputs.artifact_archive_name }} sycl_toolchain_decompress_command: ${{ needs.build_sycl.outputs.artifact_decompress_command }} run_benchmarks_nightly: name: Run Benchmarks (on Nightly Build) - if: inputs.pr_no == 0 + if: inputs.commit_hash == '' strategy: matrix: # Set default values if not specified: include: - runner: ${{ inputs.runner || '["PVC_PERF"]' }} backend: ${{ inputs.backend || 'level_zero:gpu' }} - reset_intel_gpu: ${{ inputs.reset_intel_gpu || true }} + reset_intel_gpu: ${{ inputs.reset_intel_gpu || 'true' }} uses: ./.github/workflows/sycl-linux-run-tests.yml + secrets: inherit with: # TODO support other benchmarks name: Run compute-benchmarks (${{ matrix.runner }}, ${{ matrix.backend }}) @@ -119,4 +121,5 @@ jobs: image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN target_devices: ${{ matrix.backend }} reset_intel_gpu: ${{ matrix.reset_intel_gpu }} - tests_selector: benchmark_v2 \ No newline at end of file + tests_selector: benchmark_v2 + repo_ref: ${{ github.ref }} From f232b93cec0f35c07e2c2ac416bc7699523b0496 Mon Sep 17 00:00:00 2001 From: pbalcer Date: Fri, 14 Mar 2025 10:59:56 +0100 Subject: [PATCH 041/114] add benchmark metadata --- devops/scripts/benchmarks/benches/base.py | 25 ++- devops/scripts/benchmarks/benches/compute.py | 29 +++- devops/scripts/benchmarks/benches/test.py | 40 +++-- devops/scripts/benchmarks/html/index.html | 121 +++++++++++++- 
devops/scripts/benchmarks/html/scripts.js | 158 ++++++++++++++++--- devops/scripts/benchmarks/main.py | 21 ++- devops/scripts/benchmarks/output_html.py | 22 ++- devops/scripts/benchmarks/utils/result.py | 9 ++ 8 files changed, 376 insertions(+), 49 deletions(-) diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py index 77365220dbf85..1135a267864a9 100644 --- a/devops/scripts/benchmarks/benches/base.py +++ b/devops/scripts/benchmarks/benches/base.py @@ -6,7 +6,7 @@ import os import shutil from pathlib import Path -from utils.result import Result +from utils.result import BenchmarkMetadata, Result from options import options from utils.utils import download, run import urllib.request @@ -78,6 +78,9 @@ def download( def name(self): raise NotImplementedError() + def description(self): + return "No description provided." + def lower_is_better(self): return True @@ -96,6 +99,23 @@ def stddev_threshold(self): def get_suite_name(self) -> str: return self.suite.name() + def result_names(self) -> list[str]: + return [self.name()] + + def notes(self) -> str: + return None + + def unstable(self) -> str: + return None + + def get_metadata(self) -> BenchmarkMetadata: + return BenchmarkMetadata( + type='benchmark', + description=self.description(), + notes=self.notes(), + unstable=self.unstable(), + ) + class Suite: def benchmarks(self) -> list[Benchmark]: @@ -106,3 +126,6 @@ def name(self) -> str: def setup(self): return + + def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: + return {} diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index bc9d1d9d80d8a..67ec0bf2087ff 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -8,7 +8,7 @@ import io from utils.utils import run, git_clone, create_build_path from .base import Benchmark, Suite -from utils.result import Result +from utils.result import BenchmarkMetadata, Result from options import options from enum import Enum @@ -54,6 +54,23 @@ def setup(self): self.built = True + def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: + return { + "SubmitKernel" : BenchmarkMetadata( + type="group", + description="Measures CPU time overhead of submitting kernels through different APIs.", + notes="Each layer builds on top of the previous layer, adding functionality and overhead. " + "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API. " + "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance." 
+ "Work is ongoing to reduce the overhead of the SYCL API", + ), + "SinKernelGraph" : BenchmarkMetadata( + type="group", + unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.", + ), + } + + def benchmarks(self) -> list[Benchmark]: if options.sycl is None: return [] @@ -106,14 +123,7 @@ def benchmarks(self) -> list[Benchmark]: SubmitKernelUR(self, 1, 0), SubmitKernelUR(self, 1, 1), MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1), - MemcpyExecute(self, 100, 8, 102400, 10, 1, 1, 1), - MemcpyExecute(self, 400, 8, 1024, 1000, 1, 1, 1), - MemcpyExecute(self, 10, 16, 1024, 10000, 1, 1, 1), MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1), - MemcpyExecute(self, 100, 8, 102400, 10, 0, 1, 1), - MemcpyExecute(self, 400, 8, 1024, 1000, 0, 1, 1), - MemcpyExecute(self, 10, 16, 1024, 10000, 0, 1, 1), - MemcpyExecute(self, 4096, 1, 1024, 10, 0, 1, 0), MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0), GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 5), GraphApiSinKernelGraph(self, RUNTIMES.UR, 1, 5), @@ -540,6 +550,9 @@ def description(self) -> str: def name(self): return f"graph_api_benchmark_{self.runtime.value} SinKernelGraph graphs:{self.withGraphs}, numKernels:{self.numKernels}" + def unstable(self) -> str: + return "This benchmark combines both eager and graph execution, and may not be representative of real use cases." + def bin_args(self) -> list[str]: return [ "--iterations=10000", diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py index 18794d4e9c73c..e7451e24f25cf 100644 --- a/devops/scripts/benchmarks/benches/test.py +++ b/devops/scripts/benchmarks/benches/test.py @@ -6,7 +6,7 @@ import random from utils.utils import git_clone from .base import Benchmark, Suite -from utils.result import Result +from utils.result import BenchmarkMetadata, Result from utils.utils import run, create_build_path from options import options import os @@ -24,33 +24,49 @@ def name(self) -> str: def benchmarks(self) -> list[Benchmark]: bench_configs = [ - ("Memory Bandwidth", 2000, 200, "Foo Group"), - ("Latency", 100, 20, "Bar Group"), - ("Throughput", 1500, 150, "Foo Group"), - ("FLOPS", 3000, 300, "Foo Group"), - ("Cache Miss Rate", 250, 25, "Bar Group"), + ("Memory Bandwidth", 2000, 200, "Foo Group", None, None), + ("Latency", 100, 20, "Bar Group", "A Latency test note!", None), + ("Throughput", 1500, 150, "Foo Group", None, None), + ("FLOPS", 3000, 300, "Foo Group", None, "Unstable FLOPS test!"), + ("Cache Miss Rate", 250, 25, "Bar Group", "Test Note", "And another note!"), ] result = [] - for base_name, base_value, base_diff, group in bench_configs: + for base_name, base_value, base_diff, group, notes, unstable in bench_configs: for variant in range(6): value_multiplier = 1.0 + (variant * 0.2) name = f"{base_name} {variant+1}" value = base_value * value_multiplier diff = base_diff * value_multiplier - result.append(TestBench(self, name, value, diff, group)) + result.append(TestBench(self, name, value, diff, group, notes, unstable)) return result + def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: + return { + "Foo Group" : BenchmarkMetadata( + type="group", + description="This is a test benchmark for Foo Group.", + notes="This is a test note for Foo Group.", + ), + "Bar Group" : BenchmarkMetadata( + type="group", + description="This is a test benchmark for Bar Group.", + unstable="This is an unstable note for Bar Group.", + ), + } + class TestBench(Benchmark): - def __init__(self, suite, name, 
value, diff, group=""): + def __init__(self, suite, name, value, diff, group="", notes=None, unstable=None): super().__init__("", suite) self.bname = name self.value = value self.diff = diff self.group = group + self.notes_text = notes + self.unstable_text = unstable def name(self): return self.bname @@ -64,6 +80,12 @@ def setup(self): def description(self) -> str: return f"This is a test benchmark for {self.bname}." + def notes(self) -> str: + return self.notes_text + + def unstable(self) -> str: + return self.unstable_text + def run(self, env_vars) -> list[Result]: random_value = self.value + random.uniform(-1 * (self.diff), self.diff) return [ diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html index c10844f15c707..c40174b7f35a0 100644 --- a/devops/scripts/benchmarks/html/index.html +++ b/devops/scripts/benchmarks/html/index.html @@ -171,7 +171,98 @@ .extra-info-entry em { color: #555; } - + .display-options-container { + text-align: center; + margin-bottom: 24px; + padding: 16px; + background: #e9ecef; + border-radius: 8px; + } + .display-options-container label { + margin: 0 12px; + cursor: pointer; + } + .display-options-container input { + margin-right: 8px; + } + .benchmark-note { + background-color: #cfe2ff; + color: #084298; + padding: 10px; + margin-bottom: 10px; + border-radius: 5px; + border-left: 4px solid #084298; + } + .benchmark-unstable { + background-color: #f8d7da; + color: #842029; + padding: 10px; + margin-bottom: 10px; + border-radius: 5px; + border-left: 4px solid #842029; + } + .note-text { + color: #084298; + } + .unstable-warning { + color: #842029; + font-weight: bold; + } + .unstable-text { + color: #842029; + } + .options-container { + margin-bottom: 24px; + background: #e9ecef; + border-radius: 8px; + overflow: hidden; + } + .options-container summary { + padding: 12px 16px; + font-weight: 500; + cursor: pointer; + background: #dee2e6; + user-select: none; + } + .options-container summary:hover { + background: #ced4da; + } + .options-content { + padding: 16px; + display: flex; + flex-wrap: wrap; + gap: 24px; + } + .filter-section { + flex: 1; + min-width: 300px; + } + .filter-section h3 { + margin-top: 0; + margin-bottom: 12px; + font-size: 18px; + font-weight: 500; + text-align: left; + } + #suite-filters { + display: flex; + flex-wrap: wrap; + gap: 8px; + } + .display-options { + display: flex; + flex-direction: column; + gap: 8px; + } + .display-options label { + display: flex; + align-items: center; + cursor: pointer; + } + .display-options input { + margin-right: 8px; + } +
@@ -182,9 +273,6 @@

Benchmark Results

-
- -
+ Director's commentary + + +
+
+ +
Historical Results
diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js index 2bd52a70b07c8..ed7e361e14275 100644 --- a/devops/scripts/benchmarks/html/scripts.js +++ b/devops/scripts/benchmarks/html/scripts.js @@ -12,6 +12,10 @@ let timeseriesData, barChartsData, allRunNames; // DOM Elements let runSelect, selectedRunsDiv, suiteFiltersContainer; +// Add this at the top of the file with the other variable declarations +let showNotes = true; +let showUnstable = false; + // Run selector functions function updateSelectedRuns(forceUpdate = true) { selectedRunsDiv.innerHTML = ''; @@ -85,7 +89,8 @@ function createChart(data, containerId, type) { title: { display: true, text: data.unit - } + }, + grace: '20%', } } }; @@ -178,7 +183,7 @@ function drawCharts(filteredTimeseriesData, filteredBarChartsData) { // Create timeseries charts filteredTimeseriesData.forEach((data, index) => { const containerId = `timeseries-${index}`; - const container = createChartContainer(data, containerId); + const container = createChartContainer(data, containerId, 'benchmark'); document.querySelector('.timeseries .charts').appendChild(container); createChart(data, containerId, 'time'); }); @@ -186,7 +191,7 @@ function drawCharts(filteredTimeseriesData, filteredBarChartsData) { // Create bar charts filteredBarChartsData.forEach((data, index) => { const containerId = `barchart-${index}`; - const container = createChartContainer(data, containerId); + const container = createChartContainer(data, containerId, 'group'); document.querySelector('.bar-charts .charts').appendChild(container); createChart(data, containerId, 'bar'); }); @@ -195,11 +200,41 @@ function drawCharts(filteredTimeseriesData, filteredBarChartsData) { filterCharts(); } -function createChartContainer(data, canvasId) { +function createChartContainer(data, canvasId, type) { const container = document.createElement('div'); container.className = 'chart-container'; container.setAttribute('data-label', data.label); container.setAttribute('data-suite', data.suite); + + // Check if this benchmark is marked as unstable + const metadata = metadataForLabel(data.label, type); + if (metadata && metadata.unstable) { + container.setAttribute('data-unstable', 'true'); + + // Add unstable warning + const unstableWarning = document.createElement('div'); + unstableWarning.className = 'benchmark-unstable'; + unstableWarning.textContent = metadata.unstable; + unstableWarning.style.display = showUnstable ? 'block' : 'none'; + container.appendChild(unstableWarning); + } + + // Add notes if present + if (metadata && metadata.notes) { + const noteElement = document.createElement('div'); + noteElement.className = 'benchmark-note'; + noteElement.textContent = metadata.notes; + noteElement.style.display = showNotes ? 
'block' : 'none'; + container.appendChild(noteElement); + } + + // Add description if present in metadata, but only for groups + if (metadata && metadata.description && metadata.type === "group") { + const descElement = document.createElement('div'); + descElement.className = 'benchmark-description'; + descElement.textContent = metadata.description; + container.appendChild(descElement); + } const canvas = document.createElement('canvas'); canvas.id = canvasId; @@ -221,11 +256,10 @@ function createChartContainer(data, canvasId) { summary.appendChild(downloadButton); details.appendChild(summary); - latestRunsLookup = createLatestRunsLookup(benchmarkRuns); - // Create and append extra info const extraInfo = document.createElement('div'); extraInfo.className = 'extra-info'; + latestRunsLookup = createLatestRunsLookup(benchmarkRuns); extraInfo.innerHTML = generateExtraInfo(latestRunsLookup, data); details.appendChild(extraInfo); @@ -234,6 +268,16 @@ function createChartContainer(data, canvasId) { return container; } +function metadataForLabel(label, type) { + for (const [key, metadata] of Object.entries(benchmarkMetadata)) { + if (metadata.type === type && label.startsWith(key)) { + return metadata; + } + } + + return null; +} + // Pre-compute a lookup for the latest run per label function createLatestRunsLookup(benchmarkRuns) { const latestRunsMap = new Map(); @@ -259,17 +303,31 @@ function generateExtraInfo(latestRunsLookup, data) { const labels = data.datasets ? data.datasets.map(dataset => dataset.label) : [data.label]; return labels.map(label => { + const metadata = metadataForLabel(label); const latestRun = latestRunsLookup.get(label); - - if (latestRun) { - return `
- ${label}: ${formatCommand(latestRun.result)}
- Description: ${latestRun.result.description} -
`; + + let html = '
'; + + if (metadata) { + html += `${label}: ${formatCommand(latestRun.result)}
`; + + if (metadata.description) { + html += `Description: ${metadata.description}`; + } + + if (metadata.notes) { + html += `
Notes: ${metadata.notes}`; + } + + if (metadata.unstable) { + html += `
⚠️ Unstable: ${metadata.unstable}`; + } + } else { + html += `${label}: No data available`; } - return `
- ${label}: No data available -
`; + + html += '
'; + return html; }).join(''); } @@ -331,6 +389,10 @@ function updateURL() { url.searchParams.delete('runs'); } + // Add toggle states to URL + url.searchParams.set('notes', showNotes); + url.searchParams.set('unstable', showUnstable); + history.replaceState(null, '', url); } @@ -342,7 +404,19 @@ function filterCharts() { document.querySelectorAll('.chart-container').forEach(container => { const label = container.getAttribute('data-label'); const suite = container.getAttribute('data-suite'); - container.style.display = (regex.test(label) && activeSuites.includes(suite)) ? '' : 'none'; + const isUnstable = container.getAttribute('data-unstable') === 'true'; + + // Hide unstable benchmarks if showUnstable is false + const shouldShow = regex.test(label) && + activeSuites.includes(suite) && + (showUnstable || !isUnstable); + + container.style.display = shouldShow ? '' : 'none'; + }); + + // Update notes visibility + document.querySelectorAll('.benchmark-note').forEach(note => { + note.style.display = showNotes ? 'block' : 'none'; }); updateURL(); @@ -395,13 +469,20 @@ function processBarChartsData(benchmarkRuns) { if (!result.explicit_group) return; if (!groupedResults[result.explicit_group]) { + // Look up group metadata + const groupMetadata = metadataForLabel(result.explicit_group); + groupedResults[result.explicit_group] = { label: result.explicit_group, suite: result.suite, unit: result.unit, lower_is_better: result.lower_is_better, labels: [], - datasets: [] + datasets: [], + // Add metadata if available + description: groupMetadata?.description || null, + notes: groupMetadata?.notes || null, + unstable: groupMetadata?.unstable || null }; } @@ -466,6 +547,43 @@ function setupSuiteFilters() { }); } +function setupToggles() { + const notesToggle = document.getElementById('show-notes'); + const unstableToggle = document.getElementById('show-unstable'); + + notesToggle.addEventListener('change', function() { + showNotes = this.checked; + // Update all note elements visibility + document.querySelectorAll('.benchmark-note').forEach(note => { + note.style.display = showNotes ? 'block' : 'none'; + }); + filterCharts(); + }); + + unstableToggle.addEventListener('change', function() { + showUnstable = this.checked; + // Update all unstable warning elements visibility + document.querySelectorAll('.benchmark-unstable').forEach(warning => { + warning.style.display = showUnstable ? 
'block' : 'none'; + }); + filterCharts(); + }); + + // Initialize from URL params if present + const notesParam = getQueryParam('notes'); + const unstableParam = getQueryParam('unstable'); + + if (notesParam !== null) { + showNotes = notesParam === 'true'; + notesToggle.checked = showNotes; + } + + if (unstableParam !== null) { + showUnstable = unstableParam === 'true'; + unstableToggle.checked = showUnstable; + } +} + function initializeCharts() { // Process raw data timeseriesData = processTimeseriesData(benchmarkRuns); @@ -502,6 +620,7 @@ function initializeCharts() { // Setup UI components setupRunSelector(); setupSuiteFilters(); + setupToggles(); // Apply URL parameters const regexParam = getQueryParam('regex'); @@ -542,7 +661,8 @@ function loadData() { fetch(remoteDataUrl) .then(response => response.json()) .then(data => { - benchmarkRuns = data; + benchmarkRuns = data.runs || data; + benchmarkMetadata = data.metadata || benchmarkMetadata || {}; initializeCharts(); }) .catch(error => { @@ -553,7 +673,7 @@ function loadData() { loadingIndicator.style.display = 'none'; // Hide loading indicator }); } else { - // Use local data + // Use local data (benchmarkRuns and benchmarkMetadata should be defined in data.js) initializeCharts(); loadingIndicator.style.display = 'none'; // Hide loading indicator } diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 1d7304ea5e212..8db0549a861a4 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -137,6 +137,18 @@ def process_results( return valid_results, processed +def collect_metadata(suites): + metadata = {} + + for s in suites: + metadata.update(s.additionalMetadata()) + suite_benchmarks = s.benchmarks() + for benchmark in suite_benchmarks: + metadata[benchmark.name()] = benchmark.get_metadata() + + return metadata + + def main(directory, additional_env_vars, save_name, compare_names, filter): prepare_workdir(directory, INTERNAL_WORKDIR_VERSION) @@ -160,6 +172,13 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): else [] ) + # Collect metadata from all benchmarks without setting them up + metadata = collect_metadata(suites) + + # If dry run, we're done + if options.dry_run: + suites = [] + benchmarks = [] failures = {} @@ -290,7 +309,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): html_path = options.output_directory if options.output_directory is None: html_path = os.path.join(os.path.dirname(__file__), "html") - generate_html(history.runs, compare_names, html_path) + generate_html(history.runs, compare_names, html_path, metadata) def validate_and_parse_env_args(env_args): diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py index 49b4d1d84a214..b71f87371b383 100644 --- a/devops/scripts/benchmarks/output_html.py +++ b/devops/scripts/benchmarks/output_html.py @@ -6,10 +6,17 @@ import json import os from options import options +from utils.result import BenchmarkMetadata -def generate_html(benchmark_runs: list, compare_names: list[str], html_path: str): +def generate_html( + benchmark_runs: list, + compare_names: list[str], + html_path: str, + metadata: dict[str, BenchmarkMetadata], +): benchmark_runs.sort(key=lambda run: run.date, reverse=True) + serializable_metadata = {k: v.__dict__ for k, v in metadata.items()} if options.output_html == "local": data_path = os.path.join(html_path, "data.js") @@ -26,6 +33,11 @@ def generate_html(benchmark_runs: list, compare_names: 
list[str], html_path: str f.write("\n];\n\n") # terminates benchmarkRuns + f.write("benchmarkMetadata = ") + json.dump(serializable_metadata, f) + + f.write(";\n\n") # terminates benchmarkMetadata + f.write("defaultCompareNames = ") json.dump(compare_names, f) f.write(";\n") # terminates defaultCompareNames @@ -34,12 +46,8 @@ def generate_html(benchmark_runs: list, compare_names: list[str], html_path: str else: data_path = os.path.join(html_path, "data.json") with open(data_path, "w") as f: - f.write("[\n") - for i, run in enumerate(benchmark_runs): - if i > 0: - f.write(",\n") - f.write(run.to_json()) - f.write("\n]\n") + json_data = {"runs": benchmark_runs, "metadata": serializable_metadata} + json.dump(json_data, f, indent=2) print( f"Upload {data_path} to a location set in config.js remoteDataUrl argument." diff --git a/devops/scripts/benchmarks/utils/result.py b/devops/scripts/benchmarks/utils/result.py index 7d82d9e488edf..11d837068b887 100644 --- a/devops/scripts/benchmarks/utils/result.py +++ b/devops/scripts/benchmarks/utils/result.py @@ -42,3 +42,12 @@ class BenchmarkRun: default=None, metadata=config(encoder=datetime.isoformat, decoder=datetime.fromisoformat), ) + + +@dataclass_json +@dataclass +class BenchmarkMetadata: + type: str = 'benchmark' # or 'group' + description: Optional[str] = None + notes: Optional[str] = None + unstable: Optional[str] = None From 30cd308f4faec6e884234fec382c0580cec8f9ca Mon Sep 17 00:00:00 2001 From: pbalcer Date: Fri, 14 Mar 2025 12:23:37 +0100 Subject: [PATCH 042/114] apply formatting --- devops/scripts/benchmarks/benches/base.py | 2 +- devops/scripts/benchmarks/benches/compute.py | 11 +++--- devops/scripts/benchmarks/benches/test.py | 8 ++-- devops/scripts/benchmarks/html/scripts.js | 40 ++++++++++---------- devops/scripts/benchmarks/utils/result.py | 2 +- 5 files changed, 32 insertions(+), 31 deletions(-) diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py index 1135a267864a9..8403097eca168 100644 --- a/devops/scripts/benchmarks/benches/base.py +++ b/devops/scripts/benchmarks/benches/base.py @@ -110,7 +110,7 @@ def unstable(self) -> str: def get_metadata(self) -> BenchmarkMetadata: return BenchmarkMetadata( - type='benchmark', + type="benchmark", description=self.description(), notes=self.notes(), unstable=self.unstable(), diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index 67ec0bf2087ff..9386f4d2b1b35 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -56,21 +56,20 @@ def setup(self): def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: return { - "SubmitKernel" : BenchmarkMetadata( + "SubmitKernel": BenchmarkMetadata( type="group", description="Measures CPU time overhead of submitting kernels through different APIs.", notes="Each layer builds on top of the previous layer, adding functionality and overhead. " - "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API. " - "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance." - "Work is ongoing to reduce the overhead of the SYCL API", + "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API. " + "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance." 
+ "Work is ongoing to reduce the overhead of the SYCL API", ), - "SinKernelGraph" : BenchmarkMetadata( + "SinKernelGraph": BenchmarkMetadata( type="group", unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.", ), } - def benchmarks(self) -> list[Benchmark]: if options.sycl is None: return [] diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py index e7451e24f25cf..3802597f5c48a 100644 --- a/devops/scripts/benchmarks/benches/test.py +++ b/devops/scripts/benchmarks/benches/test.py @@ -39,18 +39,20 @@ def benchmarks(self) -> list[Benchmark]: value = base_value * value_multiplier diff = base_diff * value_multiplier - result.append(TestBench(self, name, value, diff, group, notes, unstable)) + result.append( + TestBench(self, name, value, diff, group, notes, unstable) + ) return result def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: return { - "Foo Group" : BenchmarkMetadata( + "Foo Group": BenchmarkMetadata( type="group", description="This is a test benchmark for Foo Group.", notes="This is a test note for Foo Group.", ), - "Bar Group" : BenchmarkMetadata( + "Bar Group": BenchmarkMetadata( type="group", description="This is a test benchmark for Bar Group.", unstable="This is an unstable note for Bar Group.", diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js index ed7e361e14275..4136bb647b079 100644 --- a/devops/scripts/benchmarks/html/scripts.js +++ b/devops/scripts/benchmarks/html/scripts.js @@ -205,12 +205,12 @@ function createChartContainer(data, canvasId, type) { container.className = 'chart-container'; container.setAttribute('data-label', data.label); container.setAttribute('data-suite', data.suite); - + // Check if this benchmark is marked as unstable const metadata = metadataForLabel(data.label, type); if (metadata && metadata.unstable) { container.setAttribute('data-unstable', 'true'); - + // Add unstable warning const unstableWarning = document.createElement('div'); unstableWarning.className = 'benchmark-unstable'; @@ -218,7 +218,7 @@ function createChartContainer(data, canvasId, type) { unstableWarning.style.display = showUnstable ? 'block' : 'none'; container.appendChild(unstableWarning); } - + // Add notes if present if (metadata && metadata.notes) { const noteElement = document.createElement('div'); @@ -227,7 +227,7 @@ function createChartContainer(data, canvasId, type) { noteElement.style.display = showNotes ? 'block' : 'none'; container.appendChild(noteElement); } - + // Add description if present in metadata, but only for groups if (metadata && metadata.description && metadata.type === "group") { const descElement = document.createElement('div'); @@ -274,7 +274,7 @@ function metadataForLabel(label, type) { return metadata; } } - + return null; } @@ -305,27 +305,27 @@ function generateExtraInfo(latestRunsLookup, data) { return labels.map(label => { const metadata = metadataForLabel(label); const latestRun = latestRunsLookup.get(label); - + let html = '
'; - + if (metadata) { html += `${label}: ${formatCommand(latestRun.result)}
`; - + if (metadata.description) { html += `Description: ${metadata.description}`; } - + if (metadata.notes) { html += `
Notes: ${metadata.notes}`; } - + if (metadata.unstable) { html += `
⚠️ Unstable: ${metadata.unstable}`; } } else { html += `${label}: No data available`; } - + html += '
'; return html; }).join(''); @@ -407,9 +407,9 @@ function filterCharts() { const isUnstable = container.getAttribute('data-unstable') === 'true'; // Hide unstable benchmarks if showUnstable is false - const shouldShow = regex.test(label) && - activeSuites.includes(suite) && - (showUnstable || !isUnstable); + const shouldShow = regex.test(label) && + activeSuites.includes(suite) && + (showUnstable || !isUnstable); container.style.display = shouldShow ? '' : 'none'; }); @@ -471,7 +471,7 @@ function processBarChartsData(benchmarkRuns) { if (!groupedResults[result.explicit_group]) { // Look up group metadata const groupMetadata = metadataForLabel(result.explicit_group); - + groupedResults[result.explicit_group] = { label: result.explicit_group, suite: result.suite, @@ -550,7 +550,7 @@ function setupSuiteFilters() { function setupToggles() { const notesToggle = document.getElementById('show-notes'); const unstableToggle = document.getElementById('show-unstable'); - + notesToggle.addEventListener('change', function() { showNotes = this.checked; // Update all note elements visibility @@ -559,7 +559,7 @@ function setupToggles() { }); filterCharts(); }); - + unstableToggle.addEventListener('change', function() { showUnstable = this.checked; // Update all unstable warning elements visibility @@ -568,16 +568,16 @@ function setupToggles() { }); filterCharts(); }); - + // Initialize from URL params if present const notesParam = getQueryParam('notes'); const unstableParam = getQueryParam('unstable'); - + if (notesParam !== null) { showNotes = notesParam === 'true'; notesToggle.checked = showNotes; } - + if (unstableParam !== null) { showUnstable = unstableParam === 'true'; unstableToggle.checked = showUnstable; diff --git a/devops/scripts/benchmarks/utils/result.py b/devops/scripts/benchmarks/utils/result.py index 11d837068b887..b29d973602a35 100644 --- a/devops/scripts/benchmarks/utils/result.py +++ b/devops/scripts/benchmarks/utils/result.py @@ -47,7 +47,7 @@ class BenchmarkRun: @dataclass_json @dataclass class BenchmarkMetadata: - type: str = 'benchmark' # or 'group' + type: str = "benchmark" # or 'group' description: Optional[str] = None notes: Optional[str] = None unstable: Optional[str] = None From 5e0539a50fd9835b99391a6bc91e833604cf40ea Mon Sep 17 00:00:00 2001 From: pbalcer Date: Fri, 14 Mar 2025 12:33:26 +0100 Subject: [PATCH 043/114] fix multiple descriptions/notes --- devops/scripts/benchmarks/benches/compute.py | 8 ++++---- devops/scripts/benchmarks/benches/test.py | 3 ++- devops/scripts/benchmarks/html/index.html | 2 ++ 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index 9386f4d2b1b35..f69df1966d690 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -59,10 +59,10 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: "SubmitKernel": BenchmarkMetadata( type="group", description="Measures CPU time overhead of submitting kernels through different APIs.", - notes="Each layer builds on top of the previous layer, adding functionality and overhead. " - "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API. " - "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance." 
- "Work is ongoing to reduce the overhead of the SYCL API", + notes="Each layer builds on top of the previous layer, adding functionality and overhead.\n" + "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n" + "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n" + "Work is ongoing to reduce the overhead of the SYCL API\n", ), "SinKernelGraph": BenchmarkMetadata( type="group", diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py index 3802597f5c48a..0e4ee55286fb0 100644 --- a/devops/scripts/benchmarks/benches/test.py +++ b/devops/scripts/benchmarks/benches/test.py @@ -50,7 +50,8 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: "Foo Group": BenchmarkMetadata( type="group", description="This is a test benchmark for Foo Group.", - notes="This is a test note for Foo Group.", + notes="This is a test note for Foo Group.\n" + "Look, multiple lines!", ), "Bar Group": BenchmarkMetadata( type="group", diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html index c40174b7f35a0..446b103029c80 100644 --- a/devops/scripts/benchmarks/html/index.html +++ b/devops/scripts/benchmarks/html/index.html @@ -192,6 +192,7 @@ margin-bottom: 10px; border-radius: 5px; border-left: 4px solid #084298; + white-space: pre-line; } .benchmark-unstable { background-color: #f8d7da; @@ -200,6 +201,7 @@ margin-bottom: 10px; border-radius: 5px; border-left: 4px solid #842029; + white-space: pre-line; } .note-text { color: #084298; From 137407a3e41f8764e51a42e88d16c7f6b6abcb79 Mon Sep 17 00:00:00 2001 From: pbalcer Date: Fri, 14 Mar 2025 12:38:20 +0100 Subject: [PATCH 044/114] fix benchmark descriptions --- devops/scripts/benchmarks/html/index.html | 10 ++++++++++ devops/scripts/benchmarks/html/scripts.js | 6 +++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html index 446b103029c80..32a00ab67bb47 100644 --- a/devops/scripts/benchmarks/html/index.html +++ b/devops/scripts/benchmarks/html/index.html @@ -264,6 +264,16 @@ .display-options input { margin-right: 8px; } + .benchmark-description { + background-color: #f2f2f2; + color: #333; + padding: 10px; + margin-bottom: 10px; + border-radius: 5px; + border-left: 4px solid #6c757d; + white-space: pre-line; + font-style: italic; + } diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js index 4136bb647b079..47d23ff8f6e9d 100644 --- a/devops/scripts/benchmarks/html/scripts.js +++ b/devops/scripts/benchmarks/html/scripts.js @@ -260,7 +260,7 @@ function createChartContainer(data, canvasId, type) { const extraInfo = document.createElement('div'); extraInfo.className = 'extra-info'; latestRunsLookup = createLatestRunsLookup(benchmarkRuns); - extraInfo.innerHTML = generateExtraInfo(latestRunsLookup, data); + extraInfo.innerHTML = generateExtraInfo(latestRunsLookup, data, 'benchmark'); details.appendChild(extraInfo); container.appendChild(details); @@ -299,11 +299,11 @@ function createLatestRunsLookup(benchmarkRuns) { return latestRunsMap; } -function generateExtraInfo(latestRunsLookup, data) { +function generateExtraInfo(latestRunsLookup, data, type) { const labels = data.datasets ? 
data.datasets.map(dataset => dataset.label) : [data.label]; return labels.map(label => { - const metadata = metadataForLabel(label); + const metadata = metadataForLabel(label, type); const latestRun = latestRunsLookup.get(label); let html = '
'; From e0f5ca61518604940f08ad0eb7f21ed5b42aa945 Mon Sep 17 00:00:00 2001 From: pbalcer Date: Fri, 14 Mar 2025 12:46:57 +0100 Subject: [PATCH 045/114] fix remote html output --- devops/scripts/benchmarks/benches/test.py | 3 +- devops/scripts/benchmarks/output_html.py | 36 +++++++++++------------ 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py index 0e4ee55286fb0..4862bc64ecbaf 100644 --- a/devops/scripts/benchmarks/benches/test.py +++ b/devops/scripts/benchmarks/benches/test.py @@ -50,8 +50,7 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: "Foo Group": BenchmarkMetadata( type="group", description="This is a test benchmark for Foo Group.", - notes="This is a test note for Foo Group.\n" - "Look, multiple lines!", + notes="This is a test note for Foo Group.\n" "Look, multiple lines!", ), "Bar Group": BenchmarkMetadata( type="group", diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py index b71f87371b383..40a3f914e5115 100644 --- a/devops/scripts/benchmarks/output_html.py +++ b/devops/scripts/benchmarks/output_html.py @@ -18,36 +18,36 @@ def generate_html( benchmark_runs.sort(key=lambda run: run.date, reverse=True) serializable_metadata = {k: v.__dict__ for k, v in metadata.items()} + serializable_runs = [json.loads(run.to_json()) for run in benchmark_runs] + + data = { + "runs": serializable_runs, + "metadata": serializable_metadata, + "defaultCompareNames": compare_names, + } + if options.output_html == "local": data_path = os.path.join(html_path, "data.js") - # Write data to js file - # We can't store this as a standalone json file because it needs to be inline in the html with open(data_path, "w") as f: - f.write("benchmarkRuns = [\n") - # it might be tempting to just to create a list and convert - # that to a json, but that leads to json being serialized twice. - for i, run in enumerate(benchmark_runs): - if i > 0: - f.write(",\n") - f.write(run.to_json()) - - f.write("\n];\n\n") # terminates benchmarkRuns + # For local format, we need to write JavaScript variable assignments + f.write("benchmarkRuns = ") + json.dump(data["runs"], f, indent=2) + f.write(";\n\n") f.write("benchmarkMetadata = ") - json.dump(serializable_metadata, f) - - f.write(";\n\n") # terminates benchmarkMetadata + json.dump(data["metadata"], f, indent=2) + f.write(";\n\n") f.write("defaultCompareNames = ") - json.dump(compare_names, f) - f.write(";\n") # terminates defaultCompareNames + json.dump(data["defaultCompareNames"], f, indent=2) + f.write(";\n") print(f"See {os.getcwd()}/html/index.html for the results.") else: + # For remote format, we write a single JSON file data_path = os.path.join(html_path, "data.json") with open(data_path, "w") as f: - json_data = {"runs": benchmark_runs, "metadata": serializable_metadata} - json.dump(json_data, f, indent=2) + json.dump(data, f, indent=2) print( f"Upload {data_path} to a location set in config.js remoteDataUrl argument." 
From 1041db695a7da031879bea08f2b2b0b0c9e76151 Mon Sep 17 00:00:00 2001 From: pbalcer Date: Fri, 14 Mar 2025 12:55:39 +0100 Subject: [PATCH 046/114] fix metadata collection with dry run --- devops/scripts/benchmarks/main.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 8db0549a861a4..e701b9eac70a2 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -159,18 +159,14 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): options.extra_ld_libraries.extend(cr.ld_libraries()) options.extra_env_vars.update(cr.env_vars()) - suites = ( - [ - ComputeBench(directory), - VelocityBench(directory), - SyclBench(directory), - LlamaCppBench(directory), - UMFSuite(directory), - TestSuite(), - ] - if not options.dry_run - else [] - ) + suites = [ + ComputeBench(directory), + VelocityBench(directory), + SyclBench(directory), + LlamaCppBench(directory), + UMFSuite(directory), + TestSuite(), + ] # Collect metadata from all benchmarks without setting them up metadata = collect_metadata(suites) From fae04f46984b39925c1724b0e3c7125490d4ab7b Mon Sep 17 00:00:00 2001 From: pbalcer Date: Fri, 14 Mar 2025 13:30:46 +0100 Subject: [PATCH 047/114] cleanup compute bench, fix readme, use newer sycl-bench --- devops/scripts/benchmarks/README.md | 8 +- devops/scripts/benchmarks/benches/base.py | 13 +- devops/scripts/benchmarks/benches/compute.py | 201 ++++++++---------- .../scripts/benchmarks/benches/syclbench.py | 4 +- devops/scripts/benchmarks/main.py | 2 +- 5 files changed, 99 insertions(+), 129 deletions(-) diff --git a/devops/scripts/benchmarks/README.md b/devops/scripts/benchmarks/README.md index 004fe14eca35b..fcadded3cad51 100644 --- a/devops/scripts/benchmarks/README.md +++ b/devops/scripts/benchmarks/README.md @@ -6,6 +6,8 @@ Scripts for running performance tests on SYCL and Unified Runtime. - [Velocity Bench](https://github.com/oneapi-src/Velocity-Bench) - [Compute Benchmarks](https://github.com/intel/compute-benchmarks/) +- [LlamaCpp Benchmarks](https://github.com/ggerganov/llama.cpp) +- [SYCL-Bench](https://github.com/unisa-hpc/sycl-bench) ## Running @@ -27,8 +29,6 @@ You can also include additional benchmark parameters, such as environment variab Once all the required information is entered, click the "Run workflow" button to initiate a new workflow run. This will execute the benchmarks and then post the results as a comment on the specified Pull Request. -By default, all benchmark runs are compared against `baseline`, which is a well-established set of the latest data. - You must be a member of the `oneapi-src` organization to access these features. ## Comparing results @@ -37,8 +37,8 @@ By default, the benchmark results are not stored. To store them, use the option You can compare benchmark results using `--compare` option. The comparison will be presented in a markdown output file (see below). If you want to calculate the relative performance of the new results against the previously saved data, use `--compare ` (i.e. `--compare baseline`). In case of comparing only stored data without generating new results, use `--dry-run --compare --compare --relative-perf `, where `name1` indicates the baseline for the relative performance calculation and `--dry-run` prevents the script for running benchmarks. 
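For example (paths and run names here are illustrative, and the exact flag combination is an assumption based on `main.py` and the CI workflows in this series), a save-then-compare sequence might look like:

```bash
# Run the suites against a SYCL build and store the results under a name
# (illustrative paths and names)
./devops/scripts/benchmarks/main.py ~/llvm_bench_workdir \
    --sycl ~/llvm/build \
    --save my_changes

# Compare already-saved results without re-running the benchmarks
./devops/scripts/benchmarks/main.py ~/llvm_bench_workdir \
    --dry-run \
    --compare Baseline_L0 --compare my_changes \
    --relative-perf Baseline_L0 \
    --output-markdown full
```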
Listing more than two `--compare` options results in displaying only execution time, without statistical analysis. -Baseline, as well as baseline-v2 (for the level-zero adapter v2) is updated automatically during a nightly job. The results -are stored [here](https://oneapi-src.github.io/unified-runtime/benchmark_results.html). +Baseline_L0, as well as Baseline_L0v2 (for the level-zero adapter v2) is updated automatically during a nightly job. The results +are stored [here](https://oneapi-src.github.io/unified-runtime/performance/). ## Output formats You can display the results in the form of a HTML file by using `--ouptut-html` and a markdown file by using `--output-markdown`. Due to character limits for posting PR comments, the final content of the markdown file might be reduced. In order to obtain the full markdown output, use `--output-markdown full`. diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py index 8403097eca168..1bc99b11518e3 100644 --- a/devops/scripts/benchmarks/benches/base.py +++ b/devops/scripts/benchmarks/benches/base.py @@ -75,12 +75,6 @@ def download( self.data_path = self.create_data_path(name, skip_data_dir) return download(self.data_path, url, file, untar, unzip, checksum) - def name(self): - raise NotImplementedError() - - def description(self): - return "No description provided." - def lower_is_better(self): return True @@ -99,8 +93,11 @@ def stddev_threshold(self): def get_suite_name(self) -> str: return self.suite.name() - def result_names(self) -> list[str]: - return [self.name()] + def name(self): + raise NotImplementedError() + + def description(self): + return "No description provided." def notes(self) -> str: return None diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index f69df1966d690..c26f645635d27 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -13,6 +13,20 @@ from enum import Enum +class RUNTIMES(Enum): + SYCL = "sycl" + LEVEL_ZERO = "l0" + UR = "ur" + + +def runtime_to_name(runtime: RUNTIMES) -> str: + return { + RUNTIMES.SYCL: "SYCL", + RUNTIMES.LEVEL_ZERO: "Level Zero", + RUNTIMES.UR: "Unified Runtime", + }[runtime] + + class ComputeBench(Suite): def __init__(self, directory): self.directory = directory @@ -70,6 +84,16 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: ), } + def enabled_runtimes(self, supported_runtimes=None): + # all runtimes in the RUNTIMES enum + runtimes = supported_runtimes or list(RUNTIMES) + + # Filter out UR if not available + if options.ur is None: + runtimes = [r for r in runtimes if r != RUNTIMES.UR] + + return runtimes + def benchmarks(self) -> list[Benchmark]: if options.sycl is None: return [] @@ -77,11 +101,46 @@ def benchmarks(self) -> list[Benchmark]: if options.ur_adapter == "cuda": return [] - benches = [ - SubmitKernelL0(self, 0), - SubmitKernelL0(self, 1), - SubmitKernelSYCL(self, 0), - SubmitKernelSYCL(self, 1), + benches = [] + + # Add SubmitKernel benchmarks using loops + for runtime in self.enabled_runtimes(): + for in_order_queue in [0, 1]: + for measure_completion in [0, 1]: + benches.append( + SubmitKernel(self, runtime, in_order_queue, measure_completion) + ) + + # Add SinKernelGraph benchmarks + for runtime in self.enabled_runtimes(): + for with_graphs in [0, 1]: + for num_kernels in [5, 100]: + benches.append( + GraphApiSinKernelGraph(self, runtime, with_graphs, num_kernels) + ) + + # Add ULLS benchmarks + for runtime in 
self.enabled_runtimes([RUNTIMES.SYCL, RUNTIMES.LEVEL_ZERO]): + benches.append(UllsEmptyKernel(self, runtime, 1000, 256)) + benches.append(UllsKernelSwitch(self, runtime, 8, 200, 0, 0, 1, 1)) + + # Add GraphApiSubmitGraph benchmarks + for runtime in self.enabled_runtimes([RUNTIMES.SYCL]): + for in_order_queue in [0, 1]: + for num_kernels in [4, 10, 32]: + for measure_completion_time in [0, 1]: + benches.append( + GraphApiSubmitGraph( + self, + runtime, + in_order_queue, + num_kernels, + measure_completion_time, + ) + ) + + # Add other benchmarks + benches += [ QueueInOrderMemcpy(self, 0, "Device", "Device", 1024), QueueInOrderMemcpy(self, 0, "Host", "Device", 1024), QueueMemcpy(self, "Device", "Device", 1024), @@ -89,45 +148,14 @@ def benchmarks(self) -> list[Benchmark]: ExecImmediateCopyQueue(self, 0, 1, "Device", "Device", 1024), ExecImmediateCopyQueue(self, 1, 1, "Device", "Host", 1024), VectorSum(self), - GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 5), - GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 1, 5), - GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 0, 100), - GraphApiSinKernelGraph(self, RUNTIMES.SYCL, 1, 100), - GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 0, 5), - GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 5), - GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 0, 100), - GraphApiSinKernelGraph(self, RUNTIMES.LEVEL_ZERO, 1, 100), - UllsEmptyKernel(self, RUNTIMES.SYCL, 1000, 256), - UllsEmptyKernel(self, RUNTIMES.LEVEL_ZERO, 1000, 256), - UllsKernelSwitch(self, RUNTIMES.SYCL, 8, 200, 0, 0, 1, 1), - UllsKernelSwitch(self, RUNTIMES.LEVEL_ZERO, 8, 200, 0, 0, 1, 1), ] - for in_order_queue in [0, 1]: - for num_kernels in [4, 32]: - for measure_completion_time in [0, 1]: - benches.append( - GraphApiSubmitGraph( - self, - RUNTIMES.SYCL, - in_order_queue, - num_kernels, - measure_completion_time, - ) - ) - + # Add UR-specific benchmarks if options.ur is not None: benches += [ - SubmitKernelUR(self, 0, 0), - SubmitKernelUR(self, 1, 0), - SubmitKernelUR(self, 1, 1), MemcpyExecute(self, 400, 1, 102400, 10, 1, 1, 1), MemcpyExecute(self, 400, 1, 102400, 10, 0, 1, 1), MemcpyExecute(self, 4096, 4, 1024, 10, 0, 1, 0), - GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 5), - GraphApiSinKernelGraph(self, RUNTIMES.UR, 1, 5), - GraphApiSinKernelGraph(self, RUNTIMES.UR, 0, 100), - GraphApiSinKernelGraph(self, RUNTIMES.UR, 1, 100), ] return benches @@ -228,98 +256,49 @@ def teardown(self): return -class SubmitKernelSYCL(ComputeBenchmark): - def __init__(self, bench, ioq): +class SubmitKernel(ComputeBenchmark): + def __init__(self, bench, runtime: RUNTIMES, ioq, measure_completion=0): self.ioq = ioq - super().__init__(bench, "api_overhead_benchmark_sycl", "SubmitKernel") + self.runtime = runtime + self.measure_completion = measure_completion + super().__init__( + bench, f"api_overhead_benchmark_{runtime.value}", "SubmitKernel" + ) def name(self): order = "in order" if self.ioq else "out of order" - return f"api_overhead_benchmark_sycl SubmitKernel {order}" + completion_str = " with measure completion" if self.measure_completion else "" + return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}" def explicit_group(self): - return "SubmitKernel" - - def bin_args(self) -> list[str]: - return [ - f"--Ioq={self.ioq}", - "--DiscardEvents=0", - "--MeasureCompletion=0", - "--iterations=100000", - "--Profiling=0", - "--NumKernels=10", - "--KernelExecTime=1", - ] - - def description(self) -> str: - order = "in-order" if self.ioq else "out-of-order" return ( - f"Measures 
CPU time overhead of submitting {order} kernels through SYCL API." - "Uses 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time." - ) - - -class SubmitKernelUR(ComputeBenchmark): - def __init__(self, bench, ioq, measureCompletion): - self.ioq = ioq - self.measureCompletion = measureCompletion - super().__init__(bench, "api_overhead_benchmark_ur", "SubmitKernel") - - def name(self): - order = "in order" if self.ioq else "out of order" - return f"api_overhead_benchmark_ur SubmitKernel {order}" + ( - " with measure completion" if self.measureCompletion else "" + "SubmitKernel" + if self.measure_completion == 0 + else "SubmitKernel With Completion" ) - def explicit_group(self): - return "SubmitKernel" - def description(self) -> str: order = "in-order" if self.ioq else "out-of-order" - completion = "including" if self.measureCompletion else "excluding" - return ( - f"Measures CPU time overhead of submitting {order} kernels through Unified Runtime API, " - f"{completion} kernel completion time. Uses 10 simple kernels with minimal execution time " - f"to isolate API overhead." - ) + runtime_name = runtime_to_name(self.runtime) - def bin_args(self) -> list[str]: - return [ - f"--Ioq={self.ioq}", - "--DiscardEvents=0", - f"--MeasureCompletion={self.measureCompletion}", - "--iterations=100000", - "--Profiling=0", - "--NumKernels=10", - "--KernelExecTime=1", - ] - - -class SubmitKernelL0(ComputeBenchmark): - def __init__(self, bench, ioq): - self.ioq = ioq - super().__init__(bench, "api_overhead_benchmark_l0", "SubmitKernel") - - def name(self): - order = "in order" if self.ioq else "out of order" - return f"api_overhead_benchmark_l0 SubmitKernel {order}" + completion_desc = "" + if self.runtime == RUNTIMES.UR: + completion_desc = f", {'including' if self.measure_completion else 'excluding'} kernel completion time" - def explicit_group(self): - return "SubmitKernel" + l0_specific = "" + if self.runtime == RUNTIMES.LEVEL_ZERO: + l0_specific = " Uses immediate command lists" - def description(self) -> str: - order = "in-order" if self.ioq else "out-of-order" return ( - f"Measures CPU time overhead of submitting {order} kernels through Level Zero API. " - f"Uses immediate command lists with 10 minimal kernels to isolate submission overhead " - f"from execution time." + f"Measures CPU time overhead of submitting {order} kernels through {runtime_name} API{completion_desc}. " + f"Runs 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time. 
{l0_specific}" ) def bin_args(self) -> list[str]: return [ f"--Ioq={self.ioq}", "--DiscardEvents=0", - "--MeasureCompletion=0", + f"--MeasureCompletion={self.measure_completion}", "--iterations=100000", "--Profiling=0", "--NumKernels=10", @@ -521,12 +500,6 @@ def bin_args(self) -> list[str]: ] -class RUNTIMES(Enum): - SYCL = "sycl" - LEVEL_ZERO = "l0" - UR = "ur" - - class GraphApiSinKernelGraph(ComputeBenchmark): def __init__(self, bench, runtime: RUNTIMES, withGraphs, numKernels): self.withGraphs = withGraphs diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py index cc2db0a2fcf7c..14c0104d0a08c 100644 --- a/devops/scripts/benchmarks/benches/syclbench.py +++ b/devops/scripts/benchmarks/benches/syclbench.py @@ -31,8 +31,8 @@ def setup(self): repo_path = git_clone( self.directory, "sycl-bench-repo", - "https://github.com/mateuszpn/sycl-bench.git", - "1e6ab2cfd004a72c5336c26945965017e06eab71", + "https://github.com/unisa-hpc/sycl-bench.git", + "31fc70be6266193c4ba60eb1fe3ce26edee4ca5b", ) configure_command = [ diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index e701b9eac70a2..d05575a5a06ca 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -333,7 +333,7 @@ def validate_and_parse_env_args(env_args): parser.add_argument( "--adapter", type=str, - help="Options to build the Unified Runtime as part of the benchmark", + help="Unified Runtime adapter to use.", default="level_zero", ) parser.add_argument( From cfa4a9cbc5166db535b3754fa7023d01b2589594 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 14 Mar 2025 08:12:22 -0700 Subject: [PATCH 048/114] [CI] configure upload results --- .github/workflows/benchmark.yml | 2 ++ .github/workflows/sycl-linux-run-tests.yml | 6 ++++++ devops/actions/run-tests/benchmark_v2/action.yml | 5 ++++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index f044cbb066757..ca0364f94fde5 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -95,6 +95,7 @@ jobs: target_devices: ${{ matrix.backend }} reset_intel_gpu: ${{ matrix.reset_intel_gpu }} tests_selector: benchmark_v2 + benchmark_upload_results: ${{ inputs.upload_results }} repo_ref: ${{ matrix.ref }} devops_ref: ${{ github.ref }} sycl_toolchain_artifact: sycl_linux_default @@ -122,4 +123,5 @@ jobs: target_devices: ${{ matrix.backend }} reset_intel_gpu: ${{ matrix.reset_intel_gpu }} tests_selector: benchmark_v2 + benchmark_upload_results: ${{ inputs.upload_results }} repo_ref: ${{ github.ref }} diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index f5b243cb7fc05..cc0b5685afec2 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -114,6 +114,11 @@ on: default: '' required: False + benchmark_upload_results: + type: string + required: False + default: 'false' + workflow_dispatch: inputs: runner: @@ -337,6 +342,7 @@ jobs: uses: ./devops/actions/run-tests/benchmark_v2 with: target_devices: ${{ inputs.target_devices }} + upload_results: ${{ inputs.benchmark_upload_results }} env: RUNNER_TAG: ${{ inputs.runner }} GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} \ No newline at end of file diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index 375bc20faf857..e75f4b309499d 
100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -16,6 +16,9 @@ inputs: target_devices: type: string required: True + upload_results: + type: string + required: True runs: using: "composite" @@ -99,7 +102,7 @@ runs: echo "-----" ls - name: Push compute-benchmarks results - if: always() + if: inputs.upload_results == 'true' && always() shell: bash run: | # TODO redo configuration From ca963e6b9aaa91921e41ef2501891a0bec684ac2 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 14 Mar 2025 09:02:05 -0700 Subject: [PATCH 049/114] [CI] Change config to update during workflow run instead --- .github/workflows/sycl-docs.yml | 5 +++++ devops/scripts/benchmarks/html/config.js | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/sycl-docs.yml b/.github/workflows/sycl-docs.yml index a45c56bdd869c..6b748ec9c7ebb 100644 --- a/.github/workflows/sycl-docs.yml +++ b/.github/workflows/sycl-docs.yml @@ -51,6 +51,11 @@ jobs: mv $GITHUB_WORKSPACE/build/tools/clang/docs/html/* clang/ cp -r $GITHUB_WORKSPACE/repo/devops/scripts/benchmarks/html benchmarks touch .nojekyll + # Update benchmarking dashboard configuration + cat << 'EOF' > benchmarks/config.js + remoteDataUrl = 'https://raw.githubusercontent.com/intel/llvm-ci-perf-results/refs/heads/unify-ci/UR_DNP_INTEL_06_03/data.json'; + defaultCompareNames = ["Baseline_PVC_L0"]; + EOF # Upload the generated docs as an artifact and deploy to GitHub Pages. - name: Upload artifact uses: actions/upload-pages-artifact@v3 diff --git a/devops/scripts/benchmarks/html/config.js b/devops/scripts/benchmarks/html/config.js index 0a8551c5de152..3e67ae1dce8e5 100644 --- a/devops/scripts/benchmarks/html/config.js +++ b/devops/scripts/benchmarks/html/config.js @@ -1,2 +1,2 @@ -remoteDataUrl = 'https://raw.githubusercontent.com/intel/llvm-ci-perf-results/refs/heads/unify-ci/UR_DNP_INTEL_06_03/data.json'; +//remoteDataUrl = 'https://example.com/data.json'; //defaultCompareNames = ['baseline']; From 45a02e15ccb3cc01f408c41b3aa27c678c9a30c9 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 14 Mar 2025 09:28:09 -0700 Subject: [PATCH 050/114] [CI] Change save name depending on build --- .github/workflows/benchmark.yml | 1 + .github/workflows/sycl-linux-run-tests.yml | 7 +++- .../actions/run-tests/benchmark_v2/action.yml | 32 +++++++++---------- 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index ca0364f94fde5..ff2fddb2ae88d 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -96,6 +96,7 @@ jobs: reset_intel_gpu: ${{ matrix.reset_intel_gpu }} tests_selector: benchmark_v2 benchmark_upload_results: ${{ inputs.upload_results }} + benchmark_build_hash: ${{ inputs.commit_hash }} repo_ref: ${{ matrix.ref }} devops_ref: ${{ github.ref }} sycl_toolchain_artifact: sycl_linux_default diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index cc0b5685afec2..09821955a5b58 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -116,8 +116,12 @@ on: benchmark_upload_results: type: string - required: False default: 'false' + required: False + benchmark_build_hash: + type: string + default: '' + required: False workflow_dispatch: inputs: @@ -343,6 +347,7 @@ jobs: with: target_devices: ${{ inputs.target_devices }} upload_results: ${{ inputs.benchmark_upload_results }} + build_hash: 
${{ inputs.benchmark_build_hash }} env: RUNNER_TAG: ${{ inputs.runner }} GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} \ No newline at end of file diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index e75f4b309499d..bab571ec16ff2 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -19,6 +19,10 @@ inputs: upload_results: type: string required: True + build_hash: + type: string + required: False + default: '' runs: using: "composite" @@ -81,6 +85,8 @@ runs: run: | git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results - name: Run compute-benchmarks + env: + BUILD_HASH: ${{ inputs.build_hash }} shell: bash run: | # TODO generate summary + display helpful message here @@ -91,16 +97,22 @@ runs: pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt echo "-----" mkdir -p "./llvm-ci-perf-results/$RUNNER_NAME" + + # TODO accomodate for different GPUs and backends + SAVE_NAME="Baseline_PVC_L0" + if [ -n "$BUILD_HASH" ]; then + SAVE_NAME="Commit_PVC_$BUILD_HASH" + fi + taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \ "$(realpath ./llvm_test_workdir)" \ --sycl "$(realpath ./toolchain)" \ - --save baseline \ + --save "$SAVE_NAME" \ --output-html remote \ --results-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ --output-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ --preset Minimal echo "-----" - ls - name: Push compute-benchmarks results if: inputs.upload_results == 'true' && always() shell: bash @@ -120,18 +132,4 @@ runs: git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" unify-ci fi -# - name: Find benchmark result artifact here -# if: always() -# shell: bash -# run: | -# cat << EOF -# # -# # Artifact link for benchmark results here: -# # -# EOF -# - name: Archive compute-benchmark results -# if: always() -# uses: actions/upload-artifact@v4 -# with: -# name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }}) -# path: ./artifact + From 98f9d388393ec858c92dc72da7d0420362763562 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 14 Mar 2025 09:33:32 -0700 Subject: [PATCH 051/114] bump to 2024-2025 --- devops/scripts/benchmarks/benches/base.py | 2 +- devops/scripts/benchmarks/benches/llamacpp.py | 2 +- devops/scripts/benchmarks/benches/syclbench.py | 2 +- devops/scripts/benchmarks/benches/test.py | 2 +- devops/scripts/benchmarks/benches/umf.py | 2 +- devops/scripts/benchmarks/benches/velocity.py | 2 +- devops/scripts/benchmarks/history.py | 2 +- devops/scripts/benchmarks/html/index.html | 2 +- devops/scripts/benchmarks/html/scripts.js | 2 +- devops/scripts/benchmarks/main.py | 2 +- devops/scripts/benchmarks/output_html.py | 2 +- devops/scripts/benchmarks/presets.py | 2 +- devops/scripts/benchmarks/utils/compute_runtime.py | 2 +- devops/scripts/benchmarks/utils/oneapi.py | 2 +- devops/scripts/benchmarks/utils/result.py | 2 +- devops/scripts/benchmarks/utils/utils.py | 2 +- 16 files changed, 16 insertions(+), 16 deletions(-) diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py index 1bc99b11518e3..3ca6e3a7b7d3b 100644 --- a/devops/scripts/benchmarks/benches/base.py +++ b/devops/scripts/benchmarks/benches/base.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 
Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py index c12f811942849..38633912b001a 100644 --- a/devops/scripts/benchmarks/benches/llamacpp.py +++ b/devops/scripts/benchmarks/benches/llamacpp.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py index 14c0104d0a08c..b846b0853ce66 100644 --- a/devops/scripts/benchmarks/benches/syclbench.py +++ b/devops/scripts/benchmarks/benches/syclbench.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py index 4862bc64ecbaf..7afdd803b5cc3 100644 --- a/devops/scripts/benchmarks/benches/test.py +++ b/devops/scripts/benchmarks/benches/test.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/devops/scripts/benchmarks/benches/umf.py b/devops/scripts/benchmarks/benches/umf.py index 1f736e7755f92..e465d5e9e01c9 100644 --- a/devops/scripts/benchmarks/benches/umf.py +++ b/devops/scripts/benchmarks/benches/umf.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py index 652a831d0222e..916a321b143cf 100644 --- a/devops/scripts/benchmarks/benches/velocity.py +++ b/devops/scripts/benchmarks/benches/velocity.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index f05e0192d26ee..0b80c54ad7393 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html index 32a00ab67bb47..dc79c2a4781b6 100644 --- a/devops/scripts/benchmarks/html/index.html +++ b/devops/scripts/benchmarks/html/index.html @@ -1,5 +1,5 @@ +
+
 [index.html hunk: page markup not preserved in this copy; the surviving text is the "Historical Results" and "Comparisons" headings]
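To make the tag scheme added by these patches concrete, here is a minimal sketch (the benchmark class is hypothetical; the `BenchmarkMetadata` fields and the example tag names are taken from the surrounding diffs) of how a benchmark could report tags that end up in the dashboard's tag filters:

```python
from utils.result import BenchmarkMetadata

class MyKernelBench:  # hypothetical benchmark, for illustration only
    def get_metadata(self) -> BenchmarkMetadata:
        # Tag names should match entries in the benchmark_tags registry in
        # benches/base.py (e.g. 'micro', 'submit', 'sycl'); the dashboard
        # renders them as filter checkboxes with tooltips from benchmarkTags.
        return BenchmarkMetadata(
            type="benchmark",
            description="Illustrative single-kernel submission latency test.",
            notes="Multi-line notes render with white-space: pre-line.\n"
            "Second line of notes.",
            unstable=None,
            tags=["micro", "submit", "sycl"],
        )
```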
diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js index a3ef986efdf14..547bcc77bcf31 100644 --- a/devops/scripts/benchmarks/html/scripts.js +++ b/devops/scripts/benchmarks/html/scripts.js @@ -8,9 +8,10 @@ let activeRuns = new Set(defaultCompareNames); let chartInstances = new Map(); let suiteNames = new Set(); let timeseriesData, barChartsData, allRunNames; +let activeTags = new Set(); // DOM Elements -let runSelect, selectedRunsDiv, suiteFiltersContainer; +let runSelect, selectedRunsDiv, suiteFiltersContainer, tagFiltersContainer; // Run selector functions function updateSelectedRuns(forceUpdate = true) { @@ -218,6 +219,14 @@ function createChartContainer(data, canvasId, type) { container.appendChild(unstableWarning); } + // Add description if present in metadata (moved outside of details) + if (metadata && metadata.description) { + const descElement = document.createElement('div'); + descElement.className = 'benchmark-description'; + descElement.textContent = metadata.description; + container.appendChild(descElement); + } + // Add notes if present if (metadata && metadata.notes) { const noteElement = document.createElement('div'); @@ -227,12 +236,29 @@ function createChartContainer(data, canvasId, type) { container.appendChild(noteElement); } - // Add description if present in metadata, but only for groups - if (metadata && metadata.description && metadata.type === "group") { - const descElement = document.createElement('div'); - descElement.className = 'benchmark-description'; - descElement.textContent = metadata.description; - container.appendChild(descElement); + // Add tags if present + if (metadata && metadata.tags) { + container.setAttribute('data-tags', metadata.tags.join(',')); + + // Add tags display + const tagsContainer = document.createElement('div'); + tagsContainer.className = 'benchmark-tags'; + + metadata.tags.forEach(tag => { + const tagElement = document.createElement('span'); + tagElement.className = 'tag'; + tagElement.textContent = tag; + tagElement.setAttribute('data-tag', tag); + + // Add tooltip with tag description + if (benchmarkTags[tag]) { + tagElement.setAttribute('title', benchmarkTags[tag].description); + } + + tagsContainer.appendChild(tagElement); + }); + + container.appendChild(tagsContainer); } const canvas = document.createElement('canvas'); @@ -358,6 +384,7 @@ function updateURL() { const regex = document.getElementById('bench-filter').value; const activeSuites = getActiveSuites(); const activeRunsList = Array.from(activeRuns); + const activeTagsList = Array.from(activeTags); if (regex) { url.searchParams.set('regex', regex); @@ -371,6 +398,13 @@ function updateURL() { url.searchParams.delete('suites'); } + // Add tags to URL + if (activeTagsList.length > 0) { + url.searchParams.set('tags', activeTagsList.join(',')); + } else { + url.searchParams.delete('tags'); + } + // Handle the runs parameter if (activeRunsList.length > 0) { // Check if the active runs are the same as default runs @@ -404,11 +438,18 @@ function filterCharts() { const label = container.getAttribute('data-label'); const suite = container.getAttribute('data-suite'); const isUnstable = container.getAttribute('data-unstable') === 'true'; + const tags = container.getAttribute('data-tags') ? 
+ container.getAttribute('data-tags').split(',') : []; + + // Check if benchmark has all active tags (if any are selected) + const hasAllActiveTags = activeTags.size === 0 || + Array.from(activeTags).every(tag => tags.includes(tag)); // Hide unstable benchmarks if showUnstable is false const shouldShow = regex.test(label) && activeSuites.includes(suite) && - (isUnstableEnabled() || !isUnstable); + (isUnstableEnabled() || !isUnstable) && + hasAllActiveTags; container.style.display = shouldShow ? '' : 'none'; }); @@ -585,6 +626,77 @@ function setupToggles() { } } +function setupTagFilters() { + tagFiltersContainer = document.getElementById('tag-filters'); + + // Get all unique tags from benchmark metadata + const allTags = new Set(); + + for (const [key, metadata] of Object.entries(benchmarkMetadata)) { + if (metadata.tags) { + metadata.tags.forEach(tag => allTags.add(tag)); + } + } + + // Sort tags alphabetically + const sortedTags = Array.from(allTags).sort(); + + // Create tag filter elements + sortedTags.forEach(tag => { + const tagContainer = document.createElement('div'); + tagContainer.className = 'tag-filter'; + + const checkbox = document.createElement('input'); + checkbox.type = 'checkbox'; + checkbox.id = `tag-${tag}`; + checkbox.className = 'tag-checkbox'; + checkbox.dataset.tag = tag; + + const label = document.createElement('label'); + label.htmlFor = `tag-${tag}`; + label.textContent = tag; + + // Add info icon with tooltip if tag description exists + if (benchmarkTags[tag]) { + const infoIcon = document.createElement('span'); + infoIcon.className = 'tag-info'; + infoIcon.textContent = 'ⓘ'; + infoIcon.title = benchmarkTags[tag].description; + label.appendChild(infoIcon); + } + + checkbox.addEventListener('change', function() { + if (this.checked) { + activeTags.add(tag); + } else { + activeTags.delete(tag); + } + filterCharts(); + }); + + tagContainer.appendChild(checkbox); + tagContainer.appendChild(label); + tagFiltersContainer.appendChild(tagContainer); + }); +} + +function toggleAllTags(select) { + const checkboxes = document.querySelectorAll('.tag-checkbox'); + + checkboxes.forEach(checkbox => { + checkbox.checked = select; + const tag = checkbox.dataset.tag; + + if (select) { + activeTags.add(tag); + } else { + activeTags.delete(tag); + } + }); + + filterCharts(); +} + function initializeCharts() { // Process raw data timeseriesData = processTimeseriesData(benchmarkRuns); @@ -621,11 +733,13 @@ function initializeCharts() { // Setup UI components setupRunSelector(); setupSuiteFilters(); + setupTagFilters(); setupToggles(); // Apply URL parameters const regexParam = getQueryParam('regex'); const suitesParam = getQueryParam('suites'); + const tagsParam = getQueryParam('tags'); if (regexParam) { document.getElementById('bench-filter').value = regexParam; @@ -638,6 +752,18 @@ function initializeCharts() { }); } + // Apply tag filters from URL + if (tagsParam) { + const tags = tagsParam.split(','); + tags.forEach(tag => { + const checkbox = document.querySelector(`.tag-checkbox[data-tag="${tag}"]`); + if (checkbox) { + checkbox.checked = true; + activeTags.add(tag); + } + }); + } + // Setup event listeners document.querySelectorAll('.suite-checkbox').forEach(checkbox => { checkbox.addEventListener('change', filterCharts); @@ -651,6 +777,7 @@ function initializeCharts() { // Make functions available globally for onclick handlers window.addSelectedRun = addSelectedRun; window.removeRun = removeRun; +window.toggleAllTags = toggleAllTags; // Load data based on configuration 
function loadData() { diff --git a/devops/scripts/benchmarks/html/styles.css b/devops/scripts/benchmarks/html/styles.css new file mode 100644 index 0000000000000..9a3c5fe69b287 --- /dev/null +++ b/devops/scripts/benchmarks/html/styles.css @@ -0,0 +1,373 @@ +body { + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; + margin: 0; + padding: 16px; + background: #f8f9fa; +} +.container { + max-width: 1100px; + margin: 0 auto; +} +h1, h2 { + color: #212529; + text-align: center; + margin-bottom: 24px; + font-weight: 500; +} +.chart-container { + background: white; + border-radius: 8px; + padding: 24px; + margin-bottom: 24px; + box-shadow: 0 1px 3px rgba(0,0,0,0.1); +} +@media (max-width: 768px) { + body { + padding: 12px; + } + .chart-container { + padding: 16px; + border-radius: 6px; + } + h1 { + font-size: 24px; + margin-bottom: 16px; + } +} +.filter-container { + text-align: center; + margin-bottom: 24px; +} +.filter-container input { + padding: 8px; + font-size: 16px; + border: 1px solid #ccc; + border-radius: 4px; + width: 400px; + max-width: 100%; +} +.suite-filter-container { + text-align: center; + margin-bottom: 24px; + padding: 16px; + background: #e9ecef; + border-radius: 8px; +} +.suite-checkbox { + margin: 0 8px; +} +details { + margin-bottom: 24px; +} +summary { + display: flex; + justify-content: space-between; + align-items: center; + font-size: 16px; + font-weight: 500; + cursor: pointer; + padding: 12px 16px; + background: #dee2e6; + border-radius: 8px; + user-select: none; +} +summary:hover { + background: #ced4da; +} +summary::marker { + display: none; +} +summary::-webkit-details-marker { + display: none; +} +summary::after { + content: "▼"; + font-size: 12px; + margin-left: 8px; + transition: transform 0.3s; +} +details[open] summary::after { + transform: rotate(180deg); +} +.extra-info { + padding: 8px; + background: #f8f9fa; + border-radius: 8px; + margin-top: 8px; +} +.run-selector { + text-align: center; + margin-bottom: 24px; + padding: 16px; + background: #e9ecef; + border-radius: 8px; +} +.run-selector select { + width: 300px; + padding: 8px; + margin-right: 8px; +} +.run-selector button { + padding: 8px 16px; + background: #0068B5; + color: white; + border: none; + border-radius: 4px; + cursor: pointer; +} +.run-selector button:hover { + background: #00C7FD; +} +.selected-runs { + margin-top: 12px; +} +.selected-run { + display: inline-block; + padding: 4px 8px; + margin: 4px; + background: #e2e6ea; + border-radius: 4px; +} +.selected-run button { + margin-left: 8px; + padding: 0 4px; + background: none; + border: none; + color: #dc3545; + cursor: pointer; +} +.download-button { + background: none; + border: none; + color: #0068B5; + cursor: pointer; + font-size: 16px; + padding: 4px; + margin-left: 8px; +} +.download-button:hover { + color: #00C7FD; +} +.loading-indicator { + text-align: center; + font-size: 18px; + color: #0068B5; + margin-bottom: 20px; +} +.extra-info-entry { + border: 1px solid #ddd; + padding: 10px; + margin-bottom: 10px; + background-color: #f9f9f9; + border-radius: 5px; +} +.extra-info-entry strong { + display: block; + margin-bottom: 5px; +} +.extra-info-entry em { + color: #555; +} +.display-options-container { + text-align: center; + margin-bottom: 24px; + padding: 16px; + background: #e9ecef; + border-radius: 8px; +} +.display-options-container label { + margin: 0 12px; + cursor: pointer; +} +.display-options-container input { + margin-right: 8px; +} +.benchmark-note { + background-color: #cfe2ff; + color: 
#084298; + padding: 10px; + margin-bottom: 10px; + border-radius: 5px; + border-left: 4px solid #084298; + white-space: pre-line; +} +.benchmark-unstable { + background-color: #f8d7da; + color: #842029; + padding: 10px; + margin-bottom: 10px; + border-radius: 5px; + border-left: 4px solid #842029; + white-space: pre-line; +} +.note-text { + color: #084298; +} +.unstable-warning { + color: #842029; + font-weight: bold; +} +.unstable-text { + color: #842029; +} +.options-container { + margin-bottom: 24px; + background: #e9ecef; + border-radius: 8px; + overflow: hidden; +} +.options-container summary { + padding: 12px 16px; + font-weight: 500; + cursor: pointer; + background: #dee2e6; + user-select: none; +} +.options-container summary:hover { + background: #ced4da; +} +.options-content { + padding: 16px; + display: flex; + flex-wrap: wrap; + gap: 24px; +} +.filter-section { + flex: 1; + min-width: 300px; +} +.filter-section h3 { + margin-top: 0; + margin-bottom: 12px; + font-size: 18px; + font-weight: 500; + text-align: left; +} +#suite-filters { + display: flex; + flex-wrap: wrap; + gap: 8px; +} +.display-options { + display: flex; + flex-direction: column; + gap: 8px; +} +.display-options label { + display: flex; + align-items: center; + cursor: pointer; +} +.display-options input { + margin-right: 8px; +} +.benchmark-description { + background-color: #f2f2f2; + color: #333; + padding: 10px; + margin-bottom: 10px; + border-radius: 5px; + border-left: 4px solid #6c757d; + white-space: pre-line; + font-style: italic; +} +/* Tag styles */ +.benchmark-tags { + display: flex; + flex-wrap: wrap; + gap: 4px; + margin-bottom: 10px; +} + +.tag { + display: inline-block; + background-color: #e2e6ea; + color: #495057; + padding: 2px 8px; + border-radius: 12px; + font-size: 12px; + cursor: default; +} + +.tag-filter { + display: inline-flex; + align-items: center; + margin: 4px; +} + +.tag-filter label { + margin-left: 4px; + cursor: pointer; + display: flex; + align-items: center; +} + +.tag-info { + color: #0068B5; + margin-left: 4px; + cursor: help; + font-size: 12px; +} + +#tag-filters { + display: flex; + flex-wrap: wrap; + max-height: 200px; + overflow-y: auto; + border: 1px solid #dee2e6; + border-radius: 4px; + padding: 8px; + background-color: #f8f9fa; +} + +.tag-filter-actions { + margin-bottom: 8px; + display: flex; + gap: 8px; +} + +.tag-filter-actions button { + padding: 4px 8px; + background: #e2e6ea; + border: none; + border-radius: 4px; + cursor: pointer; +} + +.tag-filter-actions button:hover { + background: #ced4da; +} + +#active-tags { + display: none; + flex-wrap: wrap; + gap: 8px; + margin-top: 12px; + padding: 8px; + background-color: #f8f9fa; + border-radius: 4px; +} + +.active-tag { + display: flex; + align-items: center; + background-color: #0068B5; + color: white; + padding: 4px 8px; + border-radius: 12px; + font-size: 14px; +} + +.remove-tag { + background: none; + border: none; + color: white; + margin-left: 4px; + cursor: pointer; + font-size: 16px; + padding: 0 4px; +} + +.remove-tag:hover { + color: #f8d7da; +} diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py index e69dfeb153b49..429b24eb632c8 100644 --- a/devops/scripts/benchmarks/output_html.py +++ b/devops/scripts/benchmarks/output_html.py @@ -6,7 +6,8 @@ import json import os from options import options -from utils.result import BenchmarkMetadata +from utils.result import BenchmarkMetadata, BenchmarkOutput +from benches.base import benchmark_tags, benchmark_tags_dict 
def generate_html( @@ -16,30 +17,33 @@ def generate_html( metadata: dict[str, BenchmarkMetadata], ): benchmark_runs.sort(key=lambda run: run.date, reverse=True) - serializable_metadata = {k: v.__dict__ for k, v in metadata.items()} - - serializable_runs = [json.loads(run.to_json()) for run in benchmark_runs] - - data = { - "runs": serializable_runs, - "metadata": serializable_metadata, - "defaultCompareNames": compare_names, - } + + # Create the comprehensive output object + output = BenchmarkOutput( + runs=benchmark_runs, + metadata=metadata, + tags=benchmark_tags_dict, + default_compare_names=compare_names + ) if options.output_html == "local": data_path = os.path.join(html_path, "data.js") with open(data_path, "w") as f: # For local format, we need to write JavaScript variable assignments f.write("benchmarkRuns = ") - json.dump(data["runs"], f, indent=2) + json.dump(json.loads(output.to_json())["runs"], f, indent=2) f.write(";\n\n") f.write("benchmarkMetadata = ") - json.dump(data["metadata"], f, indent=2) + json.dump(json.loads(output.to_json())["metadata"], f, indent=2) + f.write(";\n\n") + + f.write("benchmarkTags = ") + json.dump(json.loads(output.to_json())["tags"], f, indent=2) f.write(";\n\n") f.write("defaultCompareNames = ") - json.dump(data["defaultCompareNames"], f, indent=2) + json.dump(output.default_compare_names, f, indent=2) f.write(";\n") print(f"See {os.getcwd()}/html/index.html for the results.") @@ -47,7 +51,7 @@ def generate_html( # For remote format, we write a single JSON file data_path = os.path.join(html_path, "data.json") with open(data_path, "w") as f: - json.dump(data, f, indent=2) + json.dump(json.loads(output.to_json()), f, indent=2) print( f"Upload {data_path} to a location set in config.js remoteDataUrl argument." diff --git a/devops/scripts/benchmarks/utils/result.py b/devops/scripts/benchmarks/utils/result.py index 0d450ad7b9154..82fc7ca1fddc2 100644 --- a/devops/scripts/benchmarks/utils/result.py +++ b/devops/scripts/benchmarks/utils/result.py @@ -4,7 +4,7 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception from dataclasses import dataclass, field -from typing import Optional +from typing import Optional, Dict, List, Any from dataclasses_json import config, dataclass_json from datetime import datetime @@ -57,4 +57,13 @@ class BenchmarkMetadata: description: Optional[str] = None notes: Optional[str] = None unstable: Optional[str] = None - tags: list[BenchmarkTag] = field(default_factory=list) + tags: list[str] = field(default_factory=list) # Changed to list of tag names + + +@dataclass_json +@dataclass +class BenchmarkOutput: + runs: list[BenchmarkRun] + metadata: Dict[str, BenchmarkMetadata] + tags: Dict[str, BenchmarkTag] + default_compare_names: List[str] = field(default_factory=list) From 3662b430fa20585aebeec6a256433160b7e8764d Mon Sep 17 00:00:00 2001 From: pbalcer Date: Thu, 20 Mar 2025 13:01:40 +0100 Subject: [PATCH 066/114] tiny tweaks for benchmark tags --- devops/scripts/benchmarks/benches/base.py | 2 +- devops/scripts/benchmarks/benches/compute.py | 4 ++-- devops/scripts/benchmarks/html/scripts.js | 20 +++++++++----------- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py index 16ff5605b08df..209dc993ae53c 100644 --- a/devops/scripts/benchmarks/benches/base.py +++ b/devops/scripts/benchmarks/benches/base.py @@ -110,7 +110,7 @@ def name(self): raise NotImplementedError() def description(self): - return "No description provided." 
+ return "" def notes(self) -> str: return None diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index 00db6bdd224d1..bff535dd2c335 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -84,12 +84,12 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n" "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n" "Work is ongoing to reduce the overhead of the SYCL API\n", - tags=['submit', 'micro', 'sycl', 'ur', 'l0'] + tags=['submit', 'micro', 'sycl', 'ur', 'L0'] ), "SinKernelGraph": BenchmarkMetadata( type="group", unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.", - tags=['submit', 'micro', 'sycl', 'ur', 'L0'] + tags=['submit', 'memory', 'proxy', 'sycl', 'ur', 'L0', 'graph'] ), "SubmitGraph": BenchmarkMetadata( type="group", diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js index 547bcc77bcf31..fbfb496533194 100644 --- a/devops/scripts/benchmarks/html/scripts.js +++ b/devops/scripts/benchmarks/html/scripts.js @@ -628,21 +628,19 @@ function setupToggles() { function setupTagFilters() { tagFiltersContainer = document.getElementById('tag-filters'); + + const allTags = []; - // Get all unique tags from benchmark metadata - const allTags = new Set(); - - for (const [key, metadata] of Object.entries(benchmarkMetadata)) { - if (metadata.tags) { - metadata.tags.forEach(tag => allTags.add(tag)); + if (benchmarkTags) { + for (const tag in benchmarkTags) { + if (!allTags.includes(tag)) { + allTags.push(tag); + } } } - - // Sort tags alphabetically - const sortedTags = Array.from(allTags).sort(); - + // Create tag filter elements - sortedTags.forEach(tag => { + allTags.forEach(tag => { const tagContainer = document.createElement('div'); tagContainer.className = 'tag-filter'; From 75dd2294adb0682dcab400ce66897ee2d404bbc6 Mon Sep 17 00:00:00 2001 From: pbalcer Date: Thu, 20 Mar 2025 13:23:26 +0100 Subject: [PATCH 067/114] better and more tags --- devops/scripts/benchmarks/benches/base.py | 31 +++++++++++------- devops/scripts/benchmarks/benches/compute.py | 32 +++++++++---------- devops/scripts/benchmarks/benches/llamacpp.py | 2 +- .../scripts/benchmarks/benches/syclbench.py | 11 ++++++- devops/scripts/benchmarks/benches/umf.py | 2 +- devops/scripts/benchmarks/benches/velocity.py | 29 ++++++++++++++++- 6 files changed, 75 insertions(+), 32 deletions(-) diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py index 209dc993ae53c..901235f6e1455 100644 --- a/devops/scripts/benchmarks/benches/base.py +++ b/devops/scripts/benchmarks/benches/base.py @@ -11,18 +11,25 @@ from options import options from utils.utils import download, run -benchmark_tags = [BenchmarkTag('sycl', 'Benchmark uses SYCL RT'), - BenchmarkTag('ur', 'Benchmark uses Unified Runtime'), - BenchmarkTag('L0', 'Benchmark uses L0 directly'), - BenchmarkTag('umf', 'Benchmark uses UMF directly'), - BenchmarkTag('micro', 'Microbenchmark focusing on a specific niche'), - BenchmarkTag('application', 'Real application-based performance test'), - BenchmarkTag('proxy', 'Benchmark that tries to implement a real application use-case'), - BenchmarkTag('submit', 'Benchmark tests the kernel submit path'), - BenchmarkTag('math', 
'Benchmark tests math compute performance'), - BenchmarkTag('memory', 'Benchmark tests memory transfer performance'), - BenchmarkTag('allocation', 'Benchmark tests memory allocation performance'), - BenchmarkTag('graph', 'Benchmark tests graph performance'),] +benchmark_tags = [ + BenchmarkTag('SYCL', 'Benchmark uses SYCL runtime'), + BenchmarkTag('UR', 'Benchmark uses Unified Runtime API'), + BenchmarkTag('L0', 'Benchmark uses Level Zero API directly'), + BenchmarkTag('UMF', 'Benchmark uses Unified Memory Framework directly'), + BenchmarkTag('micro', 'Microbenchmark focusing on a specific functionality'), + BenchmarkTag('application', 'Real application-based performance test'), + BenchmarkTag('proxy', 'Benchmark that simulates real application use-cases'), + BenchmarkTag('submit', 'Tests kernel submission performance'), + BenchmarkTag('math', 'Tests math computation performance'), + BenchmarkTag('memory', 'Tests memory transfer or bandwidth performance'), + BenchmarkTag('allocation', 'Tests memory allocation performance'), + BenchmarkTag('graph', 'Tests graph-based execution performance'), + BenchmarkTag('latency', 'Measures operation latency'), + BenchmarkTag('throughput', 'Measures operation throughput'), + BenchmarkTag('inference', 'Tests ML/AI inference performance'), + BenchmarkTag('image', 'Image processing benchmark'), + BenchmarkTag('simulation', 'Physics or scientific simulation benchmark'), +] benchmark_tags_dict = {tag.name: tag for tag in benchmark_tags} diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index bff535dd2c335..e1f24e0178789 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -28,9 +28,9 @@ def runtime_to_name(runtime: RUNTIMES) -> str: def runtime_to_tag_name(runtime: RUNTIMES) -> str: return { - RUNTIMES.SYCL: "sycl", + RUNTIMES.SYCL: "SYCL", RUNTIMES.LEVEL_ZERO: "L0", - RUNTIMES.UR: "ur", + RUNTIMES.UR: "UR", }[runtime] @@ -84,16 +84,16 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n" "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n" "Work is ongoing to reduce the overhead of the SYCL API\n", - tags=['submit', 'micro', 'sycl', 'ur', 'L0'] + tags=['submit', 'micro', 'SYCL', 'UR', 'L0'] ), "SinKernelGraph": BenchmarkMetadata( type="group", unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.", - tags=['submit', 'memory', 'proxy', 'sycl', 'ur', 'L0', 'graph'] + tags=['submit', 'memory', 'proxy', 'SYCL', 'UR', 'L0', 'graph'] ), "SubmitGraph": BenchmarkMetadata( type="group", - tags=['submit', 'micro', 'sycl', 'ur', 'L0', 'graph'] + tags=['submit', 'micro', 'SYCL', 'UR', 'L0', 'graph'] ), } @@ -279,7 +279,7 @@ def __init__(self, bench, runtime: RUNTIMES, ioq, measure_completion=0): ) def get_tags(self): - return ['submit', runtime_to_tag_name(self.runtime), 'micro'] + return ['submit', 'latency', runtime_to_tag_name(self.runtime), 'micro'] def name(self): order = "in order" if self.ioq else "out of order" @@ -344,7 +344,7 @@ def description(self) -> str: ) def get_tags(self): - return ['memory', 'sycl', 'micro'] + return ['memory', 'submit', 'latency', 'SYCL', 'micro'] def bin_args(self) -> list[str]: return [ @@ -377,7 +377,7 @@ def description(self) -> str: ) def get_tags(self): - return ['memory', 'sycl', 'micro'] + 
return ['memory', 'latency', 'SYCL', 'micro'] def bin_args(self) -> list[str]: return [ @@ -407,7 +407,7 @@ def description(self) -> str: ) def get_tags(self): - return ['memory', 'sycl', 'micro'] + return ['memory', 'latency', 'SYCL', 'micro'] def bin_args(self) -> list[str]: return [ @@ -439,7 +439,7 @@ def lower_is_better(self): return False def get_tags(self): - return ['memory', 'sycl', 'micro'] + return ['memory', 'throughput', 'SYCL', 'micro'] def bin_args(self) -> list[str]: return [ @@ -468,7 +468,7 @@ def description(self) -> str: ) def get_tags(self): - return ['math', 'sycl', 'micro'] + return ['math', 'throughput', 'SYCL', 'micro'] def bin_args(self) -> list[str]: return [ @@ -517,7 +517,7 @@ def description(self) -> str: ) def get_tags(self): - return ['memory', 'ur', 'micro'] + return ['memory', 'latency', 'UR', 'micro'] def bin_args(self) -> list[str]: return [ @@ -560,7 +560,7 @@ def unstable(self) -> str: return "This benchmark combines both eager and graph execution, and may not be representative of real use cases." def get_tags(self): - return ['graph', runtime_to_tag_name(self.runtime), 'proxy', 'submit', 'memory'] + return ['graph', runtime_to_tag_name(self.runtime), 'proxy', 'submit', 'memory', 'latency'] def bin_args(self) -> list[str]: return [ @@ -595,7 +595,7 @@ def name(self): return f"graph_api_benchmark_{self.runtime.value} SubmitGraph numKernels:{self.numKernels} ioq {self.inOrderQueue} measureCompletion {self.measureCompletionTime}" def get_tags(self): - return ['graph', runtime_to_tag_name(self.runtime), 'micro', 'submit'] + return ['graph', runtime_to_tag_name(self.runtime), 'micro', 'submit', 'latency'] def bin_args(self) -> list[str]: return [ @@ -625,7 +625,7 @@ def name(self): return f"ulls_benchmark_{self.runtime.value} EmptyKernel wgc:{self.wgc}, wgs:{self.wgs}" def get_tags(self): - return [runtime_to_tag_name(self.runtime), 'micro'] + return [runtime_to_tag_name(self.runtime), 'micro', 'latency', 'submit'] def bin_args(self) -> list[str]: return [ @@ -666,7 +666,7 @@ def name(self): return f"ulls_benchmark_{self.runtime.value} KernelSwitch count {self.count} kernelTime {self.kernelTime}" def get_tags(self): - return [runtime_to_tag_name(self.runtime), 'micro'] + return [runtime_to_tag_name(self.runtime), 'micro', 'latency', 'submit'] def bin_args(self) -> list[str]: return [ diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py index f0b5694b52dc8..cf203bca17f4f 100644 --- a/devops/scripts/benchmarks/benches/llamacpp.py +++ b/devops/scripts/benchmarks/benches/llamacpp.py @@ -102,7 +102,7 @@ def description(self) -> str: ) def get_tags(self): - return ['sycl', 'application'] + return ['SYCL', 'application', 'inference', 'throughput'] def lower_is_better(self): return False diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py index d9d435baa064e..50f35182eaddc 100644 --- a/devops/scripts/benchmarks/benches/syclbench.py +++ b/devops/scripts/benchmarks/benches/syclbench.py @@ -113,7 +113,16 @@ def extra_env_vars(self) -> dict: return {} def get_tags(self): - return ['sycl', 'micro'] + base_tags = ['SYCL', 'micro'] + if "Memory" in self.bench_name or "mem" in self.bench_name.lower(): + base_tags.append('memory') + if "Reduction" in self.bench_name: + base_tags.append('math') + if "Bandwidth" in self.bench_name: + base_tags.append('throughput') + if "Latency" in self.bench_name: + base_tags.append('latency') + return base_tags def setup(self): 
self.benchmark_bin = os.path.join( diff --git a/devops/scripts/benchmarks/benches/umf.py b/devops/scripts/benchmarks/benches/umf.py index ea2ecfd175a85..60964fcf93298 100644 --- a/devops/scripts/benchmarks/benches/umf.py +++ b/devops/scripts/benchmarks/benches/umf.py @@ -75,7 +75,7 @@ def setup(self): self.benchmark_bin = os.path.join(options.umf, "benchmark", self.bench_name) def get_tags(self): - return ['umf', 'allocation'] + return ['UMF', 'allocation', 'latency', 'micro'] def run(self, env_vars) -> list[Result]: command = [ diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py index 6ff3178202481..623079067b91d 100644 --- a/devops/scripts/benchmarks/benches/velocity.py +++ b/devops/scripts/benchmarks/benches/velocity.py @@ -119,7 +119,7 @@ def description(self) -> str: return "" def get_tags(self): - return ['sycl', 'application'] + return ['SYCL', 'application'] def run(self, env_vars) -> list[Result]: env_vars.update(self.extra_env_vars()) @@ -175,6 +175,9 @@ def parse_output(self, stdout: str) -> float: "{self.__class__.__name__}: Failed to parse keys per second from benchmark output." ) + def get_tags(self): + return ['SYCL', 'application', 'throughput'] + class Bitcracker(VelocityBase): def __init__(self, vb: VelocityBench): @@ -213,6 +216,9 @@ def parse_output(self, stdout: str) -> float: "{self.__class__.__name__}: Failed to parse benchmark output." ) + def get_tags(self): + return ['SYCL', 'application', 'throughput'] + class SobelFilter(VelocityBase): def __init__(self, vb: VelocityBench): @@ -259,6 +265,9 @@ def parse_output(self, stdout: str) -> float: "{self.__class__.__name__}: Failed to parse benchmark output." ) + def get_tags(self): + return ['SYCL', 'application', 'image', 'throughput'] + class QuickSilver(VelocityBase): def __init__(self, vb: VelocityBench): @@ -306,6 +315,9 @@ def parse_output(self, stdout: str) -> float: "{self.__class__.__name__}: Failed to parse benchmark output." 
) + def get_tags(self): + return ['SYCL', 'application', 'simulation', 'throughput'] + class Easywave(VelocityBase): def __init__(self, vb: VelocityBench): @@ -370,6 +382,9 @@ def parse_output(self, stdout: str) -> float: os.path.join(options.benchmark_cwd, "easywave.log") ) + def get_tags(self): + return ['SYCL', 'application', 'simulation'] + class CudaSift(VelocityBase): def __init__(self, vb: VelocityBench): @@ -398,6 +413,9 @@ def parse_output(self, stdout: str) -> float: else: raise ValueError("Failed to parse benchmark output.") + def get_tags(self): + return ['SYCL', 'application', 'image'] + class DLCifar(VelocityBase): def __init__(self, vb: VelocityBench): @@ -449,6 +467,9 @@ def parse_output(self, stdout: str) -> float: else: raise ValueError("Failed to parse benchmark output.") + def get_tags(self): + return ['SYCL', 'application', 'inference', 'image'] + class DLMnist(VelocityBase): def __init__(self, vb: VelocityBench): @@ -534,6 +555,9 @@ def parse_output(self, stdout: str) -> float: else: raise ValueError("Failed to parse benchmark output.") + def get_tags(self): + return ['SYCL', 'application', 'inference', 'image'] + class SVM(VelocityBase): def __init__(self, vb: VelocityBench): @@ -576,3 +600,6 @@ def parse_output(self, stdout: str) -> float: return float(match.group(1)) else: raise ValueError("Failed to parse benchmark output.") + + def get_tags(self): + return ['SYCL', 'application', 'inference'] From cec8f05d40a00981e04c97ecb0abb47b4d2fa4de Mon Sep 17 00:00:00 2001 From: pbalcer Date: Thu, 20 Mar 2025 14:31:27 +0100 Subject: [PATCH 068/114] formatting --- devops/scripts/benchmarks/benches/base.py | 38 +++++++-------- devops/scripts/benchmarks/benches/compute.py | 43 +++++++++++------ devops/scripts/benchmarks/benches/llamacpp.py | 2 +- .../scripts/benchmarks/benches/syclbench.py | 10 ++-- devops/scripts/benchmarks/benches/umf.py | 2 +- devops/scripts/benchmarks/benches/velocity.py | 20 ++++---- devops/scripts/benchmarks/html/index.html | 19 ++++---- devops/scripts/benchmarks/html/styles.css | 46 ++++++------------- devops/scripts/benchmarks/options.py | 1 + devops/scripts/benchmarks/output_html.py | 6 +-- .../benchmarks/utils/compute_runtime.py | 5 +- 11 files changed, 96 insertions(+), 96 deletions(-) diff --git a/devops/scripts/benchmarks/benches/base.py b/devops/scripts/benchmarks/benches/base.py index 901235f6e1455..4c2973d250e3d 100644 --- a/devops/scripts/benchmarks/benches/base.py +++ b/devops/scripts/benchmarks/benches/base.py @@ -12,27 +12,28 @@ from utils.utils import download, run benchmark_tags = [ - BenchmarkTag('SYCL', 'Benchmark uses SYCL runtime'), - BenchmarkTag('UR', 'Benchmark uses Unified Runtime API'), - BenchmarkTag('L0', 'Benchmark uses Level Zero API directly'), - BenchmarkTag('UMF', 'Benchmark uses Unified Memory Framework directly'), - BenchmarkTag('micro', 'Microbenchmark focusing on a specific functionality'), - BenchmarkTag('application', 'Real application-based performance test'), - BenchmarkTag('proxy', 'Benchmark that simulates real application use-cases'), - BenchmarkTag('submit', 'Tests kernel submission performance'), - BenchmarkTag('math', 'Tests math computation performance'), - BenchmarkTag('memory', 'Tests memory transfer or bandwidth performance'), - BenchmarkTag('allocation', 'Tests memory allocation performance'), - BenchmarkTag('graph', 'Tests graph-based execution performance'), - BenchmarkTag('latency', 'Measures operation latency'), - BenchmarkTag('throughput', 'Measures operation throughput'), - 
BenchmarkTag('inference', 'Tests ML/AI inference performance'), - BenchmarkTag('image', 'Image processing benchmark'), - BenchmarkTag('simulation', 'Physics or scientific simulation benchmark'), + BenchmarkTag("SYCL", "Benchmark uses SYCL runtime"), + BenchmarkTag("UR", "Benchmark uses Unified Runtime API"), + BenchmarkTag("L0", "Benchmark uses Level Zero API directly"), + BenchmarkTag("UMF", "Benchmark uses Unified Memory Framework directly"), + BenchmarkTag("micro", "Microbenchmark focusing on a specific functionality"), + BenchmarkTag("application", "Real application-based performance test"), + BenchmarkTag("proxy", "Benchmark that simulates real application use-cases"), + BenchmarkTag("submit", "Tests kernel submission performance"), + BenchmarkTag("math", "Tests math computation performance"), + BenchmarkTag("memory", "Tests memory transfer or bandwidth performance"), + BenchmarkTag("allocation", "Tests memory allocation performance"), + BenchmarkTag("graph", "Tests graph-based execution performance"), + BenchmarkTag("latency", "Measures operation latency"), + BenchmarkTag("throughput", "Measures operation throughput"), + BenchmarkTag("inference", "Tests ML/AI inference performance"), + BenchmarkTag("image", "Image processing benchmark"), + BenchmarkTag("simulation", "Physics or scientific simulation benchmark"), ] benchmark_tags_dict = {tag.name: tag for tag in benchmark_tags} + class Benchmark: def __init__(self, directory, suite): self.directory = directory @@ -134,9 +135,10 @@ def get_metadata(self) -> BenchmarkMetadata: description=self.description(), notes=self.notes(), unstable=self.unstable(), - tags=self.get_tags() + tags=self.get_tags(), ) + class Suite: def benchmarks(self) -> list[Benchmark]: raise NotImplementedError() diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index e1f24e0178789..cd4ab7cd9b26c 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -26,6 +26,7 @@ def runtime_to_name(runtime: RUNTIMES) -> str: RUNTIMES.UR: "Unified Runtime", }[runtime] + def runtime_to_tag_name(runtime: RUNTIMES) -> str: return { RUNTIMES.SYCL: "SYCL", @@ -84,16 +85,15 @@ def additionalMetadata(self) -> dict[str, BenchmarkMetadata]: "The first layer is the Level Zero API, the second is the Unified Runtime API, and the third is the SYCL API.\n" "The UR v2 adapter noticeably reduces UR layer overhead, also improving SYCL performance.\n" "Work is ongoing to reduce the overhead of the SYCL API\n", - tags=['submit', 'micro', 'SYCL', 'UR', 'L0'] + tags=["submit", "micro", "SYCL", "UR", "L0"], ), "SinKernelGraph": BenchmarkMetadata( type="group", unstable="This benchmark combines both eager and graph execution, and may not be representative of real use cases.", - tags=['submit', 'memory', 'proxy', 'SYCL', 'UR', 'L0', 'graph'] + tags=["submit", "memory", "proxy", "SYCL", "UR", "L0", "graph"], ), "SubmitGraph": BenchmarkMetadata( - type="group", - tags=['submit', 'micro', 'SYCL', 'UR', 'L0', 'graph'] + type="group", tags=["submit", "micro", "SYCL", "UR", "L0", "graph"] ), } @@ -279,7 +279,7 @@ def __init__(self, bench, runtime: RUNTIMES, ioq, measure_completion=0): ) def get_tags(self): - return ['submit', 'latency', runtime_to_tag_name(self.runtime), 'micro'] + return ["submit", "latency", runtime_to_tag_name(self.runtime), "micro"] def name(self): order = "in order" if self.ioq else "out of order" @@ -344,7 +344,7 @@ def description(self) -> str: ) def get_tags(self): - return 
['memory', 'submit', 'latency', 'SYCL', 'micro'] + return ["memory", "submit", "latency", "SYCL", "micro"] def bin_args(self) -> list[str]: return [ @@ -377,7 +377,7 @@ def description(self) -> str: ) def get_tags(self): - return ['memory', 'latency', 'SYCL', 'micro'] + return ["memory", "latency", "SYCL", "micro"] def bin_args(self) -> list[str]: return [ @@ -407,7 +407,7 @@ def description(self) -> str: ) def get_tags(self): - return ['memory', 'latency', 'SYCL', 'micro'] + return ["memory", "latency", "SYCL", "micro"] def bin_args(self) -> list[str]: return [ @@ -439,7 +439,7 @@ def lower_is_better(self): return False def get_tags(self): - return ['memory', 'throughput', 'SYCL', 'micro'] + return ["memory", "throughput", "SYCL", "micro"] def bin_args(self) -> list[str]: return [ @@ -468,7 +468,7 @@ def description(self) -> str: ) def get_tags(self): - return ['math', 'throughput', 'SYCL', 'micro'] + return ["math", "throughput", "SYCL", "micro"] def bin_args(self) -> list[str]: return [ @@ -517,7 +517,7 @@ def description(self) -> str: ) def get_tags(self): - return ['memory', 'latency', 'UR', 'micro'] + return ["memory", "latency", "UR", "micro"] def bin_args(self) -> list[str]: return [ @@ -560,7 +560,14 @@ def unstable(self) -> str: return "This benchmark combines both eager and graph execution, and may not be representative of real use cases." def get_tags(self): - return ['graph', runtime_to_tag_name(self.runtime), 'proxy', 'submit', 'memory', 'latency'] + return [ + "graph", + runtime_to_tag_name(self.runtime), + "proxy", + "submit", + "memory", + "latency", + ] def bin_args(self) -> list[str]: return [ @@ -595,7 +602,13 @@ def name(self): return f"graph_api_benchmark_{self.runtime.value} SubmitGraph numKernels:{self.numKernels} ioq {self.inOrderQueue} measureCompletion {self.measureCompletionTime}" def get_tags(self): - return ['graph', runtime_to_tag_name(self.runtime), 'micro', 'submit', 'latency'] + return [ + "graph", + runtime_to_tag_name(self.runtime), + "micro", + "submit", + "latency", + ] def bin_args(self) -> list[str]: return [ @@ -625,7 +638,7 @@ def name(self): return f"ulls_benchmark_{self.runtime.value} EmptyKernel wgc:{self.wgc}, wgs:{self.wgs}" def get_tags(self): - return [runtime_to_tag_name(self.runtime), 'micro', 'latency', 'submit'] + return [runtime_to_tag_name(self.runtime), "micro", "latency", "submit"] def bin_args(self) -> list[str]: return [ @@ -666,7 +679,7 @@ def name(self): return f"ulls_benchmark_{self.runtime.value} KernelSwitch count {self.count} kernelTime {self.kernelTime}" def get_tags(self): - return [runtime_to_tag_name(self.runtime), 'micro', 'latency', 'submit'] + return [runtime_to_tag_name(self.runtime), "micro", "latency", "submit"] def bin_args(self) -> list[str]: return [ diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py index cf203bca17f4f..19af2498a0a63 100644 --- a/devops/scripts/benchmarks/benches/llamacpp.py +++ b/devops/scripts/benchmarks/benches/llamacpp.py @@ -102,7 +102,7 @@ def description(self) -> str: ) def get_tags(self): - return ['SYCL', 'application', 'inference', 'throughput'] + return ["SYCL", "application", "inference", "throughput"] def lower_is_better(self): return False diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py index 50f35182eaddc..f1e366aa5bc4b 100644 --- a/devops/scripts/benchmarks/benches/syclbench.py +++ b/devops/scripts/benchmarks/benches/syclbench.py @@ -113,15 +113,15 @@ def 
extra_env_vars(self) -> dict: return {} def get_tags(self): - base_tags = ['SYCL', 'micro'] + base_tags = ["SYCL", "micro"] if "Memory" in self.bench_name or "mem" in self.bench_name.lower(): - base_tags.append('memory') + base_tags.append("memory") if "Reduction" in self.bench_name: - base_tags.append('math') + base_tags.append("math") if "Bandwidth" in self.bench_name: - base_tags.append('throughput') + base_tags.append("throughput") if "Latency" in self.bench_name: - base_tags.append('latency') + base_tags.append("latency") return base_tags def setup(self): diff --git a/devops/scripts/benchmarks/benches/umf.py b/devops/scripts/benchmarks/benches/umf.py index 60964fcf93298..f0b92777dd2f8 100644 --- a/devops/scripts/benchmarks/benches/umf.py +++ b/devops/scripts/benchmarks/benches/umf.py @@ -75,7 +75,7 @@ def setup(self): self.benchmark_bin = os.path.join(options.umf, "benchmark", self.bench_name) def get_tags(self): - return ['UMF', 'allocation', 'latency', 'micro'] + return ["UMF", "allocation", "latency", "micro"] def run(self, env_vars) -> list[Result]: command = [ diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py index 623079067b91d..0e1f20999c731 100644 --- a/devops/scripts/benchmarks/benches/velocity.py +++ b/devops/scripts/benchmarks/benches/velocity.py @@ -119,7 +119,7 @@ def description(self) -> str: return "" def get_tags(self): - return ['SYCL', 'application'] + return ["SYCL", "application"] def run(self, env_vars) -> list[Result]: env_vars.update(self.extra_env_vars()) @@ -176,7 +176,7 @@ def parse_output(self, stdout: str) -> float: ) def get_tags(self): - return ['SYCL', 'application', 'throughput'] + return ["SYCL", "application", "throughput"] class Bitcracker(VelocityBase): @@ -217,7 +217,7 @@ def parse_output(self, stdout: str) -> float: ) def get_tags(self): - return ['SYCL', 'application', 'throughput'] + return ["SYCL", "application", "throughput"] class SobelFilter(VelocityBase): @@ -266,7 +266,7 @@ def parse_output(self, stdout: str) -> float: ) def get_tags(self): - return ['SYCL', 'application', 'image', 'throughput'] + return ["SYCL", "application", "image", "throughput"] class QuickSilver(VelocityBase): @@ -316,7 +316,7 @@ def parse_output(self, stdout: str) -> float: ) def get_tags(self): - return ['SYCL', 'application', 'simulation', 'throughput'] + return ["SYCL", "application", "simulation", "throughput"] class Easywave(VelocityBase): @@ -383,7 +383,7 @@ def parse_output(self, stdout: str) -> float: ) def get_tags(self): - return ['SYCL', 'application', 'simulation'] + return ["SYCL", "application", "simulation"] class CudaSift(VelocityBase): @@ -414,7 +414,7 @@ def parse_output(self, stdout: str) -> float: raise ValueError("Failed to parse benchmark output.") def get_tags(self): - return ['SYCL', 'application', 'image'] + return ["SYCL", "application", "image"] class DLCifar(VelocityBase): @@ -468,7 +468,7 @@ def parse_output(self, stdout: str) -> float: raise ValueError("Failed to parse benchmark output.") def get_tags(self): - return ['SYCL', 'application', 'inference', 'image'] + return ["SYCL", "application", "inference", "image"] class DLMnist(VelocityBase): @@ -556,7 +556,7 @@ def parse_output(self, stdout: str) -> float: raise ValueError("Failed to parse benchmark output.") def get_tags(self): - return ['SYCL', 'application', 'inference', 'image'] + return ["SYCL", "application", "inference", "image"] class SVM(VelocityBase): @@ -602,4 +602,4 @@ def parse_output(self, stdout: str) -> float: 
raise ValueError("Failed to parse benchmark output.") def get_tags(self): - return ['SYCL', 'application', 'inference'] + return ["SYCL", "application", "inference"] diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html index 41fe6996ed432..ba8e77c6aff9e 100644 --- a/devops/scripts/benchmarks/html/index.html +++ b/devops/scripts/benchmarks/html/index.html @@ -36,13 +36,6 @@

[first hunk body unrecoverable: HTML markup was stripped during extraction; surviving text nodes: "Benchmark Results", "Options", "Suites", "Display Options"]
@@ -56,12 +49,16 @@
[second hunk body unrecoverable: HTML markup was stripped during extraction; surviving text nodes: "Display Options", "Tags", "Suites", "Tags"]

diff --git a/devops/scripts/benchmarks/html/styles.css b/devops/scripts/benchmarks/html/styles.css index 9a3c5fe69b287..3e9c3bd22fc37 100644 --- a/devops/scripts/benchmarks/html/styles.css +++ b/devops/scripts/benchmarks/html/styles.css @@ -242,11 +242,18 @@ details[open] summary::after { font-size: 18px; font-weight: 500; text-align: left; + display: flex; + align-items: center; } #suite-filters { display: flex; flex-wrap: wrap; - gap: 8px; + max-height: 200px; + overflow-y: auto; + border: 1px solid #dee2e6; + border-radius: 4px; + padding: 8px; + background-color: #f8f9fa; } .display-options { display: flex; @@ -286,7 +293,7 @@ details[open] summary::after { padding: 2px 8px; border-radius: 12px; font-size: 12px; - cursor: default; + cursor: help; } .tag-filter { @@ -320,44 +327,21 @@ details[open] summary::after { background-color: #f8f9fa; } -.tag-filter-actions { - margin-bottom: 8px; - display: flex; - gap: 8px; -} - -.tag-filter-actions button { - padding: 4px 8px; +.tag-action-button { + padding: 2px 8px; background: #e2e6ea; border: none; border-radius: 4px; cursor: pointer; + font-size: 12px; + margin-left: 8px; + vertical-align: middle; } -.tag-filter-actions button:hover { +.tag-action-button:hover { background: #ced4da; } -#active-tags { - display: none; - flex-wrap: wrap; - gap: 8px; - margin-top: 12px; - padding: 8px; - background-color: #f8f9fa; - border-radius: 4px; -} - -.active-tag { - display: flex; - align-items: center; - background-color: #0068B5; - color: white; - padding: 4px 8px; - border-radius: 12px; - font-size: 14px; -} - .remove-tag { background: none; border: none; diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index 267c7f8142c2f..c852e50c71372 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -47,4 +47,5 @@ class Options: custom_results_dir = None build_jobs: int = multiprocessing.cpu_count() + options = Options() diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py index 429b24eb632c8..319e796a3831d 100644 --- a/devops/scripts/benchmarks/output_html.py +++ b/devops/scripts/benchmarks/output_html.py @@ -17,13 +17,13 @@ def generate_html( metadata: dict[str, BenchmarkMetadata], ): benchmark_runs.sort(key=lambda run: run.date, reverse=True) - + # Create the comprehensive output object output = BenchmarkOutput( runs=benchmark_runs, metadata=metadata, tags=benchmark_tags_dict, - default_compare_names=compare_names + default_compare_names=compare_names, ) if options.output_html == "local": @@ -37,7 +37,7 @@ def generate_html( f.write("benchmarkMetadata = ") json.dump(json.loads(output.to_json())["metadata"], f, indent=2) f.write(";\n\n") - + f.write("benchmarkTags = ") json.dump(json.loads(output.to_json())["tags"], f, indent=2) f.write(";\n\n") diff --git a/devops/scripts/benchmarks/utils/compute_runtime.py b/devops/scripts/benchmarks/utils/compute_runtime.py index 85271726e715c..e617168f37a76 100644 --- a/devops/scripts/benchmarks/utils/compute_runtime.py +++ b/devops/scripts/benchmarks/utils/compute_runtime.py @@ -143,7 +143,10 @@ def build_igc(self, repo, commit): run(configure_command) # set timeout to 2h. IGC takes A LONG time to build if building from scratch. - run(f"cmake --build {self.igc_build} -j {options.build_jobs}", timeout=60 * 60 * 2) + run( + f"cmake --build {self.igc_build} -j {options.build_jobs}", + timeout=60 * 60 * 2, + ) # cmake --install doesn't work... 
run("make install", cwd=self.igc_build) return self.igc_install From a0d8370e5011ecb62dc31b7c82542d3e979429d8 Mon Sep 17 00:00:00 2001 From: pbalcer Date: Thu, 20 Mar 2025 15:00:10 +0100 Subject: [PATCH 069/114] fix fetching tags from remote json --- devops/scripts/benchmarks/html/scripts.js | 1 + 1 file changed, 1 insertion(+) diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js index fbfb496533194..e09b420e95f21 100644 --- a/devops/scripts/benchmarks/html/scripts.js +++ b/devops/scripts/benchmarks/html/scripts.js @@ -789,6 +789,7 @@ function loadData() { .then(data => { benchmarkRuns = data.runs || data; benchmarkMetadata = data.metadata || benchmarkMetadata || {}; + benchmarkTags = data.tags || benchmarkTags || {}; initializeCharts(); }) .catch(error => { From c7f8d1084c95af7a8fa2406a666d7b29a9ad6553 Mon Sep 17 00:00:00 2001 From: pbalcer Date: Thu, 20 Mar 2025 16:06:00 +0100 Subject: [PATCH 070/114] fix results /w descriptions and add url/commit of benchmarks --- devops/scripts/benchmarks/benches/compute.py | 13 ++++++++++--- devops/scripts/benchmarks/benches/llamacpp.py | 13 ++++++++++--- devops/scripts/benchmarks/benches/syclbench.py | 12 ++++++++++-- devops/scripts/benchmarks/benches/test.py | 1 - devops/scripts/benchmarks/benches/velocity.py | 13 ++++++++++--- devops/scripts/benchmarks/utils/result.py | 3 ++- 6 files changed, 42 insertions(+), 13 deletions(-) diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index cd4ab7cd9b26c..0646aa500450a 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -42,6 +42,12 @@ def __init__(self, directory): def name(self) -> str: return "Compute Benchmarks" + def git_url(self) -> str: + return "https://github.com/intel/compute-benchmarks.git" + + def git_hash(self) -> str: + return "b5cc46acf61766ab00da04e85bd4da4f7591eb21" + def setup(self): if options.sycl is None: return @@ -49,8 +55,8 @@ def setup(self): repo_path = git_clone( self.directory, "compute-benchmarks-repo", - "https://github.com/intel/compute-benchmarks.git", - "b5cc46acf61766ab00da04e85bd4da4f7591eb21", + self.git_url(), + self.git_hash(), ) build_path = create_build_path(self.directory, "compute-benchmarks-build") @@ -237,7 +243,8 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit=parse_unit_type(unit), - description=self.description(), + git_url=self.git_url(), + git_hash=self.git_hash(), ) ) return ret diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py index 19af2498a0a63..33ffd1f11eabd 100644 --- a/devops/scripts/benchmarks/benches/llamacpp.py +++ b/devops/scripts/benchmarks/benches/llamacpp.py @@ -25,6 +25,12 @@ def __init__(self, directory): def name(self) -> str: return "llama.cpp bench" + def git_url(self) -> str: + return "https://github.com/ggerganov/llama.cpp" + + def git_hash(self) -> str: + return "1ee9eea094fe5846c7d8d770aa7caa749d246b23" + def setup(self): if options.sycl is None: return @@ -32,8 +38,8 @@ def setup(self): repo_path = git_clone( self.directory, "llamacpp-repo", - "https://github.com/ggerganov/llama.cpp", - "1ee9eea094fe5846c7d8d770aa7caa749d246b23", + self.git_url(), + self.git_hash(), ) self.models_dir = os.path.join(self.directory, "models") @@ -142,7 +148,8 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, 
unit="token/s", - description=self.description(), + git_url=self.git_url(), + git_hash=self.git_hash(), ) ) return results diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py index f1e366aa5bc4b..0d924f7427ef0 100644 --- a/devops/scripts/benchmarks/benches/syclbench.py +++ b/devops/scripts/benchmarks/benches/syclbench.py @@ -23,6 +23,12 @@ def __init__(self, directory): def name(self) -> str: return "SYCL-Bench" + def git_url(self) -> str: + return "https://github.com/unisa-hpc/sycl-bench.git" + + def git_hash(self) -> str: + return "31fc70be6266193c4ba60eb1fe3ce26edee4ca5b" + def setup(self): if options.sycl is None: return @@ -31,8 +37,8 @@ def setup(self): repo_path = git_clone( self.directory, "sycl-bench-repo", - "https://github.com/unisa-hpc/sycl-bench.git", - "31fc70be6266193c4ba60eb1fe3ce26edee4ca5b", + self.git_url(), + self.git_hash(), ) configure_command = [ @@ -159,6 +165,8 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=row, unit="ms", + git_url=self.git_url(), + git_hash=self.git_hash(), ) ) diff --git a/devops/scripts/benchmarks/benches/test.py b/devops/scripts/benchmarks/benches/test.py index 7afdd803b5cc3..ad1e8c9e57735 100644 --- a/devops/scripts/benchmarks/benches/test.py +++ b/devops/scripts/benchmarks/benches/test.py @@ -99,7 +99,6 @@ def run(self, env_vars) -> list[Result]: env={"A": "B"}, stdout="no output", unit="ms", - description=self.description(), ) ] diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py index 0e1f20999c731..4db6a87a97325 100644 --- a/devops/scripts/benchmarks/benches/velocity.py +++ b/devops/scripts/benchmarks/benches/velocity.py @@ -26,6 +26,12 @@ def __init__(self, directory): def name(self) -> str: return "Velocity Bench" + def git_url(self) -> str: + return "https://github.com/oneapi-src/Velocity-Bench/" + + def git_hash(self) -> str: + return "b22215c16f789100449c34bf4eaa3fb178983d69" + def setup(self): if options.sycl is None: return @@ -33,8 +39,8 @@ def setup(self): self.repo_path = git_clone( self.directory, "velocity-bench-repo", - "https://github.com/oneapi-src/Velocity-Bench/", - "b22215c16f789100449c34bf4eaa3fb178983d69", + self.git_url(), + self.git_hash(), ) def benchmarks(self) -> list[Benchmark]: @@ -139,7 +145,8 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit=self.unit, - description=self.description(), + git_url=self.git_url(), + git_hash=self.git_hash(), ) ] diff --git a/devops/scripts/benchmarks/utils/result.py b/devops/scripts/benchmarks/utils/result.py index 82fc7ca1fddc2..b9ebfdcb60952 100644 --- a/devops/scripts/benchmarks/utils/result.py +++ b/devops/scripts/benchmarks/utils/result.py @@ -27,7 +27,8 @@ class Result: name: str = "" lower_is_better: bool = True suite: str = "Unknown" - + git_url: str = "" + git_hash: str = "" @dataclass_json @dataclass From 1dad51339a1f6684aa82c2023c7718bbf74c0be0 Mon Sep 17 00:00:00 2001 From: pbalcer Date: Thu, 20 Mar 2025 16:37:23 +0100 Subject: [PATCH 071/114] fix git repo/hash for benchmarks --- devops/scripts/benchmarks/benches/compute.py | 4 ++-- devops/scripts/benchmarks/benches/llamacpp.py | 4 ++-- devops/scripts/benchmarks/benches/syclbench.py | 4 ++-- devops/scripts/benchmarks/benches/velocity.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/devops/scripts/benchmarks/benches/compute.py 
b/devops/scripts/benchmarks/benches/compute.py index 0646aa500450a..d83a0d081af57 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -243,8 +243,8 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit=parse_unit_type(unit), - git_url=self.git_url(), - git_hash=self.git_hash(), + git_url=self.bench.git_url(), + git_hash=self.bench.git_hash(), ) ) return ret diff --git a/devops/scripts/benchmarks/benches/llamacpp.py b/devops/scripts/benchmarks/benches/llamacpp.py index 33ffd1f11eabd..86d41ed525292 100644 --- a/devops/scripts/benchmarks/benches/llamacpp.py +++ b/devops/scripts/benchmarks/benches/llamacpp.py @@ -148,8 +148,8 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit="token/s", - git_url=self.git_url(), - git_hash=self.git_hash(), + git_url=self.bench.git_url(), + git_hash=self.bench.git_hash(), ) ) return results diff --git a/devops/scripts/benchmarks/benches/syclbench.py b/devops/scripts/benchmarks/benches/syclbench.py index 0d924f7427ef0..9854c92d338fc 100644 --- a/devops/scripts/benchmarks/benches/syclbench.py +++ b/devops/scripts/benchmarks/benches/syclbench.py @@ -165,8 +165,8 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=row, unit="ms", - git_url=self.git_url(), - git_hash=self.git_hash(), + git_url=self.bench.git_url(), + git_hash=self.bench.git_hash(), ) ) diff --git a/devops/scripts/benchmarks/benches/velocity.py b/devops/scripts/benchmarks/benches/velocity.py index 4db6a87a97325..493298dea8b10 100644 --- a/devops/scripts/benchmarks/benches/velocity.py +++ b/devops/scripts/benchmarks/benches/velocity.py @@ -145,8 +145,8 @@ def run(self, env_vars) -> list[Result]: env=env_vars, stdout=result, unit=self.unit, - git_url=self.git_url(), - git_hash=self.git_hash(), + git_url=self.vb.git_url(), + git_hash=self.vb.git_hash(), ) ] From 9f1df9a63675eae64875355fb2fb774d653d40da Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 20 Mar 2025 12:33:30 -0700 Subject: [PATCH 072/114] [test] bump threshold to 0.01 to trigger failrues --- devops/scripts/benchmarks/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index 14717eb6db595..c1ba090523eb6 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -45,7 +45,7 @@ class Options: preset: str = "Full" custom_results_dir = None - regression_threshold: float = 0.05 + regression_threshold: float = 0.01 timestamp_override: str = None From be7271cff23e13c3583a9ac365f4dde5f3eb3842 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 24 Mar 2025 12:50:02 -0700 Subject: [PATCH 073/114] Rename ambiguous 'benchmarks.yml' to a better name --- .../workflows/{benchmark.yml => sycl-ur-perf-benchmarking.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{benchmark.yml => sycl-ur-perf-benchmarking.yml} (100%) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/sycl-ur-perf-benchmarking.yml similarity index 100% rename from .github/workflows/benchmark.yml rename to .github/workflows/sycl-ur-perf-benchmarking.yml From c55313bcc3530cd3f41a84505885e102f3ce2fed Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 26 Mar 2025 12:32:19 -0700 Subject: [PATCH 074/114] Remove sycl-benchmark-aggregate instrumentation --- .../workflows/sycl-benchmark-aggregate.yml | 83 ++++++++++--------- 1 file changed, 44 insertions(+), 39 deletions(-) diff --git 
a/.github/workflows/sycl-benchmark-aggregate.yml b/.github/workflows/sycl-benchmark-aggregate.yml index b6f391f3e1e6d..87f7ef718160a 100644 --- a/.github/workflows/sycl-benchmark-aggregate.yml +++ b/.github/workflows/sycl-benchmark-aggregate.yml @@ -1,47 +1,52 @@ -name: Test benchmark.yml using sycl-benchmark-aggregate +name: Aggregate compute-benchmark averages from historical data + +# The benchmarking workflow in sycl-linux-run-tests.yml passes or fails based on +# how the benchmark results compare to a historical average: This historical +# average is calculated in this workflow, which aggregates historical data and +# produces measures of central tendency (median in this case) used for this +# purpose. on: workflow_dispatch: inputs: - commit_hash: - description: Specific commit hash to build SYCL from - type: string - required: false - upload_results: - description: 'Save and upload results' - type: choice - options: - - false - - true - default: true - runner: - type: choice - options: - - '["PVC_PERF"]' - backend: - description: Backend to use - type: choice - options: - - 'level_zero:gpu' - # TODO L0 V2 support - reset_intel_gpu: - description: Reset Intel GPUs - type: choice - options: - - false - - true - default: true + lookback_days: + description: | + Number of days from today to look back in historical results for: + This sets the age limit of data used in average calculation: Any + benchmark results created before `lookback_days` from today is + excluded from being aggregated in the historical average. + type: number + required: true + workflow_call: + inputs: + lookback_days: + type: number + required: true + secrets: + LLVM_SYCL_BENCHMARK_TOKEN: + description: | + Github token used by the faceless account to push newly calculated + medians. 
+ required: true + -permissions: read-all +permissions: + contents: read jobs: aggregate: - name: Test benchmark.yml - uses: ./.github/workflows/benchmark.yml - secrets: inherit - with: - commit_hash: ${{ inputs.commit_hash }} - upload_results: ${{ inputs.upload_results }} - runner: ${{ inputs.runner }} - backend: ${{ inputs.backend }} - reset_intel_gpu: ${{ inputs.reset_intel_gpu }} \ No newline at end of file + name: Aggregate average (median) value for all metrics + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + sparse-checkout: | + devops/scripts/benchmarking + devops/benchmarking + devops/actions/benchmarking + - name: Aggregate benchmark results and produce historical average + uses: ./devops/actions/benchmarking/aggregate + with: + lookback_days: ${{ inputs.lookback_days }} + env: + GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} From d0d1d3d06728f809c4d39a8adf18bbd65795f8f9 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 28 Mar 2025 12:21:06 -0700 Subject: [PATCH 075/114] Enable build from PR and L0v2 --- .github/workflows/sycl-linux-run-tests.yml | 4 +- .../workflows/sycl-ur-perf-benchmarking.yml | 58 ++++++++++++++----- .../actions/run-tests/benchmark_v2/action.yml | 19 +++--- 3 files changed, 57 insertions(+), 24 deletions(-) diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index 3a93c2aae254c..27645fb559ca6 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -118,7 +118,7 @@ on: type: string default: 'false' required: False - benchmark_build_hash: + benchmark_save_name: type: string default: '' required: False @@ -355,7 +355,7 @@ jobs: with: target_devices: ${{ inputs.target_devices }} upload_results: ${{ inputs.benchmark_upload_results }} - build_hash: ${{ inputs.benchmark_build_hash }} + save_name: ${{ inputs.benchmark_save_name }} env: RUNNER_TAG: ${{ inputs.runner }} GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/sycl-ur-perf-benchmarking.yml b/.github/workflows/sycl-ur-perf-benchmarking.yml index 8e860bce6a384..cf5d9f3fde006 100644 --- a/.github/workflows/sycl-ur-perf-benchmarking.yml +++ b/.github/workflows/sycl-ur-perf-benchmarking.yml @@ -5,8 +5,23 @@ on: - cron: '0 1 * * *' # 2 hrs earlier than sycl-nightly.yml workflow_call: inputs: + pr_no: + type: string + description: | + PR no. to build SYCL from if specified: SYCL will be built from HEAD + of incoming branch used by the specified PR no. + + If both pr_no and commit_hash are empty, the latest SYCL nightly build + will be used. + required: false + default: '' commit_hash: type: string + description: | + Commit hash (within intel/llvm) to build SYCL from if specified. + + If both pr_no and commit_hash are empty, the latest SYCL nightly build + will be used. required: false default: '' upload_results: @@ -25,9 +40,20 @@ on: workflow_dispatch: inputs: + pr_no: + type: string + description: | + PR no. to build SYCL from: + + SYCL will be built from HEAD of incoming branch. + required: false + default: '' commit_hash: - description: Commit hash to build intel/llvm from type: string + description: | + Commit hash (within intel/llvm) to build SYCL from: + + Leave both pr_no and commit_hash empty to use last SYCL nightly build. 
required: false default: '' upload_results: @@ -46,7 +72,8 @@ on: type: choice options: - 'level_zero:gpu' - # TODO L0 V2 support + - 'level_zero_v2:gpu' + # As of #17407, sycl-linux-build now builds v2 by default reset_intel_gpu: description: Reset Intel GPUs type: choice @@ -59,37 +86,37 @@ permissions: read-all jobs: build_sycl: - name: Build SYCL from PR - if: inputs.commit_hash != '' + if: inputs.commit_hash != '' || inputs.pr_no != '' + name: Build SYCL uses: ./.github/workflows/sycl-linux-build.yml with: - build_ref: ${{ inputs.commit_hash }} + build_ref: ${{ inputs.commit_hash != '' && inputs.commit_hash || format('refs/pull/{0}/head', inputs.pr_no) }} build_cache_root: "/__w/" build_artifact_suffix: "default" build_cache_suffix: "default" - # Docker image has last nightly pre-installed and added to the PATH build_image: "ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest" cc: clang cxx: clang++ changes: '[]' run_benchmarks_build: + if: inputs.commit_hash != '' || inputs.pr_no != '' name: Run Benchmarks (on PR Build) needs: [ build_sycl ] - if: inputs.commit_hash != '' strategy: matrix: - # Set default values if not specified: include: - - runner: ${{ inputs.runner || '["PVC_PERF"]' }} + - ref: ${{ inputs.commit_hash != '' && inputs.commit_hash || format('refs/pull/{0}/head', inputs.pr_no) }} + save_name: ${{ inputs.commit_hash != '' && format('Commit{0}', inputs.commit_hash) || format('PR{0}', inputs.pr_no) }} + # Set default values if not specified: + runner: ${{ inputs.runner || '["PVC_PERF"]' }} backend: ${{ inputs.backend || 'level_zero:gpu' }} reset_intel_gpu: ${{ inputs.reset_intel_gpu || 'true' }} - ref: ${{ inputs.commit_hash }} uses: ./.github/workflows/sycl-linux-run-tests.yml secrets: inherit with: # TODO support other benchmarks - name: Run compute-benchmarks (${{ matrix.runner }}, ${{ matrix.backend }}) + name: Run compute-benchmarks (${{ matrix.save_name }}, ${{ matrix.runner }}, ${{ matrix.backend }}) runner: ${{ matrix.runner }} image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN @@ -97,7 +124,7 @@ jobs: reset_intel_gpu: ${{ matrix.reset_intel_gpu }} tests_selector: benchmark_v2 benchmark_upload_results: ${{ inputs.upload_results }} - benchmark_build_hash: ${{ inputs.commit_hash }} + benchmark_save_name: ${{ matrix.save_name }} repo_ref: ${{ matrix.ref }} devops_ref: ${{ github.ref }} sycl_toolchain_artifact: sycl_linux_default @@ -106,7 +133,7 @@ jobs: run_benchmarks_nightly: name: Run Benchmarks (on Nightly Build) - if: inputs.commit_hash == '' + if: inputs.commit_hash == '' && inputs.pr_no == '' strategy: matrix: # Set default values if not specified: @@ -118,12 +145,13 @@ jobs: secrets: inherit with: # TODO support other benchmarks - name: Run compute-benchmarks (${{ matrix.runner }}, ${{ matrix.backend }}) + name: Run compute-benchmarks (Nightly, ${{ matrix.runner }}, ${{ matrix.backend }}) runner: ${{ matrix.runner }} image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN target_devices: ${{ matrix.backend }} reset_intel_gpu: ${{ matrix.reset_intel_gpu }} tests_selector: benchmark_v2 + benchmark_save_name: Baseline benchmark_upload_results: ${{ inputs.upload_results }} - repo_ref: ${{ github.ref }} + repo_ref: ${{ github.ref }} # TODO figure out nightly commit hash diff --git a/devops/actions/run-tests/benchmark_v2/action.yml 
b/devops/actions/run-tests/benchmark_v2/action.yml index fa03ec9a060c5..d540ef58d29e1 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -19,9 +19,9 @@ inputs: upload_results: type: string required: True - build_hash: + save_name: type: string - required: False + required: True default: '' runs: @@ -86,7 +86,9 @@ runs: git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results - name: Run compute-benchmarks env: - BUILD_HASH: ${{ inputs.build_hash }} + # Need to append "__" to save name in order to follow + # conventions: + SAVE_PREFIX: ${{ inputs.save_name }} shell: bash run: | # TODO generate summary + display helpful message here @@ -98,12 +100,15 @@ runs: echo "-----" mkdir -p "./llvm-ci-perf-results/$RUNNER_NAME" + case "$ONEAPI_DEVICE_SELECTOR" in + level_zero:*) SAVE_SUFFIX="L0" ;; + level_zero_v2:*) SAVE_SUFFIX="L0v2" ;; + opencl:*) SAVE_SUFFIX="OCL" ;; + *) SAVE_SUFFIX="${ONEAPI_DEVICE_SELECTOR%%:*}";; + esac # TODO accomodate for different GPUs and backends - SAVE_NAME="Baseline_PVC_L0" + SAVE_NAME="${SAVE_PREFIX}_PVC_${SAVE_SUFFIX}" SAVE_TIMESTAMP="$(date +'%Y%m%d_%H%M%S')" - if [ -n "$BUILD_HASH" ]; then - SAVE_NAME="Commit_PVC_$BUILD_HASH" - fi taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \ "$(realpath ./llvm_test_workdir)" \ From 4c515586d22fde76eeb8b341e3ad2e15eb3a6e83 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 28 Mar 2025 14:07:15 -0700 Subject: [PATCH 076/114] Introduce presets --- .github/workflows/sycl-linux-run-tests.yml | 5 +++ .../workflows/sycl-ur-perf-benchmarking.yml | 19 ++++++++++ .../actions/run-tests/benchmark_v2/action.yml | 11 +++++- devops/scripts/benchmarks/presets.py | 37 +++++++++++++++++++ 4 files changed, 70 insertions(+), 2 deletions(-) diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index 27645fb559ca6..944eb97ffe381 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -122,6 +122,10 @@ on: type: string default: '' required: False + benchmark_preset: + type: string + default: 'Minimal' + required: False workflow_dispatch: inputs: @@ -356,6 +360,7 @@ jobs: target_devices: ${{ inputs.target_devices }} upload_results: ${{ inputs.benchmark_upload_results }} save_name: ${{ inputs.benchmark_save_name }} + preset: ${{ inputs.benchmark_preset }} env: RUNNER_TAG: ${{ inputs.runner }} GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/sycl-ur-perf-benchmarking.yml b/.github/workflows/sycl-ur-perf-benchmarking.yml index cf5d9f3fde006..e2cdcb58396c2 100644 --- a/.github/workflows/sycl-ur-perf-benchmarking.yml +++ b/.github/workflows/sycl-ur-perf-benchmarking.yml @@ -5,6 +5,12 @@ on: - cron: '0 1 * * *' # 2 hrs earlier than sycl-nightly.yml workflow_call: inputs: + preset: + type: string + description: | + Benchmark presets to run: See /devops/scripts/benchmarks/presets.py + required: false + default: 'Minimal' # Only compute-benchmarks pr_no: type: string description: | @@ -40,6 +46,17 @@ on: workflow_dispatch: inputs: + preset: + type: choice + description: | + Benchmark presets to run, See /devops/scripts/benchmarks/presets.py. Hint: Minimal is compute-benchmarks only. 
+ options: + - Full + - SYCL + - Minimal + - Normal + - Test + default: 'Minimal' # Only compute-benchmarks pr_no: type: string description: | @@ -125,6 +142,7 @@ jobs: tests_selector: benchmark_v2 benchmark_upload_results: ${{ inputs.upload_results }} benchmark_save_name: ${{ matrix.save_name }} + benchmark_preset: ${{ inputs.preset }} repo_ref: ${{ matrix.ref }} devops_ref: ${{ github.ref }} sycl_toolchain_artifact: sycl_linux_default @@ -154,4 +172,5 @@ jobs: tests_selector: benchmark_v2 benchmark_save_name: Baseline benchmark_upload_results: ${{ inputs.upload_results }} + benchmark_preset: ${{ inputs.preset }} repo_ref: ${{ github.ref }} # TODO figure out nightly commit hash diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index d540ef58d29e1..0339c4337a759 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -22,7 +22,9 @@ inputs: save_name: type: string required: True - default: '' + preset: + type: string + required: True runs: using: "composite" @@ -32,6 +34,7 @@ runs: env: TARGET_DEVICE: ${{ inputs.target_devices }} RUNNER_NAME: ${{ runner.name }} + PRESET: ${{ inputs.preset }} run: | case "$RUNNER_TAG" in '["PVC_PERF"]' ) ;; @@ -61,6 +64,10 @@ runs: esac echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV + # Make sure specified preset is a known value and is not malicious + python3 ./devops/scripts/benchmarks/preset.py "$PRESET" + [ "$?" -ne 0 ] && exit 1 # Stop workflow if invalid preset + echo "PRESET=$PRESET" >> $GITHUB_ENV - name: Compute CPU core range to run benchmarks on shell: bash run: | @@ -117,7 +124,7 @@ runs: --output-html remote \ --results-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ --output-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ - --preset Minimal \ + --preset "$PRESET" \ --timestamp-override "$SAVE_TIMESTAMP" echo "-----" python3 ./devops/scripts/benchmarks/compare.py to_hist \ diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py index 3f191766deb8c..42a49b732ff3c 100644 --- a/devops/scripts/benchmarks/presets.py +++ b/devops/scripts/benchmarks/presets.py @@ -3,6 +3,8 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +import argparse + presets: dict[str, list[str]] = { "Full": [ "Compute Benchmarks", @@ -36,3 +38,38 @@ def enabled_suites(preset: str) -> list[str]: return presets[preset] except KeyError: raise ValueError(f"Preset '{preset}' not found.") + + +def main(): + parser = argparse.ArgumentParser(description="Benchmark Preset Utilities") + subparsers = parser.add_subparsers(dest="command", required=True) + + query_parser = subparsers.add_parser( + "query", + help="Query benchmarks ran by a preset (as defined in presets.py)" + ) + validate_parser.add_argument( + "preset_to_query", + type=str, + help="preset name to query" + ) + validate_parser.add_argument( + "-q", "--quiet", + action="store_true", + help="Disable stdout messages: Useful if you want to check if a preset exists within a shell script." 
+ ) + + args = parser.parse_args() + if args.command == 'query': + if args.preset_to_query in presets: + if not args.quiet: + print(f"Benchmark suites to be ran in {args.preset_to_query}:") + for suite in presets[args.preset_to_query]: + print(suite) + exit(0) + else: + if not args.quiet: print(f"Error: No preset named '{args.preset_to_query}'.") + exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file From 63d22353e0c883489713b2fc5db6e0015077b066 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 28 Mar 2025 14:13:08 -0700 Subject: [PATCH 077/114] Fix typo --- devops/actions/run-tests/benchmark_v2/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index 0339c4337a759..4b9c0c2f61fd2 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -65,7 +65,7 @@ runs: echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV # Make sure specified preset is a known value and is not malicious - python3 ./devops/scripts/benchmarks/preset.py "$PRESET" + python3 ./devops/scripts/benchmarks/presets.py "$PRESET" [ "$?" -ne 0 ] && exit 1 # Stop workflow if invalid preset echo "PRESET=$PRESET" >> $GITHUB_ENV - name: Compute CPU core range to run benchmarks on From 23330fc2bc1463ae456a70648101e5647b2ae7bf Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 28 Mar 2025 14:14:15 -0700 Subject: [PATCH 078/114] Fix typo part 2. --- devops/scripts/benchmarks/presets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py index 42a49b732ff3c..167283baeea7c 100644 --- a/devops/scripts/benchmarks/presets.py +++ b/devops/scripts/benchmarks/presets.py @@ -48,12 +48,12 @@ def main(): "query", help="Query benchmarks ran by a preset (as defined in presets.py)" ) - validate_parser.add_argument( + query_parser.add_argument( "preset_to_query", type=str, help="preset name to query" ) - validate_parser.add_argument( + query_parser.add_argument( "-q", "--quiet", action="store_true", help="Disable stdout messages: Useful if you want to check if a preset exists within a shell script." From 0d79d8993c69e2017e7231da2ef44447bea197aa Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 28 Mar 2025 14:15:13 -0700 Subject: [PATCH 079/114] Fix typo pt 3. --- devops/actions/run-tests/benchmark_v2/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index 4b9c0c2f61fd2..59d3c0fa96f6f 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -65,7 +65,7 @@ runs: echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV # Make sure specified preset is a known value and is not malicious - python3 ./devops/scripts/benchmarks/presets.py "$PRESET" + python3 ./devops/scripts/benchmarks/presets.py query "$PRESET" [ "$?" 
-ne 0 ] && exit 1 # Stop workflow if invalid preset echo "PRESET=$PRESET" >> $GITHUB_ENV - name: Compute CPU core range to run benchmarks on From a8048b2597930d96279a1283a791e3d613657827 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Sun, 30 Mar 2025 20:06:34 -0700 Subject: [PATCH 080/114] Reset ur-build-hw.sh --- .github/workflows/ur-build-hw.yml | 2 +- {devops => unified-runtime/.github}/scripts/get_system_info.sh | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename {devops => unified-runtime/.github}/scripts/get_system_info.sh (100%) diff --git a/.github/workflows/ur-build-hw.yml b/.github/workflows/ur-build-hw.yml index eebac4e424a4b..a0f94ab10f538 100644 --- a/.github/workflows/ur-build-hw.yml +++ b/.github/workflows/ur-build-hw.yml @@ -156,4 +156,4 @@ jobs: - name: Get information about platform if: ${{ always() }} - run: ${{github.workspace}}/devops/scripts/get_system_info.sh + run: ${{github.workspace}}/unified-runtime/.github/scripts/get_system_info.sh diff --git a/devops/scripts/get_system_info.sh b/unified-runtime/.github/scripts/get_system_info.sh similarity index 100% rename from devops/scripts/get_system_info.sh rename to unified-runtime/.github/scripts/get_system_info.sh From 29d125c5d2f5e053536ee7ef4df568f9bb7ee170 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Sun, 30 Mar 2025 20:09:54 -0700 Subject: [PATCH 081/114] Add comments explaining executable section in presets.py --- devops/scripts/benchmarks/presets.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py index 167283baeea7c..61ce9f4aebc49 100644 --- a/devops/scripts/benchmarks/presets.py +++ b/devops/scripts/benchmarks/presets.py @@ -40,6 +40,8 @@ def enabled_suites(preset: str) -> list[str]: raise ValueError(f"Preset '{preset}' not found.") +# Utility scripts to validate a given preset, useful for e.g. 
CI: + def main(): parser = argparse.ArgumentParser(description="Benchmark Preset Utilities") subparsers = parser.add_subparsers(dest="command", required=True) @@ -72,4 +74,4 @@ def main(): exit(1) if __name__ == "__main__": - main() \ No newline at end of file + main() From 5a3afcbd19b14a615e514156daa034ac4e414eef Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Sun, 30 Mar 2025 20:16:42 -0700 Subject: [PATCH 082/114] Revert stuff that shouldnt be merged --- .github/workflows/sycl-linux-run-tests.yml | 1 - devops/scripts/benchmarks/requirements.txt | 1 - devops/scripts/benchmarks/utils/validate.py | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index 59da98228bae9..257b1d4723916 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -136,7 +136,6 @@ on: - '["cts-cpu"]' - '["Linux", "build"]' - '["cuda"]' - - '["Linux", "bmg"]' - '["PVC_PERF"]' image: type: choice diff --git a/devops/scripts/benchmarks/requirements.txt b/devops/scripts/benchmarks/requirements.txt index 9f0381ceef6c2..99ba0caab55c2 100644 --- a/devops/scripts/benchmarks/requirements.txt +++ b/devops/scripts/benchmarks/requirements.txt @@ -2,4 +2,3 @@ matplotlib==3.9.2 mpld3==0.5.10 dataclasses-json==0.6.7 PyYAML==6.0.1 -Mako==1.3.9 diff --git a/devops/scripts/benchmarks/utils/validate.py b/devops/scripts/benchmarks/utils/validate.py index 2d01255487a44..3d22c242f8301 100644 --- a/devops/scripts/benchmarks/utils/validate.py +++ b/devops/scripts/benchmarks/utils/validate.py @@ -19,4 +19,4 @@ def timestamp(t: str) -> bool: timestamp_re = re.compile( r"^\d{4}(0[1-9]|1[0-2])([0-2][0-9]|3[01])_([01][0-9]|2[0-3])[0-5][0-9][0-5][0-9]$" ) - return timestamp_re.match(t) is not None \ No newline at end of file + return timestamp_re.match(t) is not None From b6d42d41298d92c9f7f6003adfd243cfa574d18f Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Sun, 30 Mar 2025 20:22:16 -0700 Subject: [PATCH 083/114] Finally no more reset_intel_gpu --- .github/workflows/sycl-ur-perf-benchmarking.yml | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/.github/workflows/sycl-ur-perf-benchmarking.yml b/.github/workflows/sycl-ur-perf-benchmarking.yml index e2cdcb58396c2..2713d60f0a2b9 100644 --- a/.github/workflows/sycl-ur-perf-benchmarking.yml +++ b/.github/workflows/sycl-ur-perf-benchmarking.yml @@ -39,10 +39,6 @@ on: backend: type: string required: true - reset_intel_gpu: - type: string # true/false: workflow_dispatch does not support booleans - required: true - default: true workflow_dispatch: inputs: @@ -91,13 +87,6 @@ on: - 'level_zero:gpu' - 'level_zero_v2:gpu' # As of #17407, sycl-linux-build now builds v2 by default - reset_intel_gpu: - description: Reset Intel GPUs - type: choice - options: - - false - - true - default: true permissions: read-all @@ -128,17 +117,14 @@ jobs: # Set default values if not specified: runner: ${{ inputs.runner || '["PVC_PERF"]' }} backend: ${{ inputs.backend || 'level_zero:gpu' }} - reset_intel_gpu: ${{ inputs.reset_intel_gpu || 'true' }} uses: ./.github/workflows/sycl-linux-run-tests.yml secrets: inherit with: - # TODO support other benchmarks name: Run compute-benchmarks (${{ matrix.save_name }}, ${{ matrix.runner }}, ${{ matrix.backend }}) runner: ${{ matrix.runner }} image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN 
target_devices: ${{ matrix.backend }} - reset_intel_gpu: ${{ matrix.reset_intel_gpu }} tests_selector: benchmark_v2 benchmark_upload_results: ${{ inputs.upload_results }} benchmark_save_name: ${{ matrix.save_name }} @@ -158,7 +144,6 @@ jobs: include: - runner: ${{ inputs.runner || '["PVC_PERF"]' }} backend: ${{ inputs.backend || 'level_zero:gpu' }} - reset_intel_gpu: ${{ inputs.reset_intel_gpu || 'true' }} uses: ./.github/workflows/sycl-linux-run-tests.yml secrets: inherit with: @@ -168,7 +153,6 @@ jobs: image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN target_devices: ${{ matrix.backend }} - reset_intel_gpu: ${{ matrix.reset_intel_gpu }} tests_selector: benchmark_v2 benchmark_save_name: Baseline benchmark_upload_results: ${{ inputs.upload_results }} From 8b3b79cae30920595d1e410d72034c8b57dd17b9 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 31 Mar 2025 10:26:41 -0700 Subject: [PATCH 084/114] Remove streaming median --- devops/scripts/benchmarks/utils/aggregate.py | 52 -------------------- 1 file changed, 52 deletions(-) diff --git a/devops/scripts/benchmarks/utils/aggregate.py b/devops/scripts/benchmarks/utils/aggregate.py index a6db4d36334c4..36ee7cbecaae6 100644 --- a/devops/scripts/benchmarks/utils/aggregate.py +++ b/devops/scripts/benchmarks/utils/aggregate.py @@ -1,4 +1,3 @@ -import heapq import statistics from abc import ABC, abstractmethod @@ -52,54 +51,3 @@ def add(self, n: float): def get_avg(self) -> float: return statistics.median(self.elements) - - -class StreamingMedian(Aggregator): - """ - Calculate medians incrementally using heaps: Theoretically the fastest way - to calculate a median from a stream of elements, but realistically is only - faster when dealing with huge numbers of samples that would be generated by - i.e. enabling this workflow in precommit and using longer periods of time. - """ - - def __init__(self, starting_elements: list = []): - # Gist: we keep a minheap and a maxheap, and store the median as the top - # of the minheap. When a new element comes it gets put into the heap - # based on if the element is bigger than the current median. Then, the - # heaps are heapified and the median is repopulated by heapify. - self.minheap_larger = [] - self.maxheap_smaller = [] - - map(lambda n: self.add(n), starting_elements) - - @staticmethod - def get_type() -> str: - return "median" - - # Note: numbers on maxheap should be negative, as heapq - # is minheap by default - - def add(self, n: float): - if len(self.maxheap_smaller) == 0 or -self.maxheap_smaller[0] >= n: - heapq.heappush(self.maxheap_smaller, -n) - else: - heapq.heappush(self.minheap_larger, n) - - # Ensure minheap has more elements than maxheap - if len(self.maxheap_smaller) > len(self.minheap_larger) + 1: - heapq.heappush(self.minheap_larger, -heapq.heappop(self.maxheap_smaller)) - elif len(self.maxheap_smaller) < len(self.minheap_larger): - heapq.heappush(self.maxheap_smaller, -heapq.heappop(self.minheap_larger)) - - def get_avg(self) -> float: - if len(self.maxheap_smaller) == len(self.minheap_larger): - # Equal number of elements smaller and larger than "median": - # thus, there are two median values. The median would then become - # the average of both median values. 
- return (-self.maxheap_smaller[0] + self.minheap_larger[0]) / 2.0 - else: - # Otherwise, median is always in minheap, as minheap is always - # bigger - return -self.maxheap_smaller[0] - - From 3a070d588f062e68edf9bb1c0d022b67cc268554 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 31 Mar 2025 10:52:34 -0700 Subject: [PATCH 085/114] Add missing newlines --- .github/workflows/sycl-linux-run-tests.yml | 2 +- devops/scripts/benchmarks/compare.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index 257b1d4723916..f73c731eec506 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -335,4 +335,4 @@ jobs: preset: ${{ inputs.benchmark_preset }} env: RUNNER_TAG: ${{ inputs.runner }} - GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} \ No newline at end of file + GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} diff --git a/devops/scripts/benchmarks/compare.py b/devops/scripts/benchmarks/compare.py index d538577b0ce35..082648a1005ad 100644 --- a/devops/scripts/benchmarks/compare.py +++ b/devops/scripts/benchmarks/compare.py @@ -295,4 +295,4 @@ def print_regression(entry: dict): exit(1) # Exit 1 to trigger github test failure else: print("Unsupported operation: exiting.") - exit(1) \ No newline at end of file + exit(1) From 186b36e1d14bd28dcb4b34cc408e7399173091a3 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 31 Mar 2025 11:54:53 -0700 Subject: [PATCH 086/114] Allegedly, runner name is already baked into github_env --- devops/actions/run-tests/benchmark_v2/action.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index 59d3c0fa96f6f..ba5d74e45f88a 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -33,7 +33,6 @@ runs: shell: bash env: TARGET_DEVICE: ${{ inputs.target_devices }} - RUNNER_NAME: ${{ runner.name }} PRESET: ${{ inputs.preset }} run: | case "$RUNNER_TAG" in @@ -51,7 +50,6 @@ runs: echo "Bad runner name, please ensure runner name is [a-zA-Z0-9_-]." 
exit 1 fi - echo "RUNNER_NAME=$RUNNER_NAME" >> $GITHUB_ENV # input.target_devices is not directly used, as this allows code injection case "$TARGET_DEVICE" in From de280a532de18a8b6804046673d622e1a6e07f43 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 31 Mar 2025 20:33:19 -0700 Subject: [PATCH 087/114] Modify save directory structure, amend hostname behavior for github runners --- .../actions/run-tests/benchmark_v2/action.yml | 5 +- devops/scripts/benchmarks/compare.py | 83 ++++++++++++++----- devops/scripts/benchmarks/history.py | 19 ++++- devops/scripts/benchmarks/utils/validate.py | 10 +-- 4 files changed, 88 insertions(+), 29 deletions(-) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index ba5d74e45f88a..7507ea09492db 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -103,7 +103,6 @@ runs: echo "-----" pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt echo "-----" - mkdir -p "./llvm-ci-perf-results/$RUNNER_NAME" case "$ONEAPI_DEVICE_SELECTOR" in level_zero:*) SAVE_SUFFIX="L0" ;; @@ -120,8 +119,8 @@ runs: --sycl "$(realpath ./toolchain)" \ --save "$SAVE_NAME" \ --output-html remote \ - --results-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ - --output-dir "./llvm-ci-perf-results/$RUNNER_NAME" \ + --results-dir "./llvm-ci-perf-results/" \ + --output-dir "./llvm-ci-perf-results/" \ --preset "$PRESET" \ --timestamp-override "$SAVE_TIMESTAMP" echo "-----" diff --git a/devops/scripts/benchmarks/compare.py b/devops/scripts/benchmarks/compare.py index 082648a1005ad..e4de190b76d03 100644 --- a/devops/scripts/benchmarks/compare.py +++ b/devops/scripts/benchmarks/compare.py @@ -39,8 +39,8 @@ class Compare: """Class containing logic for comparisons between results""" @staticmethod def get_hist_avg( - result_name: str, result_dir: str, cutoff: str, aggregator=SimpleMedian, - exclude: list[str] = [] + result_name: str, result_dir: str, hostname: str, cutoff: str, + aggregator: Aggregator = SimpleMedian, exclude: list[str] = [] ) -> dict[str, BenchmarkHistoricAverage]: """ Create a historic average for results named result_name in result_dir @@ -51,6 +51,7 @@ def get_hist_avg( result_dir (str): Path to folder containing benchmark results cutoff (str): Timestamp in YYYYMMDD_HHMMSS of oldest results used in average calcultaion + hostname (str): Hostname of machine on which results ran on aggregator (Aggregator): The aggregator to use for calculating the historic average exclude (list[str]): List of filenames (only the stem) to exclude @@ -60,6 +61,9 @@ def get_hist_avg( A dictionary mapping benchmark names to BenchmarkHistoricAverage objects """ + if not Validate.timestamp(cutoff): + raise ValueError("Provided cutoff time is not a proper timestamp.") + def get_timestamp(f: str) -> str: """Extract timestamp from result filename""" return str(f)[-len("YYYYMMDD_HHMMSS.json") : -len(".json")] @@ -67,7 +71,11 @@ def get_timestamp(f: str) -> str: def get_result_paths() -> list[str]: """ Get a list of all results matching result_name in result_dir that is - newer than the timestamp specified by cutoff + newer than the timestamp specified by cutoff based off of filename. + + This function assumes filenames of benchmark result files are + accurate; files returned by this function will be checked a second + time once their contents are actually loaded. 
""" cache_dir = Path(f"{result_dir}") @@ -84,6 +92,23 @@ def get_result_paths() -> list[str]: cache_dir.glob(f"{result_name}_*_*.json") ) ) + + def check_benchmark_result(result: BenchmarkRun) -> bool: + """ + Returns True if result file: + - Was ran on the target machine/hostname specified + - Sanity check: ensure metadata are all expected values: + - Date is truly before cutoff timestamp + - Name truly matches up with specified result_name + """ + if result.hostname != hostname: + return False + if result.name != result_name: + print(f"Warning: Result file {result_path} does not match specified result name {result.name}.") + return False + if result.date < datetime.strptime(cutoff, "%Y%m%d_%H%M%S"): + return False + return True # key: name of the benchmark test result # value: { command_args: set[str], aggregate: Aggregator } @@ -95,9 +120,13 @@ def get_result_paths() -> list[str]: for result_path in get_result_paths(): with result_path.open('r') as result_f: result = BenchmarkRun.from_json(json.load(result_f)) - - if result.name != result_name: - print(f"Warning: Result file {result_path} has mismatching name {result.name}. Skipping file.") + + # Perform another check on result file here, as get_result_paths() + # only filters out result files via filename, which: + # - does not contain enough information to filter out results, i.e. + # no hostname information. + # - information in filename may be mismatched from metadata. + if not check_benchmark_result(result): continue for test_run in result.results: @@ -139,26 +168,25 @@ def reset_aggregate() -> dict: def to_hist_avg( - hist_avg: dict[str, BenchmarkHistoricAverage], compare_file: str + hist_avg: dict[str, BenchmarkHistoricAverage], target: BenchmarkRun ) -> tuple: """ - Compare results in compare_file to a pre-existing map of historic - averages + Compare results in target to a pre-existing map of historic average. + + Caution: Ensure the generated hist_avg is for results running on the + same host as target.hostname. Args: hist_avg (dict): A historic average map generated from get_hist_avg - compare_file (str): Full filepath of result to compare against + target (BenchmarkRun): results to compare against hist_avg Returns: A tuple returning (list of improved tests, list of regressed tests). 
""" - with open(compare_file, 'r') as compare_f: - compare_result = BenchmarkRun.from_json(json.load(compare_f)) - improvement = [] regression = [] - for test in compare_result.results: + for test in target.results: if test.name not in hist_avg: continue if hist_avg[test.name].command_args != set(test.command[1:]): @@ -186,10 +214,9 @@ def perf_diff_entry() -> dict: return improvement, regression - def to_hist( - avg_type: str, result_name: str, compare_file: str, result_dir: str, cutoff: str, - + avg_type: str, result_name: str, compare_file: str, result_dir: str, + cutoff: str, ) -> tuple: """ Pregenerate a historic average from results named result_name in @@ -213,17 +240,33 @@ def to_hist( """ if avg_type != "median": - print("Only median is currently supported: refusing to continue.") + print("Only median is currently supported: Refusing to continue.") + exit(1) + + try: + with open(compare_file, 'r') as compare_f: + compare_result = BenchmarkRun.from_json(json.load(compare_f)) + except: + print(f"Unable to open {compare_file}.") + exit(1) + + # Sanity checks: + if compare_result.hostname == "Unknown": + print("Hostname for results in {compare_file} unknown, unable to build a historic average: Refusing to continue.") + exit(1) + if not Validate.timestamp(cutoff): + print("Invalid timestamp provided, please follow YYYYMMDD_HHMMSS.") exit(1) - # TODO call validator on cutoff timestamp + # Build historic average and compare results against historic average: hist_avg = Compare.get_hist_avg( result_name, result_dir, + compare_result.hostname, cutoff, exclude=[Path(compare_file).stem] ) - return Compare.to_hist_avg(hist_avg, compare_file) + return Compare.to_hist_avg(hist_avg, compare_result) if __name__ == "__main__": diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index 191189fa1c4a9..cd3681b3d21cb 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -80,6 +80,23 @@ def create_run(self, name: str, results: list[Result]) -> BenchmarkRun: except: git_hash = "unknown" github_repo = None + + # Check if RUNNER_NAME environment variable has been declared. + # + # RUNNER_NAME is always present in github runner environments. Because + # github runners obfusicate hostnames, using socket.gethostname() + # produces different hostnames when ran on the same machine multiple + # times. Thus, we rely on the RUNNER_NAME variable when running on + # github runners. + hostname = os.getenv("RUNNER_NAME") + if hostname is None: + hostname = socket.gethostname() + else if not Validate.runner_name(hostname): + # However, nothing stops github runner env variables (including + # RUNNER_NAME) from being modified by external actors. Ensure + # RUNNER_NAME contains nothing malicious: + # TODO is this overkill? 
+ raise ValueError("Illegal characters found in specified RUNNER_NAME.") return BenchmarkRun( name=name, @@ -87,7 +104,7 @@ def create_run(self, name: str, results: list[Result]) -> BenchmarkRun: github_repo=github_repo, date=datetime.now(tz=timezone.utc), results=results, - hostname=socket.gethostname(), + hostname=hostname, ) def save(self, save_name, results: list[Result], to_file=True): diff --git a/devops/scripts/benchmarks/utils/validate.py b/devops/scripts/benchmarks/utils/validate.py index 3d22c242f8301..893bba54ef630 100644 --- a/devops/scripts/benchmarks/utils/validate.py +++ b/devops/scripts/benchmarks/utils/validate.py @@ -4,12 +4,12 @@ class Validate: """Static class containing methods for validating various fields""" @staticmethod - def filepath(path: str) -> bool: + def runner_name(runner_name: str) -> bool: """ - Returns True if path is clean (no illegal characters), otherwise False. + Returns True if runner_name is clean (no illegal characters). """ - filepath_re = re.compile(r"[a-zA-Z0-9\/\._\-]+") - return filepath_re.match(path) is not None + runner_name_re = re.compile(r"[a-zA-Z0-9_]+") + return runner_name_re.match(runner_name) is not None @staticmethod def timestamp(t: str) -> bool: @@ -19,4 +19,4 @@ def timestamp(t: str) -> bool: timestamp_re = re.compile( r"^\d{4}(0[1-9]|1[0-2])([0-2][0-9]|3[01])_([01][0-9]|2[0-3])[0-5][0-9][0-5][0-9]$" ) - return timestamp_re.match(t) is not None + return timestamp_re.match(t) is not None \ No newline at end of file From 4f5ce719a9472c94522c5d4b0a76c5c379df3ab4 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 31 Mar 2025 20:36:10 -0700 Subject: [PATCH 088/114] typo fix --- devops/scripts/benchmarks/history.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index cd3681b3d21cb..5ec99c18aed8a 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -91,7 +91,7 @@ def create_run(self, name: str, results: list[Result]) -> BenchmarkRun: hostname = os.getenv("RUNNER_NAME") if hostname is None: hostname = socket.gethostname() - else if not Validate.runner_name(hostname): + elif not Validate.runner_name(hostname): # However, nothing stops github runner env variables (including # RUNNER_NAME) from being modified by external actors. 
Ensure # RUNNER_NAME contains nothing malicious: From 9bd519fee870535bc320ba58ab899685133955e7 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Tue, 1 Apr 2025 13:16:08 -0700 Subject: [PATCH 089/114] Ensure timezones are UTC --- devops/scripts/benchmarks/compare.py | 19 +++++++++++++++---- devops/scripts/benchmarks/history.py | 2 +- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/devops/scripts/benchmarks/compare.py b/devops/scripts/benchmarks/compare.py index e4de190b76d03..59ba2b9b69c96 100644 --- a/devops/scripts/benchmarks/compare.py +++ b/devops/scripts/benchmarks/compare.py @@ -1,4 +1,4 @@ -from utils.aggregate import SimpleMedian +from utils.aggregate import Aggregator, SimpleMedian from utils.validate import Validate from utils.result import Result, BenchmarkRun from options import options @@ -7,6 +7,7 @@ import sys import json import argparse +from datetime import datetime, timezone from pathlib import Path from dataclasses import dataclass, asdict @@ -106,7 +107,7 @@ def check_benchmark_result(result: BenchmarkRun) -> bool: if result.name != result_name: print(f"Warning: Result file {result_path} does not match specified result name {result.name}.") return False - if result.date < datetime.strptime(cutoff, "%Y%m%d_%H%M%S"): + if result.date < datetime.strptime(cutoff, "%Y%m%d_%H%M%S").replace(tzinfo=timezone.utc): return False return True @@ -183,6 +184,14 @@ def to_hist_avg( Returns: A tuple returning (list of improved tests, list of regressed tests). """ + def halfway_round(value: int, n: int): + """ + Python's default round() does banker's rounding, which doesn't + make much sense here. This rounds 0.5 to 1, and -0.5 to -1 + """ + if value == 0: return 0 + return int(value * 10**n + 0.5 * (value / abs(value))) / 10**n + improvement = [] regression = [] @@ -206,9 +215,11 @@ def perf_diff_entry() -> dict: res["avg_type"] = hist_avg[test.name].average_type return res - if delta > options.regression_threshold: + # Round to 2 decimal places: not going to fail a test on 0.001% over + # regression threshold + if halfway_round(delta, 2) > options.regression_threshold: improvement.append(perf_diff_entry()) - elif delta < -options.regression_threshold: + elif halfway_round(delta, 2) < -options.regression_threshold: regression.append(perf_diff_entry()) return improvement, regression diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index 5ec99c18aed8a..46d1d0e1d1212 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -120,7 +120,7 @@ def save(self, save_name, results: list[Result], to_file=True): # Use formatted timestamp for the filename timestamp = ( - datetime.now().strftime("%Y%m%d_%H%M%S") + datetime.now(tz=timezone.utc).strftime("%Y%m%d_%H%M%S") if options.timestamp_override is None else options.timestamp_override ) From 3726a7dd5ff98920b2799b421eed4348b6493cb7 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Tue, 1 Apr 2025 13:16:20 -0700 Subject: [PATCH 090/114] Clarify options --- devops/scripts/benchmarks/main.py | 10 +++++----- devops/scripts/benchmarks/options.py | 13 ++++++++++++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 0484fb7c9654c..3ba6190843688 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -259,8 +259,8 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): chart_data = {this_name: results} results_dir = directory 
- if options.custom_results_dir: - results_dir = Path(options.custom_results_dir) + if options.results_directory_override: + results_dir = Path(options.results_directory_override) history = BenchmarkHistory(results_dir) # limit how many files we load. # should this be configurable? @@ -480,7 +480,7 @@ def validate_and_parse_env_args(env_args): "--results-dir", type=str, help="Specify a custom directory to load/store (historical) results from", - default=options.custom_results_dir, + default=options.results_directory_override, ) parser.add_argument( "--build-jobs", @@ -526,7 +526,7 @@ def validate_and_parse_env_args(env_args): options.cudnn_directory = args.cudnn_directory options.cublas_directory = args.cublas_directory options.preset = args.preset - options.custom_results_dir = args.results_dir + options.results_directory_override = args.results_dir options.build_jobs = args.build_jobs options.hip_arch = args.hip_arch @@ -546,7 +546,7 @@ def validate_and_parse_env_args(env_args): if args.results_dir is not None: if not os.path.isdir(args.results_dir): parser.error("Specified --results-dir is not a valid path") - options.custom_results_dir = os.path.abspath(args.results_dir) + options.results_directory_override = os.path.abspath(args.results_dir) benchmark_filter = re.compile(args.filter) if args.filter else None diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index 74a4487807d07..e513767e05747 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -44,11 +44,22 @@ class Options: build_igc: bool = False current_run_name: str = "This PR" preset: str = "Full" - custom_results_dir = None build_jobs: int = multiprocessing.cpu_count() + # Options applicable to CI only: regression_threshold: float = 0.05 + # In CI, it may be necessary to e.g. compare or redo benchmark runs. + # A timestamp is generated at the beginning of the CI run and used through + # the entire CI process, instead of scripts generating their own timestamps + # every time a script runs (default behavior). timestamp_override: str = None + # By default, the directory to fetch results from is the benchmark working + # directory specified in the CLI args, hence a default value of "None" as + # the value is decided via runtime. + # + # However, sometimes you may want to fetch results from a different + # directory, i.e. in CI when you clone the results directory elsewhere. 
+ results_directory_override: str = None options = Options() From 60d80a99cd2c58573e35654f95ba8e952d093d74 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 2 Apr 2025 10:57:55 -0700 Subject: [PATCH 091/114] enforce UTC time in benchmark action --- devops/actions/run-tests/benchmark_v2/action.yml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index 7507ea09492db..c68dda5e0d3d0 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -101,6 +101,12 @@ runs: echo "-----" sycl-ls echo "-----" + # Using --break-system-packages because: + # - venv is not installed + # - unable to install anything via pip, as python packages in the docker + # container are managed by apt + # - apt is unable to install anything due to unresolved dpkg dependencies, + # as a result of how the sycl nightly images are created pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt echo "-----" @@ -112,7 +118,7 @@ runs: esac # TODO accomodate for different GPUs and backends SAVE_NAME="${SAVE_PREFIX}_PVC_${SAVE_SUFFIX}" - SAVE_TIMESTAMP="$(date +'%Y%m%d_%H%M%S')" + SAVE_TIMESTAMP="$(date -u +'%Y%m%d_%H%M%S')" # Timestamps are in UTC time taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \ "$(realpath ./llvm_test_workdir)" \ @@ -126,8 +132,8 @@ runs: echo "-----" python3 ./devops/scripts/benchmarks/compare.py to_hist \ --name Baseline_PVC_L0 \ - --compare-file "./llvm-ci-perf-results/$RUNNER_NAME/results/${SAVE_NAME}_${SAVE_TIMESTAMP}.json" \ - --results-dir "./llvm-ci-perf-results/$RUNNER_NAME/results/" + --compare-file "./llvm-ci-perf-results/results/${SAVE_NAME}_${SAVE_TIMESTAMP}.json" \ + --results-dir "./llvm-ci-perf-results/results/" - name: Push compute-benchmarks results if: inputs.upload_results == 'true' && always() From c69e8745bfd41c79fd4af73ebdc6c690159b4e72 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 2 Apr 2025 21:43:38 -0700 Subject: [PATCH 092/114] Properly load repo/commit information in CI --- .../actions/run-tests/benchmark_v2/action.yml | 20 ++++- devops/scripts/benchmarks/history.py | 73 +++++++++++-------- devops/scripts/benchmarks/main.py | 57 +++++++++++---- devops/scripts/benchmarks/options.py | 19 +++-- devops/scripts/benchmarks/utils/validate.py | 60 +++++++++++++-- 5 files changed, 167 insertions(+), 62 deletions(-) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index c68dda5e0d3d0..903cb07256c36 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -110,6 +110,19 @@ runs: pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt echo "-----" + # clang builds have git repo / commit hashes in their --version output, + # same goes for dpcpp. Obtain git repo / commit hash info this way: + + # First line of --version is formatted 'clang version ... ( )' + # thus we parse for ( ): + sycl_git_info="$(clang++ --version | head -n 1 | grep -oE '\([^ ]+ [a-f0-9]+\)$')" | tr -d '()' + if [ -z "$sycl_git_info" ]; then + echo "Error: Unable to deduce SYCL build source repo/commit: Are you sure dpcpp variable is in PATH?" 
+ exit 1 + fi + sycl_git_repo="$(printf "$sycl_git_info" | cut -d' ' -f1)" + sycl_git_commit="$(printf "$sycl_git_info" | cut -d' ' -f2)" + case "$ONEAPI_DEVICE_SELECTOR" in level_zero:*) SAVE_SUFFIX="L0" ;; level_zero_v2:*) SAVE_SUFFIX="L0v2" ;; @@ -128,10 +141,13 @@ runs: --results-dir "./llvm-ci-perf-results/" \ --output-dir "./llvm-ci-perf-results/" \ --preset "$PRESET" \ - --timestamp-override "$SAVE_TIMESTAMP" + # CI options: + --timestamp-override "$SAVE_TIMESTAMP" \ + --sycl-github-repo "$sycl_git_repo" \ + --sycl-commit "$sycl_git_commit" echo "-----" python3 ./devops/scripts/benchmarks/compare.py to_hist \ - --name Baseline_PVC_L0 \ + --name "$SAVE_NAME" \ --compare-file "./llvm-ci-perf-results/results/${SAVE_NAME}_${SAVE_TIMESTAMP}.json" \ --results-dir "./llvm-ci-perf-results/results/" diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index 46d1d0e1d1212..eaaaa8276a8be 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -58,45 +58,58 @@ def extract_timestamp(file_path: Path) -> str: self.runs = benchmark_runs def create_run(self, name: str, results: list[Result]) -> BenchmarkRun: - try: - script_dir = os.path.dirname(os.path.abspath(__file__)) - result = run("git rev-parse --short HEAD", cwd=script_dir) - git_hash = result.stdout.decode().strip() - - # Get the GitHub repo URL from git remote - remote_result = run("git remote get-url origin", cwd=script_dir) - remote_url = remote_result.stdout.decode().strip() - - # Convert SSH or HTTPS URL to owner/repo format - if remote_url.startswith("git@github.com:"): - # SSH format: git@github.com:owner/repo.git - github_repo = remote_url.split("git@github.com:")[1].rstrip(".git") - elif remote_url.startswith("https://github.com/"): - # HTTPS format: https://github.com/owner/repo.git - github_repo = remote_url.split("https://github.com/")[1].rstrip(".git") - else: + + def git_info_from_path(path: Path) -> (str, str): + """ + Derives git repo, commit information from git repo located in path. + + Returns: + (str, str): git_hash, github_repo + """ + try: + result = run("git rev-parse --short HEAD", cwd=path) + git_hash = result.stdout.decode().strip() + + # Get the GitHub repo URL from git remote + remote_result = run("git remote get-url origin", cwd=path) + remote_url = remote_result.stdout.decode().strip() + + # Convert SSH or HTTPS URL to owner/repo format + if remote_url.startswith("git@github.com:"): + # SSH format: git@github.com:owner/repo.git + github_repo = remote_url.split("git@github.com:")[1].rstrip(".git") + elif remote_url.startswith("https://github.com/"): + # HTTPS format: https://github.com/owner/repo.git + github_repo = remote_url.split("https://github.com/")[1].rstrip(".git") + else: + github_repo = None + + except: + git_hash = "unknown" github_repo = None + + return git_hash, github_repo - except: - git_hash = "unknown" - github_repo = None + if options.sycl_commit is None or options.sycl_github_repo is None: + git_hash, github_repo = git_info_from_path(os.path.dirname(os.path.abspath(__file__))) + else: + git_hash, github_repo = options.sycl_commit, options.sycl_github_repo # Check if RUNNER_NAME environment variable has been declared. # - # RUNNER_NAME is always present in github runner environments. 
Because
-        # github runners obfusicate hostnames, using socket.gethostname()
-        # produces different hostnames when ran on the same machine multiple
-        # times. Thus, we rely on the RUNNER_NAME variable when running on
-        # github runners.
+        # Github runners obfuscate hostnames, thus running socket.gethostname()
+        # twice produces two different hostnames. Since github runners always
+        # define a RUNNER_NAME variable, use RUNNER_NAME instead if it exists:
         hostname = os.getenv("RUNNER_NAME")
         if hostname is None:
             hostname = socket.gethostname()
-        elif not Validate.runner_name(hostname):
-            # However, nothing stops github runner env variables (including
-            # RUNNER_NAME) from being modified by external actors.
options.timestamp_override = args.timestamp_override if args.results_dir is not None: if not os.path.isdir(args.results_dir): parser.error("Specified --results-dir is not a valid path") options.results_directory_override = os.path.abspath(args.results_dir) + if args.sycl_github_repo is not None or args.sycl_commit is not None: + if args.sycl_github_repo is None or args.sycl_commit is None: + parser.error("--sycl-github-repo and --sycl-commit must both be defined together") + options.sycl_github_repo = args.sycl_github_repo + options.sycl_commit = args.sycl_commit benchmark_filter = re.compile(args.filter) if args.filter else None diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index e513767e05747..9063dd23585da 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -46,20 +46,23 @@ class Options: preset: str = "Full" build_jobs: int = multiprocessing.cpu_count() - # Options applicable to CI only: + # Options intended for CI: regression_threshold: float = 0.05 - # In CI, it may be necessary to e.g. compare or redo benchmark runs. - # A timestamp is generated at the beginning of the CI run and used through - # the entire CI process, instead of scripts generating their own timestamps - # every time a script runs (default behavior). + # It's necessary in CI to compare or redo benchmark runs. Instead of + # generating a new timestamp each run by default, specify a single timestamp + # to use across the entire CI run. timestamp_override: str = None - # By default, the directory to fetch results from is the benchmark working - # directory specified in the CLI args, hence a default value of "None" as - # the value is decided via runtime. + # The default directory to fetch results from is args.benchmark_directory, + # hence a default value of "None" as the value is decided during runtime. # # However, sometimes you may want to fetch results from a different # directory, i.e. in CI when you clone the results directory elsewhere. results_directory_override: str = None + # By default, we fetch SYCL commit info from the folder where main.py is + # located. This doesn't work right when CI uses different commits for e.g. + # CI scripts vs SYCl build source. + sycl_github_repo: str = None + sycl_commit: str = None options = Options() diff --git a/devops/scripts/benchmarks/utils/validate.py b/devops/scripts/benchmarks/utils/validate.py index 893bba54ef630..77bd13f4f9971 100644 --- a/devops/scripts/benchmarks/utils/validate.py +++ b/devops/scripts/benchmarks/utils/validate.py @@ -1,22 +1,68 @@ import re +def validate_on_re(val: str, regex: re.Pattern, throw: Exception = None): + """ + Returns True if val is matched by pattern defined by regex, otherwise False. + + If `throw` argument is not None: return val as-is if val matches regex, + otherwise raise error defined by throw. + """ + is_matching: bool = re.compile(regex).match(val) is not None + + if throw is None: return is_matching + elif not is_matching: raise throw + else: return val + + class Validate: """Static class containing methods for validating various fields""" @staticmethod - def runner_name(runner_name: str) -> bool: + def runner_name(runner_name: str, throw: Exception = None): """ Returns True if runner_name is clean (no illegal characters). 
""" - runner_name_re = re.compile(r"[a-zA-Z0-9_]+") - return runner_name_re.match(runner_name) is not None + return validate_on_re(runner_name, r"^[a-zA-Z0-9_]+$", throw=throw) @staticmethod - def timestamp(t: str) -> bool: + def timestamp(t: str, throw: Exception = None): """ Returns True if t is in form YYYYMMDD_HHMMSS, otherwise False. + + If throw argument is specified: return t as-is if t is in aforementioned + format, otherwise raise error defined by throw. + """ + return validate_on_re( + t, + r"^\d{4}(0[1-9]|1[0-2])([0-2][0-9]|3[01])_([01][0-9]|2[0-3])[0-5][0-9][0-5][0-9]$", + throw=throw + ) + + @staticmethod + def github_repo(repo: str, throw: Exception = None): + """ + Returns True if repo is of form / + + If throw argument is specified: return repo as-is if repo is in + aforementioned format, otherwise raise error defined by throw. """ - timestamp_re = re.compile( - r"^\d{4}(0[1-9]|1[0-2])([0-2][0-9]|3[01])_([01][0-9]|2[0-3])[0-5][0-9][0-5][0-9]$" + return validate_on_re( + re.sub(r"^https?://github.com/", "", repo), + r"^[a-zA-Z0-9_-]{1,39}/[a-zA-Z0-9_.-]{1,100}$", + throw=throw ) - return timestamp_re.match(t) is not None \ No newline at end of file + + @staticmethod + def commit_hash(commit: str, throw: Exception = None, trunc: int = 40): + """ + Returns True if commit is a valid git commit hash. + + If throw argument is specified: return commit hash (truncated to trunc + chars long) if commit is a valid commit hash, otherwise raise error + defined by throw. + """ + commit_re = r"^[a-f0-9]{7,40}$" + if throw is None: + return validate_on_re(commit, commit_re) + else: + return validate_on_re(commit, commit_re, throw=throw)[:trunc] From 6224eaa01ce9fc052a142f2ea5d9f7bcb8702c93 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 2 Apr 2025 22:31:31 -0700 Subject: [PATCH 093/114] [test] debug message --- devops/actions/run-tests/benchmark_v2/action.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index 903cb07256c36..1bfa605c98e8b 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -115,6 +115,7 @@ runs: # First line of --version is formatted 'clang version ... ( )' # thus we parse for ( ): + clang++ --version sycl_git_info="$(clang++ --version | head -n 1 | grep -oE '\([^ ]+ [a-f0-9]+\)$')" | tr -d '()' if [ -z "$sycl_git_info" ]; then echo "Error: Unable to deduce SYCL build source repo/commit: Are you sure dpcpp variable is in PATH?" From f0a9a9722cab9f6462deb8db84361d71b68d04ed Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 2 Apr 2025 22:36:52 -0700 Subject: [PATCH 094/114] I forgot a ) --- devops/actions/run-tests/benchmark_v2/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index 1bfa605c98e8b..3964d4f801851 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -116,7 +116,7 @@ runs: # First line of --version is formatted 'clang version ... 
( )' # thus we parse for ( ): clang++ --version - sycl_git_info="$(clang++ --version | head -n 1 | grep -oE '\([^ ]+ [a-f0-9]+\)$')" | tr -d '()' + sycl_git_info="$(clang++ --version | head -n 1 | grep -oE '\([^ ]+ [a-f0-9]+\)$')" | tr -d '()')" if [ -z "$sycl_git_info" ]; then echo "Error: Unable to deduce SYCL build source repo/commit: Are you sure dpcpp variable is in PATH?" exit 1 From b68c11914967063c192705bc495389c0beaba5b2 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 2 Apr 2025 22:39:51 -0700 Subject: [PATCH 095/114] misplaced ) --- devops/actions/run-tests/benchmark_v2/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index 3964d4f801851..f83276c3f2dba 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -116,7 +116,7 @@ runs: # First line of --version is formatted 'clang version ... ( )' # thus we parse for ( ): clang++ --version - sycl_git_info="$(clang++ --version | head -n 1 | grep -oE '\([^ ]+ [a-f0-9]+\)$')" | tr -d '()')" + sycl_git_info="$(clang++ --version | head -n 1 | grep -oE '\([^ ]+ [a-f0-9]+\)$' | tr -d '()')" if [ -z "$sycl_git_info" ]; then echo "Error: Unable to deduce SYCL build source repo/commit: Are you sure dpcpp variable is in PATH?" exit 1 From cc17af9f71f5f6ef2d2c7a35702133aec5b07370 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 2 Apr 2025 22:47:24 -0700 Subject: [PATCH 096/114] revert test --- devops/actions/run-tests/benchmark_v2/action.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index f83276c3f2dba..12052e33a6b76 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -115,7 +115,6 @@ runs: # First line of --version is formatted 'clang version ... ( )' # thus we parse for ( ): - clang++ --version sycl_git_info="$(clang++ --version | head -n 1 | grep -oE '\([^ ]+ [a-f0-9]+\)$' | tr -d '()')" if [ -z "$sycl_git_info" ]; then echo "Error: Unable to deduce SYCL build source repo/commit: Are you sure dpcpp variable is in PATH?" @@ -142,7 +141,6 @@ runs: --results-dir "./llvm-ci-perf-results/" \ --output-dir "./llvm-ci-perf-results/" \ --preset "$PRESET" \ - # CI options: --timestamp-override "$SAVE_TIMESTAMP" \ --sycl-github-repo "$sycl_git_repo" \ --sycl-commit "$sycl_git_commit" From 64832a64d27820c595729f4823d1d4f2f0e54fc4 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 2 Apr 2025 23:04:27 -0700 Subject: [PATCH 097/114] [test] debug statements --- devops/actions/run-tests/benchmark_v2/action.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index 12052e33a6b76..b7c3840948681 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -122,6 +122,7 @@ runs: fi sycl_git_repo="$(printf "$sycl_git_info" | cut -d' ' -f1)" sycl_git_commit="$(printf "$sycl_git_info" | cut -d' ' -f2)" + echo "$sycl_git_repo, $sycl_git_commit" case "$ONEAPI_DEVICE_SELECTOR" in level_zero:*) SAVE_SUFFIX="L0" ;; From ab0700178b7bccdbcd3fae34747b63467e41b11b Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Wed, 2 Apr 2025 23:08:41 -0700 Subject: [PATCH 098/114] Whitespace was causing issues? 
--- devops/actions/run-tests/benchmark_v2/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index b7c3840948681..928a19d8626c0 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -143,7 +143,7 @@ runs: --output-dir "./llvm-ci-perf-results/" \ --preset "$PRESET" \ --timestamp-override "$SAVE_TIMESTAMP" \ - --sycl-github-repo "$sycl_git_repo" \ + --sycl-github-repo "$sycl_git_repo" \ --sycl-commit "$sycl_git_commit" echo "-----" python3 ./devops/scripts/benchmarks/compare.py to_hist \ From 2b94436f9d6c1ccc5f872ef2813d6295af938e25 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 3 Apr 2025 14:13:45 -0700 Subject: [PATCH 099/114] rename variables and remove sycl_ prefix --- devops/scripts/benchmarks/compare.py | 7 ++----- devops/scripts/benchmarks/history.py | 4 ++-- devops/scripts/benchmarks/main.py | 26 +++++++++++++------------- devops/scripts/benchmarks/options.py | 4 ++-- 4 files changed, 19 insertions(+), 22 deletions(-) diff --git a/devops/scripts/benchmarks/compare.py b/devops/scripts/benchmarks/compare.py index 59ba2b9b69c96..fbce279a9ea13 100644 --- a/devops/scripts/benchmarks/compare.py +++ b/devops/scripts/benchmarks/compare.py @@ -94,7 +94,7 @@ def get_result_paths() -> list[str]: ) ) - def check_benchmark_result(result: BenchmarkRun) -> bool: + def validate_benchmark_result(result: BenchmarkRun) -> bool: """ Returns True if result file: - Was ran on the target machine/hostname specified @@ -127,16 +127,13 @@ def check_benchmark_result(result: BenchmarkRun) -> bool: # - does not contain enough information to filter out results, i.e. # no hostname information. # - information in filename may be mismatched from metadata. - if not check_benchmark_result(result): + if not validate_benchmark_result(result): continue for test_run in result.results: def reset_aggregate() -> dict: return { "command_args": set(test_run.command[1:]), - # The assumption here is that "value" is median - # TODO standardization should happen here on what "value" - # really is "aggregate": aggregator(starting_elements=[test_run.value]) } diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index eaaaa8276a8be..e28914e74ad13 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -90,10 +90,10 @@ def git_info_from_path(path: Path) -> (str, str): return git_hash, github_repo - if options.sycl_commit is None or options.sycl_github_repo is None: + if options.git_commit_override is None or options.github_repo_override is None: git_hash, github_repo = git_info_from_path(os.path.dirname(os.path.abspath(__file__))) else: - git_hash, github_repo = options.sycl_commit, options.sycl_github_repo + git_hash, github_repo = options.git_commit_override, options.github_repo_override # Check if RUNNER_NAME environment variable has been declared. 
# diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 04d8a37510d9b..2006a9084d932 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -506,22 +506,22 @@ def validate_and_parse_env_args(env_args): default=options.timestamp_override, ) parser.add_argument( - "--sycl-github-repo", + "--github-repo", type=lambda gh_repo: Validate.github_repo( gh_repo, - throw=argparse.ArgumentTypeError("Specified SYCL github repo not in / format.") + throw=argparse.ArgumentTypeError("Specified github repo not in / format.") ), - help="Manually specify SYCL github repo used in metadata", - default=options.sycl_github_repo, + help="Manually specify github repo metadata of component tested (e.g. SYCL, UMF)", + default=options.github_repo_override, ) parser.add_argument( - "--sycl-commit", + "--git-commit", type=lambda commit: Validate.commit_hash( commit, - throw=argparse.ArgumentTypeError("Specified SYCL commit is not a valid commit hash.") + throw=argparse.ArgumentTypeError("Specified commit is not a valid commit hash.") ), - help="Manually specify commit hash used to build SYCL in metadata", - default=options.sycl_commit, + help="Manually specify commit hash metadata of component tested (e.g. SYCL, UMF)", + default=options.git_commit_override, ) args = parser.parse_args() @@ -569,11 +569,11 @@ def validate_and_parse_env_args(env_args): if not os.path.isdir(args.results_dir): parser.error("Specified --results-dir is not a valid path") options.results_directory_override = os.path.abspath(args.results_dir) - if args.sycl_github_repo is not None or args.sycl_commit is not None: - if args.sycl_github_repo is None or args.sycl_commit is None: - parser.error("--sycl-github-repo and --sycl-commit must both be defined together") - options.sycl_github_repo = args.sycl_github_repo - options.sycl_commit = args.sycl_commit + if args.github_repo is not None or args.git_commit is not None: + if args.github_repo is None or args.git_commit is None: + parser.error("--github-repo and --git_commit must both be defined together") + options.github_repo_override = args.github_repo + options.git_commit_override = args.git_commit benchmark_filter = re.compile(args.filter) if args.filter else None diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index 9063dd23585da..b96801de0cb06 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -61,8 +61,8 @@ class Options: # By default, we fetch SYCL commit info from the folder where main.py is # located. This doesn't work right when CI uses different commits for e.g. # CI scripts vs SYCl build source. 
- sycl_github_repo: str = None - sycl_commit: str = None + github_repo_override: str = None + git_commit_override: str = None options = Options() From 63c3092428fb7ce0e2b4c41e312907572a665969 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Thu, 3 Apr 2025 14:14:04 -0700 Subject: [PATCH 100/114] Delete text message, fix whitespace --- .github/workflows/sycl-docs.yml | 6 +++--- devops/actions/run-tests/benchmark_v2/action.yml | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/sycl-docs.yml b/.github/workflows/sycl-docs.yml index 6b748ec9c7ebb..7bb6a568892a8 100644 --- a/.github/workflows/sycl-docs.yml +++ b/.github/workflows/sycl-docs.yml @@ -52,10 +52,10 @@ jobs: cp -r $GITHUB_WORKSPACE/repo/devops/scripts/benchmarks/html benchmarks touch .nojekyll # Update benchmarking dashboard configuration - cat << 'EOF' > benchmarks/config.js - remoteDataUrl = 'https://raw.githubusercontent.com/intel/llvm-ci-perf-results/refs/heads/unify-ci/UR_DNP_INTEL_06_03/data.json'; + cat << EOF > benchmarks/config.js + remoteDataUrl = 'https://raw.githubusercontent.com/intel/llvm-ci-perf-results/refs/heads/unify-ci/data.json'; defaultCompareNames = ["Baseline_PVC_L0"]; - EOF + EOF # Upload the generated docs as an artifact and deploy to GitHub Pages. - name: Upload artifact uses: actions/upload-pages-artifact@v3 diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index 928a19d8626c0..e829b7c288bbd 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -122,7 +122,6 @@ runs: fi sycl_git_repo="$(printf "$sycl_git_info" | cut -d' ' -f1)" sycl_git_commit="$(printf "$sycl_git_info" | cut -d' ' -f2)" - echo "$sycl_git_repo, $sycl_git_commit" case "$ONEAPI_DEVICE_SELECTOR" in level_zero:*) SAVE_SUFFIX="L0" ;; @@ -143,8 +142,8 @@ runs: --output-dir "./llvm-ci-perf-results/" \ --preset "$PRESET" \ --timestamp-override "$SAVE_TIMESTAMP" \ - --sycl-github-repo "$sycl_git_repo" \ - --sycl-commit "$sycl_git_commit" + --github-repo "$sycl_git_repo" \ + --git-commit "$sycl_git_commit" echo "-----" python3 ./devops/scripts/benchmarks/compare.py to_hist \ --name "$SAVE_NAME" \ From ca96184b149ed842ca67709e57f7e98627ce9204 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 4 Apr 2025 13:48:06 -0700 Subject: [PATCH 101/114] Set up multiple push attempts in CI --- .../actions/run-tests/benchmark_v2/action.yml | 40 ++++++++++++++++--- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index e829b7c288bbd..734bd45c8b16d 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -157,13 +157,43 @@ runs: cd "./llvm-ci-perf-results" git config user.name "SYCL Benchmarking Bot" git config user.email "sys_sycl_benchmarks@intel.com" - git pull + results_branch="unify-ci" + git add . - # Make sure changes have been made if git diff --quiet && git diff --cached --quiet; then echo "No new results added, skipping push." - else - git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" - git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" unify-ci + exit 0 fi + for attempt in 1 2 3; do + echo "Attempt $attempt to push new results" + git add . 
+ git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" + results_file="$(git diff HEAD~1 --name-only -- results/ | head -n 1)" + + if git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" "$results_branch"; then + echo "Push succeeded" + break + fi + + echo "Push failed, retrying..." + if [ -n "$results_file" ]; then + cached_result="$(mktemp -d)/$(basename $results_file)" + mv "$results_file" "$cached_result" + + git reset --hard "origin/$results_branch" + git pull origin "$results_branch" + + mv "$cached_result" "$results_file" + fi + + echo "Regenerating data.json..." + cd ../ + ./devops/scripts/benchmarks/main.py \ + "$(realpath ./llvm_test_workdir)" \ + --output-html remote \ + --results-dir "./llvm-ci-perf-results/" \ + --output-dir "./llvm-ci-perf-results/" \ + --dry-run + cd - + done From a3d7ff6e0606d2d2be74acd71b2f0d753864f739 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Fri, 4 Apr 2025 14:12:41 -0700 Subject: [PATCH 102/114] Apply clang format --- devops/scripts/benchmarks/compare.py | 119 ++++++++++++-------- devops/scripts/benchmarks/history.py | 31 +++-- devops/scripts/benchmarks/main.py | 12 +- devops/scripts/benchmarks/presets.py | 21 ++-- devops/scripts/benchmarks/utils/validate.py | 14 ++- 5 files changed, 119 insertions(+), 78 deletions(-) diff --git a/devops/scripts/benchmarks/compare.py b/devops/scripts/benchmarks/compare.py index fbce279a9ea13..47170841c693e 100644 --- a/devops/scripts/benchmarks/compare.py +++ b/devops/scripts/benchmarks/compare.py @@ -11,9 +11,11 @@ from pathlib import Path from dataclasses import dataclass, asdict + @dataclass class BenchmarkHistoricAverage: """Contains historic average information for 1 benchmark""" + # Name of benchmark as defined in Benchmark class definition name: str @@ -32,27 +34,32 @@ class BenchmarkHistoricAverage: # # This exists to ensure benchmarks called using different arguments are not # compared together. - command_args: set[str] + command_args: set[str] # TODO Ensure ONEAPI_DEVICE_SELECTOR? GPU name itself? 
class Compare: """Class containing logic for comparisons between results""" + @staticmethod def get_hist_avg( - result_name: str, result_dir: str, hostname: str, cutoff: str, - aggregator: Aggregator = SimpleMedian, exclude: list[str] = [] + result_name: str, + result_dir: str, + hostname: str, + cutoff: str, + aggregator: Aggregator = SimpleMedian, + exclude: list[str] = [], ) -> dict[str, BenchmarkHistoricAverage]: """ Create a historic average for results named result_name in result_dir using the specified aggregator Args: - result_name (str): Name of benchmarking result to obtain average for + result_name (str): Name of benchmarking result to obtain average for result_dir (str): Path to folder containing benchmark results cutoff (str): Timestamp in YYYYMMDD_HHMMSS of oldest results used in average calcultaion - hostname (str): Hostname of machine on which results ran on + hostname (str): Hostname of machine on which results ran on aggregator (Aggregator): The aggregator to use for calculating the historic average exclude (list[str]): List of filenames (only the stem) to exclude @@ -90,10 +97,10 @@ def get_result_paths() -> list[str]: # Result file is not excluded and f.stem not in exclude, # Assumes format is _YYYYMMDD_HHMMSS.json - cache_dir.glob(f"{result_name}_*_*.json") + cache_dir.glob(f"{result_name}_*_*.json"), ) ) - + def validate_benchmark_result(result: BenchmarkRun) -> bool: """ Returns True if result file: @@ -105,21 +112,25 @@ def validate_benchmark_result(result: BenchmarkRun) -> bool: if result.hostname != hostname: return False if result.name != result_name: - print(f"Warning: Result file {result_path} does not match specified result name {result.name}.") + print( + f"Warning: Result file {result_path} does not match specified result name {result.name}." + ) return False - if result.date < datetime.strptime(cutoff, "%Y%m%d_%H%M%S").replace(tzinfo=timezone.utc): + if result.date < datetime.strptime(cutoff, "%Y%m%d_%H%M%S").replace( + tzinfo=timezone.utc + ): return False return True # key: name of the benchmark test result # value: { command_args: set[str], aggregate: Aggregator } - # + # # This is then used to build a dict[BenchmarkHistoricAverage] used # to find historic averages. 
average_aggregate: dict[str, dict] = dict() - + for result_path in get_result_paths(): - with result_path.open('r') as result_f: + with result_path.open("r") as result_f: result = BenchmarkRun.from_json(json.load(result_f)) # Perform another check on result file here, as get_result_paths() @@ -131,10 +142,11 @@ def validate_benchmark_result(result: BenchmarkRun) -> bool: continue for test_run in result.results: + def reset_aggregate() -> dict: - return { + return { "command_args": set(test_run.command[1:]), - "aggregate": aggregator(starting_elements=[test_run.value]) + "aggregate": aggregator(starting_elements=[test_run.value]), } # Add every benchmark run to average_aggregate: @@ -142,28 +154,36 @@ def reset_aggregate() -> dict: average_aggregate[test_run.name] = reset_aggregate() else: # Check that we are comparing runs with the same cmd args: - if set(test_run.command[1:]) == average_aggregate[test_run.name]["command_args"]: - average_aggregate[test_run.name]["aggregate"].add(test_run.value) + if ( + set(test_run.command[1:]) + == average_aggregate[test_run.name]["command_args"] + ): + average_aggregate[test_run.name]["aggregate"].add( + test_run.value + ) else: # If the command args used between runs are different, # discard old run data and prefer new command args # # This relies on the fact that paths from get_result_paths() # is sorted from older to newer - print(f"Warning: Command args for {test_run.name} from {result_path} is different from prior runs.") - print("DISCARDING older data and OVERRIDING with data using new arg.") + print( + f"Warning: Command args for {test_run.name} from {result_path} is different from prior runs." + ) + print( + "DISCARDING older data and OVERRIDING with data using new arg." + ) average_aggregate[test_run.name] = reset_aggregate() - + return { name: BenchmarkHistoricAverage( name=name, average_type=stats["aggregate"].get_type(), value=stats["aggregate"].get_avg(), - command_args=stats["command_args"] + command_args=stats["command_args"], ) for name, stats in average_aggregate.items() } - def to_hist_avg( hist_avg: dict[str, BenchmarkHistoricAverage], target: BenchmarkRun @@ -181,12 +201,14 @@ def to_hist_avg( Returns: A tuple returning (list of improved tests, list of regressed tests). """ + def halfway_round(value: int, n: int): """ Python's default round() does banker's rounding, which doesn't make much sense here. 
This rounds 0.5 to 1, and -0.5 to -1 """ - if value == 0: return 0 + if value == 0: + return 0 return int(value * 10**n + 0.5 * (value / abs(value))) / 10**n improvement = [] @@ -198,11 +220,11 @@ def halfway_round(value: int, n: int): if hist_avg[test.name].command_args != set(test.command[1:]): print(f"Warning: skipped {test.name} due to command args mismatch.") continue - + delta = 1 - ( test.value / hist_avg[test.name].value - if test.lower_is_better else - hist_avg[test.name].value / test.value + if test.lower_is_better + else hist_avg[test.name].value / test.value ) def perf_diff_entry() -> dict: @@ -221,9 +243,11 @@ def perf_diff_entry() -> dict: return improvement, regression - def to_hist( - avg_type: str, result_name: str, compare_file: str, result_dir: str, + avg_type: str, + result_name: str, + compare_file: str, + result_dir: str, cutoff: str, ) -> tuple: """ @@ -236,7 +260,7 @@ def to_hist( result_dir (str): Directory to look for results in cutoff (str): Timestamp (in YYYYMMDD_HHMMSS) indicating the oldest result included in the historic average calculation - avg_type (str): Type of "average" (measure of central tendency) to + avg_type (str): Type of "average" (measure of central tendency) to use in historic "average" calculation Returns: @@ -245,14 +269,14 @@ def to_hist( avg_type, and delta field added, indicating the historic average, type of central tendency used for historic average, and the delta from the average for this benchmark run. - """ + """ if avg_type != "median": print("Only median is currently supported: Refusing to continue.") exit(1) try: - with open(compare_file, 'r') as compare_f: + with open(compare_file, "r") as compare_f: compare_result = BenchmarkRun.from_json(json.load(compare_f)) except: print(f"Unable to open {compare_file}.") @@ -260,7 +284,9 @@ def to_hist( # Sanity checks: if compare_result.hostname == "Unknown": - print("Hostname for results in {compare_file} unknown, unable to build a historic average: Refusing to continue.") + print( + "Hostname for results in {compare_file} unknown, unable to build a historic average: Refusing to continue." 
+ ) exit(1) if not Validate.timestamp(cutoff): print("Invalid timestamp provided, please follow YYYYMMDD_HHMMSS.") @@ -272,7 +298,7 @@ def to_hist( result_dir, compare_result.hostname, cutoff, - exclude=[Path(compare_file).stem] + exclude=[Path(compare_file).stem], ) return Compare.to_hist_avg(hist_avg, compare_result) @@ -280,36 +306,35 @@ def to_hist( if __name__ == "__main__": parser = argparse.ArgumentParser(description="Compare benchmark results") subparsers = parser.add_subparsers(dest="operation", required=True) - parser_avg = subparsers.add_parser("to_hist", help="Compare a benchmark result to historic average") + parser_avg = subparsers.add_parser( + "to_hist", help="Compare a benchmark result to historic average" + ) parser_avg.add_argument( "--avg-type", type=str, help="Measure of central tendency to use when computing historic average", - default="median" + default="median", ) parser_avg.add_argument( "--name", type=str, required=True, - help="Save name of the benchmark results to compare to" + help="Save name of the benchmark results to compare to", ) parser_avg.add_argument( "--compare-file", type=str, required=True, - help="Result file to compare against te historic average" + help="Result file to compare against te historic average", ) parser_avg.add_argument( - "--results-dir", - type=str, - required=True, - help="Directory storing results" + "--results-dir", type=str, required=True, help="Directory storing results" ) parser_avg.add_argument( "--cutoff", type=str, help="Timestamp (in YYYYMMDD_HHMMSS) of oldest result to include in historic average calculation", - default="20000101_010101" + default="20000101_010101", ) args = parser.parse_args() @@ -322,11 +347,7 @@ def to_hist( raise ValueError("Timestamp must be provided as YYYYMMDD_HHMMSS.") improvements, regressions = Compare.to_hist( - "median", - args.name, - args.compare_file, - args.results_dir, - args.cutoff + "median", args.name, args.compare_file, args.results_dir, args.cutoff ) def print_regression(entry: dict): @@ -339,10 +360,12 @@ def print_regression(entry: dict): if improvements: print("#\n# Improvements:\n#\n") - for test in improvements: print_regression(test) + for test in improvements: + print_regression(test) if regressions: print("#\n# Regressions:\n#\n") - for test in regressions: print_regression(test) + for test in regressions: + print_regression(test) exit(1) # Exit 1 to trigger github test failure else: print("Unsupported operation: exiting.") diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index fd0b71c04908c..30dc607aa54a6 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -31,7 +31,9 @@ def load_result(self, file_path: Path) -> BenchmarkRun: def load(self, n: int): results_dir = Path(self.dir) / "results" if not results_dir.exists() or not results_dir.is_dir(): - print(f"Warning: {results_dir} is not a valid directory: no historic results loaded.") + print( + f"Warning: {results_dir} is not a valid directory: no historic results loaded." 
+ ) return # Get all JSON files in the results directory @@ -40,8 +42,8 @@ def load(self, n: int): # Extract timestamp and sort files by it def extract_timestamp(file_path: Path) -> str: try: - # Assumes results are stored as _YYYYMMDD_HHMMSS.json - ts = file_path.stem[-len("YYYYMMDD_HHMMSS"):] + # Assumes results are stored as _YYYYMMDD_HHMMSS.json + ts = file_path.stem[-len("YYYYMMDD_HHMMSS") :] return ts if Validate.timestamp(ts) else "" except IndexError: return "" @@ -80,21 +82,28 @@ def git_info_from_path(path: Path) -> (str, str): github_repo = remote_url.split("git@github.com:")[1].rstrip(".git") elif remote_url.startswith("https://github.com/"): # HTTPS format: https://github.com/owner/repo.git - github_repo = remote_url.split("https://github.com/")[1].rstrip(".git") + github_repo = remote_url.split("https://github.com/")[1].rstrip( + ".git" + ) else: github_repo = None except: git_hash = "unknown" github_repo = None - + return git_hash, github_repo if options.git_commit_override is None or options.github_repo_override is None: - git_hash, github_repo = git_info_from_path(os.path.dirname(os.path.abspath(__file__))) + git_hash, github_repo = git_info_from_path( + os.path.dirname(os.path.abspath(__file__)) + ) else: - git_hash, github_repo = options.git_commit_override, options.github_repo_override - + git_hash, github_repo = ( + options.git_commit_override, + options.github_repo_override, + ) + # Check if RUNNER_NAME environment variable has been declared. # # Github runners obfusicate hostnames, thus running socket.gethostname() @@ -108,7 +117,7 @@ def git_info_from_path(path: Path) -> (str, str): # TODO is this overkill? Validate.runner_name( hostname, - throw=ValueError("Illegal characters found in specified RUNNER_NAME.") + throw=ValueError("Illegal characters found in specified RUNNER_NAME."), ) compute_runtime = ( @@ -139,8 +148,8 @@ def save(self, save_name, results: list[Result], to_file=True): # Use formatted timestamp for the filename timestamp = ( datetime.now(tz=timezone.utc).strftime("%Y%m%d_%H%M%S") - if options.timestamp_override is None else - options.timestamp_override + if options.timestamp_override is None + else options.timestamp_override ) file_path = Path(os.path.join(results_dir, f"{save_name}_{timestamp}.json")) with file_path.open("w") as file: diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 2006a9084d932..397632e138978 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -500,7 +500,9 @@ def validate_and_parse_env_args(env_args): "--timestamp-override", type=lambda ts: Validate.timestamp( ts, - throw=argparse.ArgumentTypeError("Specified timestamp not in YYYYMMDD_HHMMSS format.") + throw=argparse.ArgumentTypeError( + "Specified timestamp not in YYYYMMDD_HHMMSS format." + ), ), help="Manually specify timestamp used in metadata", default=options.timestamp_override, @@ -509,7 +511,9 @@ def validate_and_parse_env_args(env_args): "--github-repo", type=lambda gh_repo: Validate.github_repo( gh_repo, - throw=argparse.ArgumentTypeError("Specified github repo not in / format.") + throw=argparse.ArgumentTypeError( + "Specified github repo not in / format." + ), ), help="Manually specify github repo metadata of component tested (e.g. 
SYCL, UMF)", default=options.github_repo_override, @@ -518,7 +522,9 @@ def validate_and_parse_env_args(env_args): "--git-commit", type=lambda commit: Validate.commit_hash( commit, - throw=argparse.ArgumentTypeError("Specified commit is not a valid commit hash.") + throw=argparse.ArgumentTypeError( + "Specified commit is not a valid commit hash." + ), ), help="Manually specify commit hash metadata of component tested (e.g. SYCL, UMF)", default=options.git_commit_override, diff --git a/devops/scripts/benchmarks/presets.py b/devops/scripts/benchmarks/presets.py index 61ce9f4aebc49..fc7e1ffb59f3d 100644 --- a/devops/scripts/benchmarks/presets.py +++ b/devops/scripts/benchmarks/presets.py @@ -42,27 +42,24 @@ def enabled_suites(preset: str) -> list[str]: # Utility scripts to validate a given preset, useful for e.g. CI: + def main(): parser = argparse.ArgumentParser(description="Benchmark Preset Utilities") subparsers = parser.add_subparsers(dest="command", required=True) query_parser = subparsers.add_parser( - "query", - help="Query benchmarks ran by a preset (as defined in presets.py)" - ) - query_parser.add_argument( - "preset_to_query", - type=str, - help="preset name to query" + "query", help="Query benchmarks ran by a preset (as defined in presets.py)" ) + query_parser.add_argument("preset_to_query", type=str, help="preset name to query") query_parser.add_argument( - "-q", "--quiet", + "-q", + "--quiet", action="store_true", - help="Disable stdout messages: Useful if you want to check if a preset exists within a shell script." + help="Disable stdout messages: Useful if you want to check if a preset exists within a shell script.", ) args = parser.parse_args() - if args.command == 'query': + if args.command == "query": if args.preset_to_query in presets: if not args.quiet: print(f"Benchmark suites to be ran in {args.preset_to_query}:") @@ -70,8 +67,10 @@ def main(): print(suite) exit(0) else: - if not args.quiet: print(f"Error: No preset named '{args.preset_to_query}'.") + if not args.quiet: + print(f"Error: No preset named '{args.preset_to_query}'.") exit(1) + if __name__ == "__main__": main() diff --git a/devops/scripts/benchmarks/utils/validate.py b/devops/scripts/benchmarks/utils/validate.py index 77bd13f4f9971..b0a2658865562 100644 --- a/devops/scripts/benchmarks/utils/validate.py +++ b/devops/scripts/benchmarks/utils/validate.py @@ -1,5 +1,6 @@ import re + def validate_on_re(val: str, regex: re.Pattern, throw: Exception = None): """ Returns True if val is matched by pattern defined by regex, otherwise False. 
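For reference, the Validate helpers serve both as plain predicates and, when
given throw=, as raising validators for argparse type= callbacks (as in
main.py above). An illustrative usage sketch; the import path follows the
file's location under utils/, and the timestamp value is an arbitrary
well-formed example:

    from utils.validate import Validate

    Validate.timestamp("20250407_080436")       # True: matches YYYYMMDD_HHMMSS
    Validate.timestamp("2025-04-07 08:04:36")   # False: wrong format
    Validate.github_repo("intel/llvm-ci-perf-results")  # True: owner/repo form
    # With throw=..., a failed match raises instead of returning False:
    Validate.timestamp("bad", throw=ValueError("Use YYYYMMDD_HHMMSS."))
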
@@ -9,9 +10,12 @@ def validate_on_re(val: str, regex: re.Pattern, throw: Exception = None): """ is_matching: bool = re.compile(regex).match(val) is not None - if throw is None: return is_matching - elif not is_matching: raise throw - else: return val + if throw is None: + return is_matching + elif not is_matching: + raise throw + else: + return val class Validate: @@ -35,7 +39,7 @@ def timestamp(t: str, throw: Exception = None): return validate_on_re( t, r"^\d{4}(0[1-9]|1[0-2])([0-2][0-9]|3[01])_([01][0-9]|2[0-3])[0-5][0-9][0-5][0-9]$", - throw=throw + throw=throw, ) @staticmethod @@ -49,7 +53,7 @@ def github_repo(repo: str, throw: Exception = None): return validate_on_re( re.sub(r"^https?://github.com/", "", repo), r"^[a-zA-Z0-9_-]{1,39}/[a-zA-Z0-9_.-]{1,100}$", - throw=throw + throw=throw, ) @staticmethod From ba7df662dfc0856c8bc4d3b9ef68dc5d0b8d05e1 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 7 Apr 2025 08:04:36 -0700 Subject: [PATCH 103/114] Archive benchmark runs --- .github/workflows/sycl-ur-perf-benchmarking.yml | 2 -- devops/actions/run-tests/benchmark_v2/action.yml | 16 +++++++++++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.github/workflows/sycl-ur-perf-benchmarking.yml b/.github/workflows/sycl-ur-perf-benchmarking.yml index 2713d60f0a2b9..7dbb4da228188 100644 --- a/.github/workflows/sycl-ur-perf-benchmarking.yml +++ b/.github/workflows/sycl-ur-perf-benchmarking.yml @@ -1,8 +1,6 @@ name: Run Benchmarks on: - schedule: - - cron: '0 1 * * *' # 2 hrs earlier than sycl-nightly.yml workflow_call: inputs: preset: diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml index 734bd45c8b16d..c7aa4f3f48c2e 100644 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ b/devops/actions/run-tests/benchmark_v2/action.yml @@ -150,7 +150,15 @@ runs: --compare-file "./llvm-ci-perf-results/results/${SAVE_NAME}_${SAVE_TIMESTAMP}.json" \ --results-dir "./llvm-ci-perf-results/results/" - - name: Push compute-benchmarks results + - name: Cache changes to benchmark folder for archival purposes + shell: bash + run: | + cd "./llvm-ci-perf-results" + for diff in $(git diff HEAD --name-only); do + mkdir -p "../cached_changes/$(dirname $diff)" + cp "$diff" "../cached_changes/$diff" + done + - name: Push benchmarks results if: inputs.upload_results == 'true' && always() shell: bash run: | @@ -197,3 +205,9 @@ runs: --dry-run cd - done + - name: Archive benchmark results + if: always() + uses: actions/upload-artifact@v4 + with: + name: Benchmark run ${{ github.run_id }} (${{ runner.name }}) + path: ./cached_changes From 989441dda4acf21d81b8c1383834344109011a86 Mon Sep 17 00:00:00 2001 From: "Li, Ian" Date: Mon, 7 Apr 2025 08:10:55 -0700 Subject: [PATCH 104/114] Remove legacy benchmarking code --- .github/workflows/sycl-linux-run-tests.yml | 18 +- .../workflows/sycl-ur-perf-benchmarking.yml | 4 +- .../actions/benchmarking/aggregate/action.yml | 95 ------ devops/actions/run-tests/benchmark/action.yml | 200 ++++++++---- .../actions/run-tests/benchmark_v2/action.yml | 213 ------------- devops/benchmarking/config.ini | 44 --- devops/benchmarking/constants.ini | 48 --- devops/benchmarking/enabled_tests.conf | 8 - devops/scripts/benchmarking/aggregate.py | 205 ------------ devops/scripts/benchmarking/benchmark.sh | 300 ------------------ devops/scripts/benchmarking/common.py | 196 ------------ devops/scripts/benchmarking/compare.py | 101 ------ devops/scripts/benchmarking/load_config.py | 30 -- 13 files 
changed, 149 insertions(+), 1313 deletions(-) delete mode 100644 devops/actions/benchmarking/aggregate/action.yml delete mode 100644 devops/actions/run-tests/benchmark_v2/action.yml delete mode 100644 devops/benchmarking/config.ini delete mode 100644 devops/benchmarking/constants.ini delete mode 100644 devops/benchmarking/enabled_tests.conf delete mode 100644 devops/scripts/benchmarking/aggregate.py delete mode 100755 devops/scripts/benchmarking/benchmark.sh delete mode 100644 devops/scripts/benchmarking/common.py delete mode 100644 devops/scripts/benchmarking/compare.py delete mode 100644 devops/scripts/benchmarking/load_config.py diff --git a/.github/workflows/sycl-linux-run-tests.yml b/.github/workflows/sycl-linux-run-tests.yml index 0b31408020658..c25050be24f32 100644 --- a/.github/workflows/sycl-linux-run-tests.yml +++ b/.github/workflows/sycl-linux-run-tests.yml @@ -25,7 +25,7 @@ on: required: False tests_selector: description: | - Three possible options: "e2e", "cts", and "compute-benchmarks". + Three possible options: "e2e", "cts", and "benchmarks". type: string default: "e2e" @@ -163,8 +163,7 @@ on: options: - e2e - cts - - compute-benchmarks - - benchmark_v2 + - benchmarks env: description: | @@ -317,18 +316,9 @@ jobs: target_devices: ${{ inputs.target_devices }} retention-days: ${{ inputs.retention-days }} - - name: Run compute-benchmarks on SYCL - if: inputs.tests_selector == 'compute-benchmarks' - uses: ./devops/actions/run-tests/benchmark - with: - target_devices: ${{ inputs.target_devices }} - env: - RUNNER_TAG: ${{ inputs.runner }} - GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }} - - name: Run benchmarks - if: inputs.tests_selector == 'benchmark_v2' - uses: ./devops/actions/run-tests/benchmark_v2 + if: inputs.tests_selector == 'benchmarks' + uses: ./devops/actions/run-tests/benchmark with: target_devices: ${{ inputs.target_devices }} upload_results: ${{ inputs.benchmark_upload_results }} diff --git a/.github/workflows/sycl-ur-perf-benchmarking.yml b/.github/workflows/sycl-ur-perf-benchmarking.yml index 7dbb4da228188..28790af47bd6c 100644 --- a/.github/workflows/sycl-ur-perf-benchmarking.yml +++ b/.github/workflows/sycl-ur-perf-benchmarking.yml @@ -123,7 +123,7 @@ jobs: image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN target_devices: ${{ matrix.backend }} - tests_selector: benchmark_v2 + tests_selector: benchmark benchmark_upload_results: ${{ inputs.upload_results }} benchmark_save_name: ${{ matrix.save_name }} benchmark_preset: ${{ inputs.preset }} @@ -151,7 +151,7 @@ jobs: image: ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN target_devices: ${{ matrix.backend }} - tests_selector: benchmark_v2 + tests_selector: benchmark benchmark_save_name: Baseline benchmark_upload_results: ${{ inputs.upload_results }} benchmark_preset: ${{ inputs.preset }} diff --git a/devops/actions/benchmarking/aggregate/action.yml b/devops/actions/benchmarking/aggregate/action.yml deleted file mode 100644 index c062636684b1f..0000000000000 --- a/devops/actions/benchmarking/aggregate/action.yml +++ /dev/null @@ -1,95 +0,0 @@ -name: 'Aggregate compute-benchmark results and produce historical averages' - -# The benchmarking workflow in sycl-linux-run-tests.yml passes or fails based on -# how the benchmark results compare to a historical average: This historical -# 
average is calculated in this composite workflow, which aggregates historical -# data and produces measures of central tendency (median in this case) used for -# this purpose. -# -# This action assumes that /devops has been checked out in ./devops. This action -# also assumes that GITHUB_TOKEN was properly set in env, because according to -# Github, that's apparently the recommended way to pass a secret into a github -# action: -# -# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets -# - -inputs: - lookback_days: - type: number - required: true - -runs: - using: "composite" - steps: - - name: Obtain oldest timestamp allowed for data in aggregation - shell: bash - run: | - # DO NOT use inputs.lookback_days directly, only use SANITIZED_TIMESTAMP. - SANITIZED_LOOKBACK_DAYS="$(echo '${{ inputs.lookback_days }}' | grep -oE '^[0-9]+$')" - if [ -z "$SANITIZED_LOOKBACK_DAYS" ]; then - echo "Please ensure inputs.lookback_days is a number." - exit 1 - fi - SANITIZED_TIMESTAMP="$(date -d "$SANITIZED_LOOKBACK_DAYS days ago" +%Y%m%d_%H%M%S)" - if [ -z "$(echo "$SANITIZED_TIMESTAMP" | grep -oE '^[0-9]{8}_[0-9]{6}$' )" ]; then - echo "Invalid timestamp generated: is inputs.lookback_days valid?" - exit 1 - fi - echo "SANITIZED_TIMESTAMP=$SANITIZED_TIMESTAMP" >> $GITHUB_ENV - - name: Load benchmarking configuration - shell: bash - run: | - $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) - echo "SANITIZED_PERF_RES_GIT_REPO=$SANITIZED_PERF_RES_GIT_REPO" >> $GITHUB_ENV - echo "SANITIZED_PERF_RES_GIT_BRANCH=$SANITIZED_PERF_RES_GIT_BRANCH" >> $GITHUB_ENV - - name: Checkout historical performance results repository - shell: bash - run: | - if [ ! -d ./llvm-ci-perf-results ]; then - git clone -b "$SANITIZED_PERF_RES_GIT_BRANCH" "https://github.com/$SANITIZED_PERF_RES_GIT_REPO" ./llvm-ci-perf-results - fi - - name: Run aggregator on historical results - shell: bash - run: | - # The current format of the historical results respository is: - # - # /// - # - # Thus, a min/max depth of 3 is used to enumerate all test cases in the - # repository. Test name is also derived from here. - find ./llvm-ci-perf-results -mindepth 3 -maxdepth 3 -type d ! -path '*.git*' | - while read -r dir; do - test_name="$(basename "$dir")" - python ./devops/scripts/benchmarking/aggregate.py ./devops "$test_name" "$dir" "$SANITIZED_TIMESTAMP" - done - - name: Upload average to the repo - shell: bash - run: | - cd ./llvm-ci-perf-results - git config user.name "SYCL Benchmarking Bot" - git config user.email "sys_sycl_benchmarks@intel.com" - git pull - # Make sure changes have been made - if git diff --quiet && git diff --cached --quiet; then - echo "No changes to median, skipping push." - else - git add . 
- git commit -m "[GHA] Aggregate median data from $SANITIZED_TIMESTAMP to $(date +%Y%m%d_%H%M%S)" - git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH" - fi - - name: Find aggregated average results artifact here - if: always() - shell: bash - run: | - cat << EOF - # - # Artifact link for aggregated averages here: - # - EOF - - name: Archive new medians - if: always() - uses: actions/upload-artifact@v4 - with: - name: llvm-ci-perf-results new medians - path: ./llvm-ci-perf-results/**/*-median.csv diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 03b7d4ad776fd..c7aa4f3f48c2e 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -1,24 +1,30 @@ -name: 'Run compute-benchmarks' - -# Run compute-benchmarks on SYCL -# -# This action assumes SYCL is in ./toolchain, and that /devops has been -# checked out in ./devops. This action also assumes that GITHUB_TOKEN -# was properly set in env, because according to Github, that's apparently the -# recommended way to pass a secret into a github action: -# -# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets +name: 'Run benchmarks' + +# This action assumes the following prerequisites: # -# This action also expects a RUNNER_TAG environment variable to be set to the -# runner tag used to run this workflow: Currently, only gen12 and pvc on Linux -# are fully supported. Although this workflow won't stop you from running other -# devices, note that only gen12 and pvc has been tested to work. +# - SYCL is placed in ./toolchain -- TODO change this +# - /devops has been checked out in ./devops. +# - env.GITHUB_TOKEN was properly set, because according to Github, that's +# apparently the recommended way to pass a secret into a github action: + +# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets # +# - env.RUNNER_TAG set to the runner tag used to run this workflow: Currently, +# only specific runners are fully supported. inputs: target_devices: type: string required: True + upload_results: + type: string + required: True + save_name: + type: string + required: True + preset: + type: string + required: True runs: using: "composite" @@ -27,16 +33,24 @@ runs: shell: bash env: TARGET_DEVICE: ${{ inputs.target_devices }} + PRESET: ${{ inputs.preset }} run: | case "$RUNNER_TAG" in - '["Linux", "gen12"]' | '["Linux", "pvc"]') ;; + '["PVC_PERF"]' ) ;; *) echo "#" - echo "# WARNING: Only gen12/pvc on Linux is fully supported." + echo "# WARNING: Only specific tuned runners are fully supported." echo "# This workflow is not guaranteed to work with other runners." echo "#" ;; esac + # Ensure runner name has nothing injected + # TODO: in terms of security, is this overkill? + if [ -z "$(printf '%s' "$RUNNER_NAME" | grep -oE '^[a-zA-Z0-9_-]+$')" ]; then + echo "Bad runner name, please ensure runner name is [a-zA-Z0-9_-]." + exit 1 + fi + # input.target_devices is not directly used, as this allows code injection case "$TARGET_DEVICE" in level_zero:*) ;; @@ -46,11 +60,15 @@ runs: echo "# This workflow is not guaranteed to work with other backends." 
echo "#" ;; esac + echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV + + # Make sure specified preset is a known value and is not malicious + python3 ./devops/scripts/benchmarks/presets.py query "$PRESET" + [ "$?" -ne 0 ] && exit 1 # Stop workflow if invalid preset + echo "PRESET=$PRESET" >> $GITHUB_ENV - name: Compute CPU core range to run benchmarks on shell: bash run: | - # Taken from ur-benchmark-reusable.yml: - # Compute the core range for the first NUMA node; second node is used by # UMF. Skip the first 4 cores as the kernel is likely to schedule more # work on these. @@ -67,61 +85,129 @@ runs: ZE_AFFINITY_MASK=0 echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV + - name: Checkout results repo + shell: bash + run: | + git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results - name: Run compute-benchmarks + env: + # Need to append "__" to save name in order to follow + # conventions: + SAVE_PREFIX: ${{ inputs.save_name }} shell: bash run: | - cat << EOF - # - # NOTE TO DEVELOPERS: - # - - Check latter steps of the workflow: This job produces an artifact with: - - benchmark results from passing/failing tests - - log containing all failing (too slow) benchmarks - - log containing all erroring benchmarks - - While this step in the workflow provides debugging output describing this - information, it might be easier to inspect the logs from the artifact - instead. - - EOF - export ONEAPI_DEVICE_SELECTOR="${{ inputs.target_devices }}" + # TODO generate summary + display helpful message here export CMPLR_ROOT=./toolchain echo "-----" sycl-ls echo "-----" - taskset -c "$CORES" ./devops/scripts/benchmarking/benchmark.sh -n '${{ runner.name }}' -s || exit 1 - - name: Push compute-benchmarks results - if: always() + # Using --break-system-packages because: + # - venv is not installed + # - unable to install anything via pip, as python packages in the docker + # container are managed by apt + # - apt is unable to install anything due to unresolved dpkg dependencies, + # as a result of how the sycl nightly images are created + pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt + echo "-----" + + # clang builds have git repo / commit hashes in their --version output, + # same goes for dpcpp. Obtain git repo / commit hash info this way: + + # First line of --version is formatted 'clang version ... ( )' + # thus we parse for ( ): + sycl_git_info="$(clang++ --version | head -n 1 | grep -oE '\([^ ]+ [a-f0-9]+\)$' | tr -d '()')" + if [ -z "$sycl_git_info" ]; then + echo "Error: Unable to deduce SYCL build source repo/commit: Are you sure dpcpp variable is in PATH?" 
+ exit 1 + fi + sycl_git_repo="$(printf "$sycl_git_info" | cut -d' ' -f1)" + sycl_git_commit="$(printf "$sycl_git_info" | cut -d' ' -f2)" + + case "$ONEAPI_DEVICE_SELECTOR" in + level_zero:*) SAVE_SUFFIX="L0" ;; + level_zero_v2:*) SAVE_SUFFIX="L0v2" ;; + opencl:*) SAVE_SUFFIX="OCL" ;; + *) SAVE_SUFFIX="${ONEAPI_DEVICE_SELECTOR%%:*}";; + esac + # TODO accomodate for different GPUs and backends + SAVE_NAME="${SAVE_PREFIX}_PVC_${SAVE_SUFFIX}" + SAVE_TIMESTAMP="$(date -u +'%Y%m%d_%H%M%S')" # Timestamps are in UTC time + + taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \ + "$(realpath ./llvm_test_workdir)" \ + --sycl "$(realpath ./toolchain)" \ + --save "$SAVE_NAME" \ + --output-html remote \ + --results-dir "./llvm-ci-perf-results/" \ + --output-dir "./llvm-ci-perf-results/" \ + --preset "$PRESET" \ + --timestamp-override "$SAVE_TIMESTAMP" \ + --github-repo "$sycl_git_repo" \ + --git-commit "$sycl_git_commit" + echo "-----" + python3 ./devops/scripts/benchmarks/compare.py to_hist \ + --name "$SAVE_NAME" \ + --compare-file "./llvm-ci-perf-results/results/${SAVE_NAME}_${SAVE_TIMESTAMP}.json" \ + --results-dir "./llvm-ci-perf-results/results/" + + - name: Cache changes to benchmark folder for archival purposes + shell: bash + run: | + cd "./llvm-ci-perf-results" + for diff in $(git diff HEAD --name-only); do + mkdir -p "../cached_changes/$(dirname $diff)" + cp "$diff" "../cached_changes/$diff" + done + - name: Push benchmarks results + if: inputs.upload_results == 'true' && always() shell: bash run: | - # Load configuration values - $(python ./devops/scripts/benchmarking/load_config.py ./devops constants) - cd "./llvm-ci-perf-results" git config user.name "SYCL Benchmarking Bot" git config user.email "sys_sycl_benchmarks@intel.com" - git pull + results_branch="unify-ci" + git add . - # Make sure changes have been made if git diff --quiet && git diff --cached --quiet; then echo "No new results added, skipping push." - else - git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" - git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH" + exit 0 fi - - name: Find benchmark result artifact here - if: always() - shell: bash - run: | - cat << EOF - # - # Artifact link for benchmark results here: - # - EOF - - name: Archive compute-benchmark results + + for attempt in 1 2 3; do + echo "Attempt $attempt to push new results" + git add . + git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" + results_file="$(git diff HEAD~1 --name-only -- results/ | head -n 1)" + + if git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" "$results_branch"; then + echo "Push succeeded" + break + fi + + echo "Push failed, retrying..." + if [ -n "$results_file" ]; then + cached_result="$(mktemp -d)/$(basename $results_file)" + mv "$results_file" "$cached_result" + + git reset --hard "origin/$results_branch" + git pull origin "$results_branch" + + mv "$cached_result" "$results_file" + fi + + echo "Regenerating data.json..." 
+ cd ../ + ./devops/scripts/benchmarks/main.py \ + "$(realpath ./llvm_test_workdir)" \ + --output-html remote \ + --results-dir "./llvm-ci-perf-results/" \ + --output-dir "./llvm-ci-perf-results/" \ + --dry-run + cd - + done + - name: Archive benchmark results if: always() uses: actions/upload-artifact@v4 with: - name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }}) - path: ./artifact + name: Benchmark run ${{ github.run_id }} (${{ runner.name }}) + path: ./cached_changes diff --git a/devops/actions/run-tests/benchmark_v2/action.yml b/devops/actions/run-tests/benchmark_v2/action.yml deleted file mode 100644 index c7aa4f3f48c2e..0000000000000 --- a/devops/actions/run-tests/benchmark_v2/action.yml +++ /dev/null @@ -1,213 +0,0 @@ -name: 'Run benchmarks' - -# This action assumes the following prerequisites: -# -# - SYCL is placed in ./toolchain -- TODO change this -# - /devops has been checked out in ./devops. -# - env.GITHUB_TOKEN was properly set, because according to Github, that's -# apparently the recommended way to pass a secret into a github action: - -# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets -# -# - env.RUNNER_TAG set to the runner tag used to run this workflow: Currently, -# only specific runners are fully supported. - -inputs: - target_devices: - type: string - required: True - upload_results: - type: string - required: True - save_name: - type: string - required: True - preset: - type: string - required: True - -runs: - using: "composite" - steps: - - name: Check specified runner type / target backend - shell: bash - env: - TARGET_DEVICE: ${{ inputs.target_devices }} - PRESET: ${{ inputs.preset }} - run: | - case "$RUNNER_TAG" in - '["PVC_PERF"]' ) ;; - *) - echo "#" - echo "# WARNING: Only specific tuned runners are fully supported." - echo "# This workflow is not guaranteed to work with other runners." - echo "#" ;; - esac - - # Ensure runner name has nothing injected - # TODO: in terms of security, is this overkill? - if [ -z "$(printf '%s' "$RUNNER_NAME" | grep -oE '^[a-zA-Z0-9_-]+$')" ]; then - echo "Bad runner name, please ensure runner name is [a-zA-Z0-9_-]." - exit 1 - fi - - # input.target_devices is not directly used, as this allows code injection - case "$TARGET_DEVICE" in - level_zero:*) ;; - *) - echo "#" - echo "# WARNING: Only level_zero backend is fully supported." - echo "# This workflow is not guaranteed to work with other backends." - echo "#" ;; - esac - echo "ONEAPI_DEVICE_SELECTOR=$TARGET_DEVICE" >> $GITHUB_ENV - - # Make sure specified preset is a known value and is not malicious - python3 ./devops/scripts/benchmarks/presets.py query "$PRESET" - [ "$?" -ne 0 ] && exit 1 # Stop workflow if invalid preset - echo "PRESET=$PRESET" >> $GITHUB_ENV - - name: Compute CPU core range to run benchmarks on - shell: bash - run: | - # Compute the core range for the first NUMA node; second node is used by - # UMF. Skip the first 4 cores as the kernel is likely to schedule more - # work on these. 
- CORES="$(lscpu | awk ' - /NUMA node0 CPU|On-line CPU/ {line=$0} - END { - split(line, a, " ") - split(a[4], b, ",") - sub(/^0/, "4", b[1]) - print b[1] - }')" - echo "CPU core range to use: $CORES" - echo "CORES=$CORES" >> $GITHUB_ENV - - ZE_AFFINITY_MASK=0 - echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV - - name: Checkout results repo - shell: bash - run: | - git clone -b unify-ci https://github.com/intel/llvm-ci-perf-results - - name: Run compute-benchmarks - env: - # Need to append "__" to save name in order to follow - # conventions: - SAVE_PREFIX: ${{ inputs.save_name }} - shell: bash - run: | - # TODO generate summary + display helpful message here - export CMPLR_ROOT=./toolchain - echo "-----" - sycl-ls - echo "-----" - # Using --break-system-packages because: - # - venv is not installed - # - unable to install anything via pip, as python packages in the docker - # container are managed by apt - # - apt is unable to install anything due to unresolved dpkg dependencies, - # as a result of how the sycl nightly images are created - pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt - echo "-----" - - # clang builds have git repo / commit hashes in their --version output, - # same goes for dpcpp. Obtain git repo / commit hash info this way: - - # First line of --version is formatted 'clang version ... ( )' - # thus we parse for ( ): - sycl_git_info="$(clang++ --version | head -n 1 | grep -oE '\([^ ]+ [a-f0-9]+\)$' | tr -d '()')" - if [ -z "$sycl_git_info" ]; then - echo "Error: Unable to deduce SYCL build source repo/commit: Are you sure dpcpp variable is in PATH?" - exit 1 - fi - sycl_git_repo="$(printf "$sycl_git_info" | cut -d' ' -f1)" - sycl_git_commit="$(printf "$sycl_git_info" | cut -d' ' -f2)" - - case "$ONEAPI_DEVICE_SELECTOR" in - level_zero:*) SAVE_SUFFIX="L0" ;; - level_zero_v2:*) SAVE_SUFFIX="L0v2" ;; - opencl:*) SAVE_SUFFIX="OCL" ;; - *) SAVE_SUFFIX="${ONEAPI_DEVICE_SELECTOR%%:*}";; - esac - # TODO accomodate for different GPUs and backends - SAVE_NAME="${SAVE_PREFIX}_PVC_${SAVE_SUFFIX}" - SAVE_TIMESTAMP="$(date -u +'%Y%m%d_%H%M%S')" # Timestamps are in UTC time - - taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \ - "$(realpath ./llvm_test_workdir)" \ - --sycl "$(realpath ./toolchain)" \ - --save "$SAVE_NAME" \ - --output-html remote \ - --results-dir "./llvm-ci-perf-results/" \ - --output-dir "./llvm-ci-perf-results/" \ - --preset "$PRESET" \ - --timestamp-override "$SAVE_TIMESTAMP" \ - --github-repo "$sycl_git_repo" \ - --git-commit "$sycl_git_commit" - echo "-----" - python3 ./devops/scripts/benchmarks/compare.py to_hist \ - --name "$SAVE_NAME" \ - --compare-file "./llvm-ci-perf-results/results/${SAVE_NAME}_${SAVE_TIMESTAMP}.json" \ - --results-dir "./llvm-ci-perf-results/results/" - - - name: Cache changes to benchmark folder for archival purposes - shell: bash - run: | - cd "./llvm-ci-perf-results" - for diff in $(git diff HEAD --name-only); do - mkdir -p "../cached_changes/$(dirname $diff)" - cp "$diff" "../cached_changes/$diff" - done - - name: Push benchmarks results - if: inputs.upload_results == 'true' && always() - shell: bash - run: | - cd "./llvm-ci-perf-results" - git config user.name "SYCL Benchmarking Bot" - git config user.email "sys_sycl_benchmarks@intel.com" - results_branch="unify-ci" - - git add . - if git diff --quiet && git diff --cached --quiet; then - echo "No new results added, skipping push." 
- exit 0 - fi - - for attempt in 1 2 3; do - echo "Attempt $attempt to push new results" - git add . - git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}" - results_file="$(git diff HEAD~1 --name-only -- results/ | head -n 1)" - - if git push "https://$GITHUB_TOKEN@github.com/intel/llvm-ci-perf-results.git" "$results_branch"; then - echo "Push succeeded" - break - fi - - echo "Push failed, retrying..." - if [ -n "$results_file" ]; then - cached_result="$(mktemp -d)/$(basename $results_file)" - mv "$results_file" "$cached_result" - - git reset --hard "origin/$results_branch" - git pull origin "$results_branch" - - mv "$cached_result" "$results_file" - fi - - echo "Regenerating data.json..." - cd ../ - ./devops/scripts/benchmarks/main.py \ - "$(realpath ./llvm_test_workdir)" \ - --output-html remote \ - --results-dir "./llvm-ci-perf-results/" \ - --output-dir "./llvm-ci-perf-results/" \ - --dry-run - cd - - done - - name: Archive benchmark results - if: always() - uses: actions/upload-artifact@v4 - with: - name: Benchmark run ${{ github.run_id }} (${{ runner.name }}) - path: ./cached_changes diff --git a/devops/benchmarking/config.ini b/devops/benchmarking/config.ini deleted file mode 100644 index 988d1d9f08af9..0000000000000 --- a/devops/benchmarking/config.ini +++ /dev/null @@ -1,44 +0,0 @@ -; -; This file contains configuration options to change the behaviour of the -; benchmarking workflow in sycl-linux-run-tests.yml. -; -; DO NOT USE THE CONTENTS OF THIS FILE DIRECTLY -- Due to security concerns, The -; contents of this file must be sanitized first before use. -; See: /devops/scripts/benchmarking/common.py -; - -; Compute-benchmark compile/run options -[compute_bench] -; Value for -j during compilation of compute-benchmarks -compile_jobs = 40 -; Number of iterations to run compute-benchmark tests -iterations = 5000 - -; Options for benchmark result metrics (to record/compare against) -[metrics] -; Sets the metrics to record/aggregate in the historical average. -; Format: comma-separated list of column names in compute-benchmark results -recorded = Median,StdDev -; Sets the tolerance for each recorded metric and their allowed deviation from -; the historical average. Metrics not included here are not compared against -; when passing/failing benchmark results. -; Format: comma-separated list of : -tolerances = Median:0.08 - -; Options for computing historical averages -[average] -; Number of days (from today) to look back for results when computing historical -; average -cutoff_range = 7 -; Minimum number of samples required to compute a historical average -min_threshold = 10 - -; ONEAPI_DEVICE_SELECTOR linting/options -[device_selector] -; Backends to allow in device_selector -enabled_backends = level_zero,opencl,cuda,hip -; native_cpu is disabled - -; Devices to allow in device_selector -enabled_devices = cpu,gpu -; fpga is disabled diff --git a/devops/benchmarking/constants.ini b/devops/benchmarking/constants.ini deleted file mode 100644 index 9281ece8f4950..0000000000000 --- a/devops/benchmarking/constants.ini +++ /dev/null @@ -1,48 +0,0 @@ -; -; This file defines constants used throughout the benchmarking workflow in -; sycl-linux-run-tests.yml. If you're trying to change the behavior of this -; workflow, you're likely looking for /devops/benchmarking/config.ini instead. 
-; -; DO NOT USE THE CONTENTS OF THIS FILE DIRECTLY -- Due to security concerns, The -; contents of this file must be sanitized first before use. -; See: /devops/scripts/benchmarking/common.py -; - -; Constants for compute-benchmarks -[compute_bench] -git_repo = intel/compute-benchmarks -git_branch = master -git_commit = 230a3db4d8d03c0e9a663988f7c3abbd1137a1e0 -; path = ./compute-benchmarks - -; Constants for git repo storing benchmark performance results -[perf_res] -git_repo = intel/llvm-ci-perf-results -git_branch = main -; Path to clone performance result repo -; path = ./llvm-ci-perf-results - -; It was decided that paths should be hardcoded throughout this workflow for -; security reasons and ease of readability. Do not use paths as constants. - -; ; Constants for artifacts -; [artifact] -; ; Path to root folder storing benchmark CI artifact -; path = ./artifact -; ; Path (relative to artifact.path) to cache compute-benchmark results -; ; -; ; If a test result does not get moved out of this catch-all cache path, it is -; ; considered to have failed -; output_cache = ./artifact/failed_tests -; ; Path (relative to artifact.path) to cache passing compute-benchmark results -; passing_cache = ./artifact/passing_tests - -; [timestamp] -; ; Timestamp format used for -; format = %%Y%%m%%d_%%H%%M%%S - -; [benchmark_log] -; ; Log file for test cases that perform over the allowed variance -; slow = ./artifact/benchmarks_failed.log -; ; Log file for test cases that errored / failed to build -; error = ./artifact/benchmarks_errored.log diff --git a/devops/benchmarking/enabled_tests.conf b/devops/benchmarking/enabled_tests.conf deleted file mode 100644 index 20659cbea636d..0000000000000 --- a/devops/benchmarking/enabled_tests.conf +++ /dev/null @@ -1,8 +0,0 @@ -# Test cases to be enabled: -api_overhead_benchmark_sycl -memory_benchmark_sycl -miscellaneous_benchmark_sycl -ulls_benchmark_sycl - -# As of January 2025, these are every compute-benchmark tests with a SYCL -# implementation. diff --git a/devops/scripts/benchmarking/aggregate.py b/devops/scripts/benchmarking/aggregate.py deleted file mode 100644 index f62a8ffed83c5..0000000000000 --- a/devops/scripts/benchmarking/aggregate.py +++ /dev/null @@ -1,205 +0,0 @@ -import csv -import sys -from pathlib import Path -import heapq -import statistics -from common import Validate, SanitizedConfig -from abc import ABC, abstractmethod -import os - - -class Aggregator(ABC): - """ - Aggregator classes used to "aggregate" a pool of elements, and produce an - "average" (precisely, some "measure of central tendency") from the elements. - """ - - @staticmethod - @abstractmethod - def get_type() -> str: - """ - Return a string indicating the type of average this aggregator - produces. - """ - pass - - @abstractmethod - def add(self, n: float): - """ - Add/aggregate an element to the pool of elements used by this aggregator - to produce an average calculation. - """ - pass - - @abstractmethod - def get_avg(self) -> float: - """ - Produce an average from the pool of elements aggregated using add(). - """ - pass - - -class SimpleMedian(Aggregator): - """ - Simple median calculation: if the number of samples being generated are low, - this is the fastest median method. 
- """ - - def __init__(self): - self.elements = [] - - @staticmethod - def get_type() -> str: - return "median" - - def add(self, n: float): - self.elements.append(n) - - def get_avg(self) -> float: - return statistics.median(self.elements) - - -class StreamingMedian(Aggregator): - """ - Calculate medians incrementally using heaps: Theoretically the fastest way - to calculate a median from a stream of elements, but realistically is only - faster when dealing with huge numbers of samples that would be generated by - i.e. enabling this workflow in precommit and using longer periods of time. - """ - - def __init__(self): - # Gist: we keep a minheap and a maxheap, and store the median as the top - # of the minheap. When a new element comes it gets put into the heap - # based on if the element is bigger than the current median. Then, the - # heaps are heapified and the median is repopulated by heapify. - self.minheap_larger = [] - self.maxheap_smaller = [] - - @staticmethod - def get_type() -> str: - return "median" - - # Note: numbers on maxheap should be negative, as heapq - # is minheap by default - - def add(self, n: float): - if len(self.maxheap_smaller) == 0 or -self.maxheap_smaller[0] >= n: - heapq.heappush(self.maxheap_smaller, -n) - else: - heapq.heappush(self.minheap_larger, n) - - # Ensure minheap has more elements than maxheap - if len(self.maxheap_smaller) > len(self.minheap_larger) + 1: - heapq.heappush(self.minheap_larger, -heapq.heappop(self.maxheap_smaller)) - elif len(self.maxheap_smaller) < len(self.minheap_larger): - heapq.heappush(self.maxheap_smaller, -heapq.heappop(self.minheap_larger)) - - def get_avg(self) -> float: - if len(self.maxheap_smaller) == len(self.minheap_larger): - # Equal number of elements smaller and larger than "median": - # thus, there are two median values. The median would then become - # the average of both median values. - return (-self.maxheap_smaller[0] + self.minheap_larger[0]) / 2.0 - else: - # Otherwise, median is always in minheap, as minheap is always - # bigger - return -self.maxheap_smaller[0] - - -class Aggregate: - """ - Static class providing methods for aggregating data - """ - - @staticmethod - def hist_avg( - benchmark_name: str, res_dir: str, cutoff: str, aggregator=SimpleMedian - ): - if not os.path.isdir(res_dir): - print(f"Not a directory: {res_dir}.", file=sys.stderr) - exit(1) - - def get_csv_samples() -> list[str]: - """Get all valid .csv samples from the results folder.""" - cache_dir = Path(f"{res_dir}") - # Filter all benchmark .csv files in the result directory: - return list( - filter( - # Make sure the .csv "file" is a file: - lambda f: f.is_file() - # Make sure timestamp of .csv file is good format: - # [-19:-4] corresponds to the timestamp in the filename. 
-                    and Validate.timestamp(str(f)[-19:-4])
-                    # Make sure timestamp is bigger than cutoff timestamp:
-                    and str(f)[-19:-4] > cutoff,
-                    cache_dir.glob(f"{benchmark_name}-*_*.csv"),
-                )
-            )
-
-        # Calculate median of every desired metric:
-        samples_aggregate = dict()
-        filtered_samples = get_csv_samples()
-        if len(filtered_samples) == 0:
-            print(
-                f"WARNING: No results for {benchmark_name} found from {cutoff} to now",
-                file=sys.stderr,
-            )
-        for sample_path in filtered_samples:
-            with open(sample_path, "r") as sample_file:
-                for sample in csv.DictReader(sample_file):
-                    test = sample["TestCase"]
-                    # Construct entry in aggregator for test if it doesn't exist
-                    # already:
-                    if test not in samples_aggregate:
-                        samples_aggregate[test] = {
-                            metric: aggregator()
-                            for metric in SanitizedConfig.METRICS_TOLERANCES
-                        }
-
-                    # For each metric of concern, add to aggregator:
-                    for metric in SanitizedConfig.METRICS_TOLERANCES:
-                        sample_value = Validate.sanitize_stat(sample[metric])
-                        if not isinstance(sample_value, float):
-                            print(
-                                f"Malformatted statistic in {str(sample_path)}: "
-                                + f"'{sample[metric]}' for {test}."
-                            )
-                            exit(1)
-                        # Add metric from sample for current test to aggregate:
-                        samples_aggregate[test][metric].add(sample_value)
-
-        # Calculate + write new average (from samples_aggregate) in new .csv file:
-        with open(
-            f"{res_dir}/{benchmark_name}-{aggregator.get_type()}.csv", "w"
-        ) as output_csv:
-            writer = csv.DictWriter(
-                output_csv,
-                fieldnames=["TestCase", *SanitizedConfig.METRICS_TOLERANCES.keys()],
-            )
-            writer.writeheader()
-            for test in samples_aggregate:
-                writer.writerow(
-                    {"TestCase": test}
-                    | {
-                        metric: samples_aggregate[test][metric].get_avg()
-                        for metric in SanitizedConfig.METRICS_TOLERANCES
-                    }
-                )
-
-
-if __name__ == "__main__":
-    if len(sys.argv) != 5:
-        print(
-            f"Usage: {sys.argv[0]} "
-        )
-        exit(1)
-    if not Validate.timestamp(sys.argv[4]):
-        print(f"Bad cutoff timestamp, please use YYYYMMDD_HHMMSS.", file=sys.stderr)
-        exit(1)
-    if not Validate.filepath(sys.argv[1]):
-        print(f"Not a valid filepath: {sys.argv[1]}", file=sys.stderr)
-        exit(1)
-    # If the filepath provided passed filepath validation, then it is clean
-    SanitizedConfig.load(sys.argv[1])
-
-    Aggregate.hist_avg(sys.argv[2], sys.argv[3], sys.argv[4])
diff --git a/devops/scripts/benchmarking/benchmark.sh b/devops/scripts/benchmarking/benchmark.sh
deleted file mode 100755
index bbfd669774f9a..0000000000000
--- a/devops/scripts/benchmarking/benchmark.sh
+++ /dev/null
@@ -1,300 +0,0 @@
-#!/bin/sh
-
-#
-# benchmark.sh: Benchmark dpcpp using compute-benchmarks
-#
-
-usage () {
-    >&2 echo "Usage: $0 -t [-B ]
-    -n  Github runner name -- Required
-    -c  Clean up working directory
-    -C  Clean up working directory and exit
-    -s  Cache results
-
-This script builds and runs benchmarks from compute-benchmarks."
-    exit 1
-}
-
-# Ensures test cases read from enabled_tests.conf contains no malicious content
-_validate_testname () {
-    if [ -n "$(printf "%s" "$1" | sed "s/[a-zA-Z_]*//g")" ]; then
-        echo "Illegal characters in $TEST_CONFIG. Permitted characters: a-zA-Z_"
-        exit 1
-    fi
-}
-
-clone_perf_res() {
-    echo "### Cloning llvm-ci-perf-results ($SANITIZED_PERF_RES_GIT_REPO:$SANITIZED_PERF_RES_GIT_BRANCH) ###"
-    git clone -b "$SANITIZED_PERF_RES_GIT_BRANCH" "https://github.com/$SANITIZED_PERF_RES_GIT_REPO" ./llvm-ci-perf-results
-    [ "$?" -ne 0 ] && exit "$?"
-}
-
-clone_compute_bench() {
-    echo "### Cloning compute-benchmarks ($SANITIZED_COMPUTE_BENCH_GIT_REPO:$SANITIZED_COMPUTE_BENCH_GIT_BRANCH) ###"
-    git clone -b "$SANITIZED_COMPUTE_BENCH_GIT_BRANCH" \
-        --recurse-submodules "https://github.com/$SANITIZED_COMPUTE_BENCH_GIT_REPO" \
-        ./compute-benchmarks
-    if [ ! -d "./compute-benchmarks" ]; then
-        echo "Failed to clone compute-benchmarks."
-        exit 1
-    elif [ -n "$SANITIZED_COMPUTE_BENCH_GIT_COMMIT" ]; then
-        cd ./compute-benchmarks
-        git checkout "$SANITIZED_COMPUTE_BENCH_GIT_COMMIT"
-        if [ "$?" -ne 0 ]; then
-            echo "Failed to get compute-benchmarks commit '$SANITIZED_COMPUTE_BENCH_GIT_COMMIT'."
-            exit 1
-        fi
-        cd -
-    fi
-}
-
-build_compute_bench() {
-    echo "### Building compute-benchmarks ($SANITIZED_COMPUTE_BENCH_GIT_REPO:$SANITIZED_COMPUTE_BENCH_GIT_BRANCH) ###"
-    mkdir ./compute-benchmarks/build && cd ./compute-benchmarks/build &&
-    # No reason to turn on ccache, if this docker image will be disassembled later on
-    cmake .. -DBUILD_SYCL=ON -DBUILD_L0=OFF -DBUILD=OCL=OFF -DCCACHE_ALLOWED=FALSE
-    # TODO enable mechanism for opting into L0 and OCL -- the concept is to
-    # subtract OCL/L0 times from SYCL times in hopes of deriving SYCL runtime
-    # overhead, but this is mostly an idea that needs to be mulled upon.
-
-    if [ "$?" -eq 0 ]; then
-        while IFS= read -r case; do
-            # Skip lines starting with '#'
-            [ "${case##\#*}" ] || continue
-
-            _validate_testname "$case"
-            make "-j$SANITIZED_COMPUTE_BENCH_COMPILE_JOBS" "$case"
-        done < "$TESTS_CONFIG"
-    fi
-    cd -
-}
-
-# Check if the number of samples for a given test case is less than a threshold
-# set in benchmark-ci.conf
-#
-# Usage: 
-samples_under_threshold () {
-    # Directory doesn't exist, samples automatically under threshold
-    [ ! -d "./llvm-ci-perf-results/$1" ] && return 0
-    file_count="$(find "./llvm-ci-perf-results/$1" -maxdepth 1 -type f | wc -l )"
-    [ "$file_count" -lt "$SANITIZED_AVERAGE_MIN_THRESHOLD" ]
-}
-
-# Check for a regression via compare.py
-#
-# Usage: check_regression 
-check_regression() {
-    csv_relpath="$(dirname "$1")"
-    csv_name="$(basename "$1")"
-    if samples_under_threshold "$csv_relpath"; then
-        echo "Not enough samples to construct a good average, performance\
- check skipped!"
-        return 0 # Success status
-    fi
-    python "$DEVOPS_PATH/scripts/benchmarking/compare.py" \
-        "$DEVOPS_PATH" "$csv_relpath" "$csv_name"
-    return $?
-}
-
-# Move the results of our benchmark into the git repo, and save benchmark
-# results to artifact archive
-#
-# Usage: cache 
-cache() {
-    mkdir -p "$(dirname ./artifact/passing_tests/$1)" "$(dirname ./artifact/failed_tests/$1)"
-    cp "./artifact/failed_tests/$1" "./artifact/passing_tests/$1"
-    mkdir -p "$(dirname ./llvm-ci-perf-results/$1)"
-    mv "./artifact/failed_tests/$1" "./llvm-ci-perf-results/$1"
-}
-
-# Check for a regression + cache if no regression found
-#
-# Usage: check_and_cache 
-check_and_cache() {
-    echo "Checking $1..."
-    if check_regression $1; then
-        if [ "$CACHE_RESULTS" -eq "1" ]; then
-            echo "Caching $1..."
-            cache $1
-        fi
-    else
-        [ "$CACHE_RESULTS" -eq "1" ] && echo "Regression found -- Not caching!"
-    fi
-}
-
-# Run and process the results of each enabled benchmark in enabled_tests.conf
-process_benchmarks() {
-    echo "### Running and processing selected benchmarks ###"
-    if [ -z "$TESTS_CONFIG" ]; then
-        echo "Setting tests to run via cli is not currently supported."
-        exit 1
-    else
-        rm ./artifact/benchmarks_errored.log ./artifact/benchmarks_failed.log 2> /dev/null
-        mkdir -p ./artifact
-        # Loop through each line of enabled_tests.conf, but ignore lines in the
-        # test config starting with #'s:
-        grep "^[^#]" "$TESTS_CONFIG" | while read -r testcase; do
-            _validate_testname "$testcase"
-            echo "# Running $testcase..."
-
-            # The benchmark results git repo and this script's output both share
-            # the following directory structure:
-            #
-            # ///
-            #
-            # Instead of specifying 2 paths with a slightly different root
-            # folder name for every function we use, we can use a relative path
-            # to represent the file in both folders.
-            #
-            # Figure out the relative path of our testcase result:
-            test_dir_relpath="$DEVICE_SELECTOR_DIRNAME/$RUNNER/$testcase"
-            output_csv_relpath="$test_dir_relpath/$testcase-$TIMESTAMP.csv"
-            mkdir -p "./artifact/failed_tests/$test_dir_relpath" # Ensure directory exists
-
-            # Tests are first placed in ./artifact/failed_tests, and are only
-            # moved to passing_tests or the performance results repo if the
-            # benchmark results are passing
-            output_csv="./artifact/failed_tests/$output_csv_relpath"
-            "./compute-benchmarks/build/bin/$testcase" --csv \
-                --iterations="$SANITIZED_COMPUTE_BENCH_ITERATIONS" > "$output_csv"
-
-            exit_status="$?"
-            if [ "$exit_status" -eq 0 ] && [ -s "$output_csv" ]; then
-                # Filter out header lines not in csv format:
-                tail +8 "$output_csv" > .tmp_res
-                mv .tmp_res "$output_csv"
-                check_and_cache $output_csv_relpath
-            else
-                echo "[ERROR] $testcase returned exit status $exit_status"
-                echo "-- $testcase: error $exit_status" >> ./artifact/benchmarks_errored.log
-            fi
-        done
-    fi
-}
-
-# Handle failures + produce a report on what failed
-process_results() {
-    fail=0
-    if [ -s ./artifact/benchmarks_failed.log ]; then
-        printf "\n### Tests performing over acceptable range of average: ###\n"
-        cat ./artifact/benchmarks_failed.log
-        echo ""
-        fail=2
-    fi
-    if [ -s ./artifact/benchmarks_errored.log ]; then
-        printf "\n### Tests that failed to run: ###\n"
-        cat ./artifact/benchmarks_errored.log
-        echo ""
-        fail=1
-    fi
-    exit $fail
-}
-
-cleanup() {
-    echo "### Cleaning up compute-benchmark builds from prior runs ###"
-    rm -rf ./compute-benchmarks
-    rm -rf ./llvm-ci-perf-results
-    [ ! -z "$_exit_after_cleanup" ] && exit
-}
-
-load_configs() {
-    # This script needs to know where the intel/llvm "/devops" directory is,
-    # containing all the configuration files and the compare script.
-    #
-    # If this is not provided, this function tries to guess where the files
-    # are based on how the script is called, and verifies that all necessary
-    # configs and scripts are reachable.
-
-    # This benchmarking script is usually at:
-    #
-    # /devops/scripts/benchmarking/benchmark.sh
-    #
-    # Derive /devops based on location of this script:
-    [ -z "$DEVOPS_PATH" ] && DEVOPS_PATH="$(dirname "$0")/../.."
-    if [ -z "$(printf '%s' "$DEVOPS_PATH" | grep -oE '^[a-zA-Z0-9._\/-]+$')" ]; then
-        echo "Bad DEVOPS_PATH, please specify DEVOPS_PATH variable."
-        exit 1
-    fi
-
-    TESTS_CONFIG="$(realpath "$DEVOPS_PATH/benchmarking/enabled_tests.conf")"
-    COMPARE_PATH="$(realpath "$DEVOPS_PATH/scripts/benchmarking/compare.py")"
-    LOAD_CONFIG_PY="$(realpath "$DEVOPS_PATH/scripts/benchmarking/load_config.py")"
-
-    for file in \
-        "$TESTS_CONFIG" "$COMPARE_PATH" "$LOAD_CONFIG_PY"
-    do
-        if [ ! -f "$file" ]; then
-            echo "Please provide path to /devops in DEVOPS_PATH."
-            exit -1
-        fi
-    done
-
-    $(python "$LOAD_CONFIG_PY" "$DEVOPS_PATH" config)
-    $(python "$LOAD_CONFIG_PY" "$DEVOPS_PATH" constants)
-}
-
-#####
-
-load_configs
-
-COMPUTE_BENCH_COMPILE_FLAGS=""
-CACHE_RESULTS="0"
-# Timestamp format is YYYYMMDD_HHMMSS
-TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
-
-# CLI flags + overrides to configuration options:
-while getopts "n:cCs" opt; do
-    case "$opt" in
-    n)
-        if [ -n "$(printf "%s" "$OPTARG" | sed "s/[a-zA-Z0-9_-]*//g")" ]; then
-            echo "Illegal characters in runner name."
-            exit 1
-        fi
-        RUNNER="$OPTARG"
-        ;;
-    # Cleanup status is saved in a var to ensure all arguments are processed before
-    # performing cleanup
-    c) _cleanup=1 ;;
-    C) _cleanup=1 && _exit_after_cleanup=1 ;;
-    s) CACHE_RESULTS=1;;
-    \?) usage ;;
-    esac
-done
-
-# Check all necessary variables exist:
-if [ -z "$CMPLR_ROOT" ]; then
-    echo "Please set CMPLR_ROOT first; it is needed by compute-benchmarks to build."
-    exit 1
-elif [ -z "$ONEAPI_DEVICE_SELECTOR" ]; then
-    echo "Please set ONEAPI_DEVICE_SELECTOR first to specify which device to use."
-    exit 1
-elif [ -z "$RUNNER" ]; then
-    echo "Please specify runner name using -n first; it is needed for storing/comparing benchmark results."
-    exit 1
-fi
-
-# Make sure ONEAPI_DEVICE_SELECTOR doesn't try to enable multiple devices at the
-# same time, or use specific device id's
-_dev_sel_backend_re="$(echo "$SANITIZED_DEVICE_SELECTOR_ENABLED_BACKENDS" | sed 's/,/|/g')"
-_dev_sel_device_re="$(echo "$SANITIZED_DEVICE_SELECTOR_ENABLED_DEVICES" | sed 's/,/|/g')"
-_dev_sel_re="s/($_dev_sel_backend_re):($_dev_sel_device_re)//"
-if [ -n "$(echo "$ONEAPI_DEVICE_SELECTOR" | sed -E "$_dev_sel_re")" ]; then
-    echo "Unsupported ONEAPI_DEVICE_SELECTOR value: please ensure only one \
-device is selected, and devices are not selected by indices."
-    echo "Enabled backends: $SANITIZED_DEVICE_SELECTOR_ENABLED_BACKENDS"
-    echo "Enabled device types: $SANITIZED_DEVICE_SELECTOR_ENABLED_DEVICES"
-    exit 1
-fi
-# ONEAPI_DEVICE_SELECTOR values are not valid directory names in unix: this
-# value lets us use ONEAPI_DEVICE_SELECTOR as actual directory names
-DEVICE_SELECTOR_DIRNAME="$(echo "$ONEAPI_DEVICE_SELECTOR" | sed 's/:/-/')"
-
-# Clean up and delete all cached files if specified:
-[ ! -z "$_cleanup" ] && cleanup
-# Clone and build only if they aren't already cached/deleted:
-[ ! -d ./llvm-ci-perf-results ] && clone_perf_res
-[ ! -d ./compute-benchmarks ] && clone_compute_bench
-[ ! -d ./compute-benchmarks/build ] && build_compute_bench
-# Process benchmarks:
-process_benchmarks
-process_results
\ No newline at end of file
diff --git a/devops/scripts/benchmarking/common.py b/devops/scripts/benchmarking/common.py
deleted file mode 100644
index c400b686db90f..0000000000000
--- a/devops/scripts/benchmarking/common.py
+++ /dev/null
@@ -1,196 +0,0 @@
-import re
-import os
-import sys
-import string
-import configparser
-
-
-class Validate:
-    """Static class containing methods for validating various fields"""
-
-    @staticmethod
-    def filepath(path: str) -> bool:
-        """
-        Returns True if path is clean (no illegal characters), otherwise False.
-        """
-        filepath_re = re.compile(r"[a-zA-Z0-9\/\._\-]+")
-        return filepath_re.match(path) is not None
-
-    @staticmethod
-    def timestamp(t: str) -> bool:
-        """
-        Returns True if t is in form YYYYMMDD_HHMMSS, otherwise False.
- """ - timestamp_re = re.compile( - r"^\d{4}(0[1-9]|1[0-2])([0-2][0-9]|3[01])_([01][0-9]|2[0-3])[0-5][0-9][0-5][0-9]$" - ) - return timestamp_re.match(t) is not None - - @staticmethod - def sanitize_stat(stat: str) -> float: - """ - Sanitize statistics found in compute-benchmark output csv files. Returns - float if sanitized, None if not sanitizable. - """ - # Get rid of % - if stat[-1] == "%": - stat = stat[:-1] - - # Cast to float: If cast succeeds, the statistic is clean. - try: - return float(stat) - except ValueError: - return None - - -class SanitizedConfig: - """ - Static class for holding sanitized configuration values used within python. - - Configuration option names follow
_