[API BASE]Setup cli & python lib for benchmark toolings #6756

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open

wants to merge 48 commits into base: main
48 commits
ecab9d6
final
yangw-dev Jun 5, 2025
35b204b
final
yangw-dev Jun 5, 2025
d7a9b85
final
yangw-dev Jun 5, 2025
31ffaaf
final
yangw-dev Jun 7, 2025
9039bb8
final
yangw-dev Jun 12, 2025
bb14d80
final
yangw-dev Jun 5, 2025
38c5367
Merge branch 'main' into setupapi
yangw-dev Jun 12, 2025
a0e7d07
final
yangw-dev Jun 12, 2025
c71c140
final
yangw-dev Jun 12, 2025
5a11f04
final
yangw-dev Jun 12, 2025
2172181
final
yangw-dev Jun 12, 2025
f7796da
final
yangw-dev Jun 12, 2025
1587084
final
yangw-dev Jun 12, 2025
713b909
final
yangw-dev Jun 12, 2025
4000e62
final
yangw-dev Jun 12, 2025
51122f5
final
yangw-dev Jun 12, 2025
60bd54f
final
yangw-dev Jun 12, 2025
fd9c5ae
final
yangw-dev Jun 12, 2025
879f676
final
yangw-dev Jun 12, 2025
4373424
final
yangw-dev Jun 12, 2025
04edc35
final
yangw-dev Jun 12, 2025
200c1b2
final
yangw-dev Jun 12, 2025
d155217
final
yangw-dev Jun 12, 2025
057efa4
final
yangw-dev Jun 12, 2025
e89579c
final
yangw-dev Jun 13, 2025
2e4ada0
final
yangw-dev Jun 13, 2025
d79eeca
final
yangw-dev Jun 13, 2025
d04a528
final
yangw-dev Jun 13, 2025
32508b5
final
yangw-dev Jun 13, 2025
392114a
final
yangw-dev Jun 13, 2025
497fe80
final
yangw-dev Jun 13, 2025
0f94b2e
final
yangw-dev Jun 13, 2025
69bd2e7
final
yangw-dev Jun 13, 2025
e8a1d77
final
yangw-dev Jun 13, 2025
231632d
final
yangw-dev Jun 13, 2025
592e91a
final
yangw-dev Jun 13, 2025
15530f5
final
yangw-dev Jun 13, 2025
7038435
final
yangw-dev Jun 13, 2025
6954f3b
final
yangw-dev Jun 13, 2025
74c805a
final
yangw-dev Jun 13, 2025
4aa47ee
final
yangw-dev Jun 13, 2025
ba28851
final
yangw-dev Jun 13, 2025
61d8622
final
yangw-dev Jun 13, 2025
cea6098
final
yangw-dev Jun 13, 2025
7d234c1
final
yangw-dev Jun 13, 2025
24f2514
final
yangw-dev Jun 13, 2025
7c73689
final
yangw-dev Jun 13, 2025
d7c0728
final
yangw-dev Jun 13, 2025
98 changes: 98 additions & 0 deletions tools/benchmark/README.md
@@ -0,0 +1,98 @@
# Benchmark Tool

Provides a set of tools for users and developers to work with PyTorch benchmark data and APIs.

## Quick Install

To install the Benchmark Tool, run the following commands:

```bash
cd test-infra/tools/benchmark
pip install .
```

## Benchmark Tooling

### CLI Tool: pt2-bm-cli

The `pt2-bm-cli` tool is based on [Cement](https://github.com/datafolklabs/cement).

#### Checking CLI Help

To check the available options and commands, run:

```bash
pt2-bm-cli --help
```

#### Fetching ExecuBench Data

To fetch ExecuBench data from a specific start time to end time, run:

```bash
pt2-bm-cli group-data-query --name execubench --startTime "2025-06-01T00:00:00" --endTime "2025-06-06T00:00:00" run
```

### Importing the Python Library

To import the `data_models` module in Python, use:

```python
from pt2_bm_tools.data_models.benchmark_query_group_data_model import (
BenchmarkQueryGroupDataParams,
)
```

To import the `lib` module in Python, use:

```python
from pt2_bm_tools.lib.fetch_group_data import (
fetch_group_data,
fetch_execubench_group_data,
)
```

## Structure

The Benchmark Tool has the following structure:

```text
tools/
└── benchmark/
    ├── pt2_bm_tools/                      # main entry point for the benchmark library
    │   ├── __init__.py
    │   ├── lib/                           # lib methods that can be imported and used in a Python env
    │   │   ├── __init__.py
    │   │   └── fetch_group_data.py
    │   └── data_models/                   # generated data models that can be imported and used
    │       ├── __init__.py
    │       └── benchmark_query_group_data_model.py
    ├── cli/                               # cement-based CLI tool
    │   ├── __init__.py
    │   ├── cli.py
    │   ├── fetch_group_data_controller.py
    │   └── sub_clis/
    │       └── fetch_group_data_execubench_controller.py
    ├── data_schemas/                      # JSON schema sources for the data models
    │   └── benchmark_query_group_data.schema.json
    ├── generate.sh                        # bash script to generate Python and TS data models from the JSON schemas
    ├── zod-generate.sh                    # bash script to generate a zod schema from the TS interface
    ├── requirements.txt                   # dev dependencies for the benchmark tool
    ├── pyproject.toml
    └── README.md
```

## Data Models

The data models are generated from the JSON schema files in `test-infra/tools/benchmark/data_schemas` and represent the benchmark data and API payloads. The toolchain uses zod, pydantic, and datamodel-code-generator (plus json-schema-to-typescript for the TypeScript interfaces).

### Generating Data Models

To generate data models, run:

```bash
cd test-infra/tools/benchmark
./generate.sh
```

This will update the related data models based on the JSON schema configuration. The generated data models include:

* TypeScript interfaces from JSON Schemas
* Pydantic (Python) models from the same schema with validation included
* Zod schema (for runtime validation in TypeScript) from the generated interface with validation included
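
As a quick sanity check after regeneration, the generated Pydantic model can be exercised directly. A minimal sketch (field values are illustrative):

```python
from pydantic import ValidationError

from pt2_bm_tools.data_models.benchmark_query_group_data_model import (
    BenchmarkQueryGroupDataParams,
)

try:
    params = BenchmarkQueryGroupDataParams(
        repo="pytorch/executorch",
        benchmark_name="ExecuTorch",
        start_time="2025-06-01T00:00:00",
        end_time="2025-06-06T00:00:00",
    )
    print(params.model_dump())
except ValidationError as e:
    # Missing or mistyped required fields (repo, benchmark_name, start_time, end_time) land here.
    print(e)
```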
Empty file added tools/benchmark/__init__.py
Empty file.
17 changes: 17 additions & 0 deletions tools/benchmark/cli/__init__.py
@@ -0,0 +1,17 @@
import inspect
import os


# Allow import only if caller is cli.* or being executed as CLI script
caller = inspect.stack()[1].frame.f_globals.get("__name__", "")
env_calling_cli_directly = os.environ.get("ALLOW_CLI_IMPORT", "") == "1"

if not (
    caller.startswith("cli.")
    or caller == "__main__"
    or "pt2-bm-cli" in os.path.basename(inspect.stack()[-1].filename)
    or env_calling_cli_directly
):
    raise ImportError(
        "The 'cli' package is internal to the command-line interface. Do not import it from non-CLI modules."
    )
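
For contexts outside the CLI (for example, ad-hoc local testing), the guard above can be bypassed explicitly via the `ALLOW_CLI_IMPORT` escape hatch it checks. A minimal sketch, not part of this PR:

```python
import os

# Opt in before the first import of the cli package; checked in cli/__init__.py.
os.environ["ALLOW_CLI_IMPORT"] = "1"

from cli.fetch_group_data_controller import GroupDataQueryController  # noqa: E402
```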
27 changes: 27 additions & 0 deletions tools/benchmark/cli/cli.py
@@ -0,0 +1,27 @@
from cement import App, Controller
from cli.fetch_group_data_controller import GroupDataQueryController


class BaseController(Controller):
    class Meta:
        label = "base"
        help = "pt2-bm-cli: PyTorch Benchmark CLI"


class Pt2BMCLI(App):
    class Meta:
        label = "pt2-bm-cli"
        base_controller = "base"
        handlers = [
            BaseController,
            GroupDataQueryController,
        ]


def main():
    with Pt2BMCLI() as app:
        app.run()


if __name__ == "__main__":
    main()
60 changes: 60 additions & 0 deletions tools/benchmark/cli/fetch_group_data_controller.py
@@ -0,0 +1,60 @@
from cement import Controller, ex
from cli.sub_clis.fetch_group_data_execubench_controller import run_execubench


class GroupDataQueryController(Controller):
    class Meta:
        label = "group-data-query"
        stacked_on = "base"
        stacked_type = "nested"
        help = "Group data query CLI to query data from group data table"
        arguments = [
            (
                ["--name"],
                {
                    "help": "Which shortcut to run (e.g. execubench), default is `default`",
                    "dest": "name",
                    "default": "default",
                },
            ),
            (
                ["--env"],
                {
                    "help": "Environment (local or prod)",
                    "choices": ["local", "prod"],
                    "default": "prod",
                    "dest": "env",
                },
            ),
            (
                ["--startTime"],
                {
                    "help": "Start time: YYYY-MM-DDTHH:MM:SS",
                    "dest": "startTime",
                    "required": True,
                },
            ),
            (
                ["--endTime"],
                {
                    "help": "End time: YYYY-MM-DDTHH:MM:SS",
                    "dest": "endTime",
                    "required": True,
                },
            ),
        ]

    @ex(help="Run group data query")
    def run(self):
        args = self.app.pargs
        if not args.startTime or not args.endTime:
            print("[ERROR] --startTime and --endTime are required")
            return
        if args.name == "execubench":
            run_execubench(
                env=args.env,
                start_time=args.startTime,
                end_time=args.endTime,
            )
        else:
            print(f"[ERROR] Unsupported query name: {args.name}")
20 changes: 20 additions & 0 deletions tools/benchmark/cli/sub_clis/fetch_group_data_execubench_controller.py
@@ -0,0 +1,20 @@
import argparse
from datetime import datetime
from pprint import pprint

from pt2_bm_tools.lib.fetch_group_data import fetch_execubench_group_data


def run_execubench(env: str, start_time: str, end_time: str):
    """
    Args:
        env (str): "local" or "prod"
        start_time (str): ISO8601 string without milliseconds
        end_time (str): ISO8601 string without milliseconds
    """
    resp = fetch_execubench_group_data(env, start_time, end_time)
    group_infos = [job.get("groupInfo", {}) for job in resp]
    print(f"Fetched {len(resp)} table views")
    pprint(group_infos)
    if resp:
        print(f"Peeking first table view: {resp[0]}")
20 changes: 20 additions & 0 deletions tools/benchmark/data_schemas/benchmark_query_group_data.schema.json
@@ -0,0 +1,20 @@

{
  "title": "BenchmarkQueryGroupDataParams",
  "type": "object",
  "properties": {
    "repo": { "type": "string" },
    "benchmark_name": { "type": "string" },
    "start_time": { "type": "string" },
    "end_time": { "type": "string" },
    "group_table_by_fields": {
      "type": "array",
      "items": { "type": "string" }
    },
    "group_row_by_fields": {
      "type": "array",
      "items": { "type": "string" }
    }
  },
  "required": ["repo", "benchmark_name", "start_time", "end_time"]
}
22 changes: 22 additions & 0 deletions tools/benchmark/generate.sh
@@ -0,0 +1,22 @@
#!/bin/bash
set -e # exit immediately if any command fails

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
TORCHCI_DIR="$SCRIPT_DIR/../../torchci"
GEN_ZOD_SCRIPT="$SCRIPT_DIR/zod-generate.sh"
SCHEMA_DIR="$SCRIPT_DIR/data_schemas"
PYTHON_DATA_MODEL_DST="$SCRIPT_DIR/pt2_bm_tools/data_models"
TS_DATA_MODEL_DST="$TORCHCI_DIR/lib/benchmark/dataModels"

# Generate TypeScript interface from JSON Schema
npx json-schema-to-typescript \
--input "$SCHEMA_DIR/benchmark_query_group_data.schema.json" \
--output "$TS_DATA_MODEL_DST/benchmark_query_group_data_model.ts"

# Generate python datamodel from JSON Schema
datamodel-codegen \
--input "$SCHEMA_DIR/benchmark_query_group_data.schema.json" \
--input-file-type jsonschema \
--output "$PYTHON_DATA_MODEL_DST/benchmark_query_group_data_model.py"

cd "$TORCHCI_DIR" && "$GEN_ZOD_SCRIPT" ./lib/benchmark/dataModels/benchmark_query_group_data_model.ts ./lib/benchmark/dataModels/benchmark_query_group_data_model.zod.ts
18 changes: 18 additions & 0 deletions tools/benchmark/pt2_bm_tools/data_models/benchmark_query_group_data_model.py
@@ -0,0 +1,18 @@
# generated by datamodel-codegen:
# filename: benchmark_query_group_data.schema.json
# timestamp: 2025-06-13T01:30:09+00:00

from __future__ import annotations

from typing import List, Optional

from pydantic import BaseModel


class BenchmarkQueryGroupDataParams(BaseModel):
    repo: str
    benchmark_name: str
    start_time: str
    end_time: str
    group_table_by_fields: Optional[List[str]] = None
    group_row_by_fields: Optional[List[str]] = None
Empty file.
59 changes: 59 additions & 0 deletions tools/benchmark/pt2_bm_tools/lib/fetch_group_data.py
@@ -0,0 +1,59 @@
import datetime
from pprint import pprint

import requests
from pt2_bm_tools.data_models.benchmark_query_group_data_model import ( # adjust import as needed
BenchmarkQueryGroupDataParams,
)
from pydantic import ValidationError


BASE_URLS = {
    "local": "http://localhost:3000",
    "prod": "https://hud.pytorch.org",
}


def fetch_group_data(env: str, req: BenchmarkQueryGroupDataParams):
    # Validate the environment before building the request URL.
    if env not in BASE_URLS:
        raise ValueError(f"Invalid environment: {env}")
    url = f"{BASE_URLS[env]}/api/benchmark/group_data/result"
    try:
        # validate timestamp format
        datetime.datetime.strptime(req.start_time, "%Y-%m-%dT%H:%M:%S")
        datetime.datetime.strptime(req.end_time, "%Y-%m-%dT%H:%M:%S")
    except ValueError:
        raise ValueError(
            "start_time and end_time must be in format YYYY-MM-DDTHH:MM:SS"
        )
    try:
        params = req.model_dump()
        print(f"Preparing request params: {params}")
    except ValidationError as e:
        print(f"Validation failed: {e}")
        raise

    response = requests.get(url, params=params)
    if response.status_code == 200:
        print("✅ Successfully fetched benchmark data")
        return response.json()
    else:
        print(response.text)
        response.raise_for_status()
        raise Exception("Failed to fetch benchmark data")


def fetch_execubench_group_data(env: str, start_time_str: str, end_time_str: str):
    try:
        params_object = BenchmarkQueryGroupDataParams(
            repo="pytorch/executorch",
            benchmark_name="ExecuTorch",
            start_time=start_time_str,
            end_time=end_time_str,
            group_table_by_fields=["device", "backend", "arch", "model"],
            group_row_by_fields=["workflow_id", "job_id", "granularity_bucket"],
        )
    except ValidationError as e:
        print(f"Validation failed: {e}")
        raise
    return fetch_group_data(env, params_object)
24 changes: 24 additions & 0 deletions tools/benchmark/pyproject.toml
@@ -0,0 +1,24 @@
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "pytorch-benchmark-tools"
description = "Benchmark library and CLI tools"
version = "0.1.0"
requires-python = ">=3.12"
dependencies = [
"cement>=3.0.8",
"pydantic>=2.0",
"requests"
]
authors = [
{ name = "Yang Wang", email = "[email protected]" }
]

[project.scripts]
pt2-bm-cli = "cli.cli:main"

[tool.setuptools.packages.find]
where = ["."]
include = ["pt2_bm_tools","cli"]
2 changes: 2 additions & 0 deletions tools/benchmark/requirements.txt
@@ -0,0 +1,2 @@
datamodel-code-generator>=0.30.1
pydantic>=2.11.5