GoogleCloudPlatform
diff --git a/‎.devcontainer/Dockerfile‎
Lines changed: 3 additions & 3 deletions b/‎.devcontainer/Dockerfile‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎.devcontainer/devcontainer.json‎
Lines changed: 4 additions & 1 deletion b/‎.devcontainer/devcontainer.json‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎.github/workflows/dictionary/python.txt‎
Lines changed: 17 additions & 0 deletions b/‎.github/workflows/dictionary/python.txt‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎.github/workflows/dictionary/reinforcement-learning.txt‎
Lines changed: 12 additions & 0 deletions b/‎.github/workflows/dictionary/reinforcement-learning.txt‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎.github/workflows/dictionary/sglang.txt‎
Lines changed: 4 additions & 0 deletions b/‎.github/workflows/dictionary/sglang.txt‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎.github/workflows/dictionary/shell.txt‎
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/dictionary/shell.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 7 additions & 0 deletions b/‎.gitignore‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 4 additions & 0 deletions b/‎README.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎container-images/cpu/k6-benchmark/Dockerfile‎
Lines changed: 31 additions & 0 deletions b/‎container-images/cpu/k6-benchmark/Dockerfile‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎container-images/cpu/k6-benchmark/README.md‎
Lines changed: 99 additions & 0 deletions b/‎container-images/cpu/k6-benchmark/README.md‎
Lines changed: 99 additions & 0 deletions
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-FROM hashicorp/terraform:1.5.7 AS terraform
-FROM koalaman/shellcheck:v0.10.0 AS shellcheck
-FROM mvdan/shfmt:v3.10.0 AS shfmt
+FROM hashicorp/terraform:1.14.8 AS terraform
+FROM koalaman/shellcheck:v0.11.0 AS shellcheck
+FROM mvdan/shfmt:v3.13.1 AS shfmt
 
 FROM python:3.13-bookworm AS python-builder
 
 
@@ -1,6 +1,6 @@
 {
   "$schema": "https://raw.githubusercontent.com/devcontainers/spec/main/schemas/devContainer.schema.json",
-  "name": "Cloud Solutions devcontainer",
+  "name": "Accelerated Platforms devcontainer",
   "build": {
     "dockerfile": "Dockerfile"
   },
@@ -13,7 +13,9 @@
         "editor.wordWrap": "off",
         "files.insertFinalNewline": true,
         "files.trimFinalNewlines": true,
+        "geminicodeassist.displayInlineContextHint": false,
         "prettier.resolveGlobalModules": true,
+        "python.defaultInterpreterPath": "/venv/bin/python",
         "redhat.telemetry.enabled": false,
         "telemetry.telemetryLevel": "off",
         "[css]": {
@@ -78,6 +80,7 @@
         "ms-azuretools.vscode-containers",
         "ms-python.black-formatter",
         "ms-python.isort",
+        "ms-python.python",
         "streetsidesoftware.code-spell-checker",
         "timonwong.shellcheck"
       ]
 
@@ -3,10 +3,16 @@ aiohttp
 aqtp
 asctime
 asgi
+asynccontextmanager
 asyncio
+certifi
+cffi
 classmethod
 configparser
+contextlib
 coveragerc
+dataclass
+dataclasses
 dataframe
 dbapi
 dbcommands
@@ -17,6 +23,7 @@ fastapi
 fillna
 fromarray
 frombuffer
+fromisoformat
 fsspec
 ftfy
 functools
@@ -29,11 +36,13 @@ getframerate
 getnchannels
 getnframes
 getsampwidth
+grpcio
 gunicorn
 hasattr
 hashlib
 hexdigest
 httpx
+idna
 iloc
 imgf
 inplace
@@ -59,7 +68,10 @@ pgvector
 pipreqs
 pmap
 prng
+protos
+pyasn
 pycache
+pycparser
 pydantic
 pyenv
 pylint
@@ -69,8 +81,10 @@ pythondontwritebytecode
 pythonpath
 pythonunbuffered
 qualname
+quantiles
 readframes
 removesuffix
+reqs
 rerank
 reranked
 retryable
@@ -83,13 +97,16 @@ shutil
 spacy
 splitlines
 sqlalchemy
+strftime
 tensorboard
 tensorboardx
 thejsonlogger
 tqdm
 unittests
 urllib
+urlopen
 urlretrieve
 uvicorn
 venv
 writerow
+writestr
@@ -0,0 +1,12 @@
+epath
+etils
+grpo
+highmem
+logdir
+logps
+maxtext
+multiproc
+returncode
+sigabrt
+strftime
+tunix
@@ -0,0 +1,4 @@
+lmsysorg
+musa
+nvls
+sglang
@@ -16,6 +16,7 @@ nslookup
 pipefail
 pkill
 shuf
+subshell
 syscall
 xtrace
 zxvf
@@ -39,3 +39,10 @@ terraform.tfstate*
 # Test
 test/log/*.log
 test/scripts/environment_files/*
+
+# Generated outputs
+*.log
+k6-*.txt
+k6-*.csv
+k6-*.jsonl
+k6-report.md
@@ -62,12 +62,16 @@ the primary runtime.
       - [Benchmarking Online inference performance on Google Kubernetes Engine (GKE)](/docs/platforms/gke/base/use-cases/inference-ref-arch/inference-perf-bench/inf-perf-benchmarking-with-hf-model.md)
 
 - [Training reference architecture](/docs/platforms/gke/base/use-cases/training-ref-arch/README.md)
+
   - [Model fine tuning](/docs/platforms/gke/base/use-cases/training-ref-arch/model-fine-tuning/README.md)
     - [Data processing](/docs/platforms/gke/base/use-cases/training-ref-arch/model-fine-tuning/data-processing.md)
     - [Data preparation](/docs/platforms/gke/base/use-cases/training-ref-arch/model-fine-tuning/data-preparation.md)
     - [Fine tuning](/docs/platforms/gke/base/use-cases/training-ref-arch/model-fine-tuning/fine-tuning.md)
     - [Model evaluation](/docs/platforms/gke/base/use-cases/training-ref-arch/model-fine-tuning/model-evaluation.md)
 
+- [Reinforcement Learning reference architecture](/docs/platforms/gke/base/use-cases/reinforcement-learning/README.md)
+  - [RL on TPU](/docs/platforms/gke/base/use-cases/reinforcement-learning/single-host-tpu-grpo/README.md)
+
 ### Guides
 
 - [LLM Inference Optimization: Achieving faster Pod Startup with Google Cloud Storage](/use-cases/inferencing/cost-optimization/gcsfuse/AchievingFasterPodStartup.md)
 
@@ -0,0 +1,31 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM grafana/k6:1.7.1
+
+USER root
+
+WORKDIR /app
+# Create the /output directory and ensure k6 owns it, along with /app
+RUN mkdir -p /output && chown -R k6:k6 /app /output
+
+COPY --chown=k6:k6 scripts /app/scripts
+COPY --chmod=a+x --chown=k6:k6 entrypoint.sh /app/entrypoint.sh
+
+# Switch back to the unprivileged k6 user
+USER k6
+
+ENTRYPOINT ["/app/entrypoint.sh"]
+
+CMD ["--help"]
@@ -0,0 +1,99 @@
+# k6 Benchmark Image
+
+This container image packages the [k6](https://k6.io/) load testing tool with
+specific scripts to benchmark Machine Learning inference workloads.
+
+It is designed to run in environments like Google Kubernetes Engine (GKE) to
+generate consistent, reproducible load against target endpoints and output
+granular metrics to a JSONL file for further analysis. It also includes a Python
+script (`extract_metrics.py`) that can be run manually to process the k6 output
+and generate a price/performance report.
+
+## Usage
+
+You can run this container image via Docker or deploy it as a Job in a
+Kubernetes cluster.
+
+### Environment Variables
+
+The container accepts the following optional environment variables for metric
+output naming and processing:
+
+- `ACCELERATOR_NAME`: A string representing the target hardware (e.g., `l4`,
+  `a100`, `v5p`). If not provided, it defaults to `accelerator-not-set`.
+- `NODE_HOURLY_COST`: The hourly cost of the underlying node in USD. Used by the
+  metric extraction script (`extract_metrics.py`) to compute cost per 1k images.
+  Defaults to `0.0`.
+
+The default benchmark script (`k6-diffusers-flux-2-klein-4b.js`) expects the
+following environment variables:
+
+- `TARGET_URL`: The full URL of the inference endpoint to test (e.g.,
+  `http://model-service:8000/generate`).
+- `BATCH_SIZE`: The batch size to request in the payload (default: `1`).
+- `VUS`: The number of concurrent Virtual Users to simulate (default: `1`).
+
+### Running via Docker
+
+Choose which k6 script to run by passing its path as the container command
+(`CMD`) when starting the container:
+
+```bash
+# Example: running a different script mounted into the container
+docker run --rm \
+  -e ACCELERATOR_NAME="custom" \
+  -v $(pwd)/custom-script.js:/app/custom-script.js \
+  -v $(pwd)/output:/output \
+  k6-benchmark:latest /app/custom-script.js
+```
+
+The k6 output will be saved in the mapped `/output` directory on your host. The
+filename will be dynamically generated in the format:
+`<name-of-k6-script>-<ACCELERATOR_NAME>-<experiment-start-timestamp>.jsonl`.
+For example: `k6-diffusers-flux-2-klein-4b-l4-20260417T120000Z.jsonl`.
+
+#### Supported Benchmarks
+
+The following benchmark scripts are included:
+
+- **`/app/k6-diffusers-flux-2-klein-4b.js`**: Benchmark the FLUX.2-klein-4B
+  image generation model.
+
+## Metrics Extraction
+
+The extraction script (`extract_metrics.py`) can be run manually after the
+benchmark finishes to generate a price/performance report.
+
+The extraction script calculates throughput (Images/sec) and latencies (p50,
+p95, p99) strictly from the `benchmark` scenario, and automatically fetches
+corresponding on-node telemetry (Peak VRAM, Avg GPU Utilization) from Google
+Cloud Monitoring if the dependencies are installed and it is running on Google
+Cloud.
+
+To ensure accurate hardware metrics when multiple deployments are running in the
+same project, the script can filter by pod, namespace, or node. If the `--pod`
+argument is omitted, the script automatically uses the `deployment_name`
+(extracted from the `TARGET_URL` hostname) as a prefix to filter for relevant
+pods.
+
+### Script Arguments
+
+- `--file`: Path to the k6 `.jsonl` output file (Required).
+- `--output-csv`: Path to the output CSV file where aggregated results are
+  stored (Optional, default: `k6-benchmark.csv`).
+- `--hourly-cost`: The hourly cost of the underlying GKE node in USD. If set to
+  `0.0`, a warning is emitted and cost metrics will be `0.0` (Optional, default:
+  `0.0`).
+- `--project-id`: Google Cloud Project ID to query DCGM metrics via Cloud
+  Monitoring. If omitted, the script dynamically fetches the project ID from the
+  Google Cloud Metadata server (Optional).
+- `--pod`: Filter metrics by a specific pod name. If omitted, the script
+  automatically uses the `deployment_name` (derived from the `TARGET_URL`
+  hostname) as a prefix filter to match all relevant pods in the deployment
+  (Optional).
+- `--namespace`: Filter metrics by a specific namespace (Optional).
+- `--node`: Filter metrics by a specific node name (Optional).
+- `--vram-metric`: The Prometheus metric string for VRAM usage (Default:
+  `prometheus.googleapis.com/DCGM_FI_DEV_FB_USED/gauge`).
+- `--util-metric`: The Prometheus metric string for GPU utilization (Default:
+  `prometheus.googleapis.com/DCGM_FI_DEV_GPU_UTIL/gauge`).
-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +lmsysorg
 +musa
 +nvls
 +sglang