Skip to content

Commit f2eaf67

Browse files
author
ssjia
committed
Update on "[ET-VK] Generalize constant_pad_nd to support any storage type and packed dimension"
Replace the old pad_channel and pad_height_width shaders with generalized pad_buffer and pad_texture shaders that work with any storage type and packed dimension using BufferMetadata/TextureMetadata and indexing.glslh utilities. Differential Revision: [D95970168](https://our.internmc.facebook.com/intern/diff/D95970168/) [ghstack-poisoned]
2 parents 961a140 + 2dd8cac commit f2eaf67

File tree

170 files changed

+7689
-8523
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

170 files changed

+7689
-8523
lines changed

.ci/scripts/export_model_artifact.sh

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Arguments:
2424
- google/gemma-3-4b-it
2525
- nvidia/diar_streaming_sortformer_4spk-v2
2626
- nvidia/parakeet-tdt
27+
- facebook/dinov2-small-imagenet1k-1-layer
2728
2829
quant_name Quantization type (optional, default: non-quantized)
2930
Options:
@@ -167,6 +168,14 @@ case "$HF_MODEL" in
167168
PREPROCESSOR_FEATURE_SIZE=""
168169
PREPROCESSOR_OUTPUT=""
169170
;;
171+
facebook/dinov2-small-imagenet1k-1-layer)
172+
MODEL_NAME="dinov2"
173+
TASK=""
174+
MAX_SEQ_LEN=""
175+
EXTRA_PIP=""
176+
PREPROCESSOR_FEATURE_SIZE=""
177+
PREPROCESSOR_OUTPUT=""
178+
;;
170179
mistralai/Voxtral-Mini-4B-Realtime-2602)
171180
MODEL_NAME="voxtral_realtime"
172181
TASK=""
@@ -177,7 +186,7 @@ case "$HF_MODEL" in
177186
;;
178187
*)
179188
echo "Error: Unsupported model '$HF_MODEL'"
180-
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt"
189+
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
181190
exit 1
182191
;;
183192
esac
@@ -293,6 +302,23 @@ if [ "$MODEL_NAME" = "sortformer" ]; then
293302
exit 0
294303
fi
295304

305+
# DINOv2 uses a custom export script
306+
if [ "$MODEL_NAME" = "dinov2" ]; then
307+
pip install -r examples/models/dinov2/install_requirements.txt
308+
309+
python -m executorch.examples.models.dinov2.export_dinov2 \
310+
--backend "$DEVICE" \
311+
--output-dir "${OUTPUT_DIR}"
312+
313+
test -f "${OUTPUT_DIR}/model.pte"
314+
if [ "$DEVICE" = "cuda" ] || [ "$DEVICE" = "cuda-windows" ]; then
315+
test -f "${OUTPUT_DIR}/aoti_cuda_blob.ptd"
316+
fi
317+
ls -al "${OUTPUT_DIR}"
318+
echo "::endgroup::"
319+
exit 0
320+
fi
321+
296322
# Voxtral Realtime uses a custom export script
297323
if [ "$MODEL_NAME" = "voxtral_realtime" ]; then
298324
pip install safetensors huggingface_hub

.ci/scripts/test_backend.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ if [[ "$FLOW" == *qnn* ]]; then
4646
export LD_LIBRARY_PATH"=$QNN_X86_LIB_DIR:$QNN_SDK_ROOT/lib/x86_64-linux-clang/:${LD_LIBRARY_PATH:-}"
4747

4848
# TODO Get SDK root from install scripts
49-
EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_QNN=ON -DQNN_SDK_ROOT=$QNN_SDK_ROOT"
49+
EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_QNN=ON -DQNN_SDK_ROOT=$QNN_SDK_ROOT -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON"
5050
fi
5151

5252
if [[ "$FLOW" == *vulkan* ]]; then

.ci/scripts/test_model_e2e.sh

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Arguments:
2424
- google/gemma-3-4b-it
2525
- Qwen/Qwen3-0.6B
2626
- nvidia/parakeet-tdt
27+
- facebook/dinov2-small-imagenet1k-1-layer
2728
- mistralai/Voxtral-Mini-4B-Realtime-2602
2829
2930
quant_name Quantization type (required)
@@ -190,6 +191,19 @@ case "$HF_MODEL" in
190191
AUDIO_FILE="poem.wav"
191192
IMAGE_PATH=""
192193
;;
194+
facebook/dinov2-small-imagenet1k-1-layer)
195+
MODEL_NAME="dinov2"
196+
RUNNER_TARGET="dinov2_runner"
197+
RUNNER_PATH="dinov2"
198+
EXPECTED_OUTPUT="Samoyed"
199+
PREPROCESSOR=""
200+
TOKENIZER_URL=""
201+
TOKENIZER_FILE=""
202+
AUDIO_URL=""
203+
AUDIO_FILE=""
204+
IMAGE_URL="https://github.com/pytorch/hub/raw/master/images/dog.jpg"
205+
IMAGE_PATH=""
206+
;;
193207
mistralai/Voxtral-Mini-4B-Realtime-2602)
194208
MODEL_NAME="voxtral_realtime"
195209
RUNNER_TARGET="voxtral_realtime_runner"
@@ -204,7 +218,7 @@ case "$HF_MODEL" in
204218
;;
205219
*)
206220
echo "Error: Unsupported model '$HF_MODEL'"
207-
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/parakeet-tdt"
221+
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
208222
exit 1
209223
;;
210224
esac
@@ -218,7 +232,7 @@ echo "::group::Prepare $MODEL_NAME Artifacts"
218232

219233

220234
# Download tokenizer files (skip for models that bundle tokenizer in export or do not use one)
221-
if [ "$MODEL_NAME" != "parakeet" ] && [ "$MODEL_NAME" != "voxtral_realtime" ] && [ "$MODEL_NAME" != "sortformer" ]; then
235+
if [ "$MODEL_NAME" != "parakeet" ] && [ "$MODEL_NAME" != "voxtral_realtime" ] && [ "$MODEL_NAME" != "sortformer" ] && [ "$MODEL_NAME" != "dinov2" ]; then
222236
if [ "$TOKENIZER_FILE" != "" ]; then
223237
curl -L $TOKENIZER_URL/$TOKENIZER_FILE -o $MODEL_DIR/$TOKENIZER_FILE
224238
else
@@ -238,6 +252,11 @@ elif [[ "$MODEL_NAME" == *whisper* ]] || [ "$MODEL_NAME" = "voxtral_realtime" ];
238252
python -c "from datasets import load_dataset;import soundfile as sf;sample = load_dataset('distil-whisper/librispeech_long', 'clean', split='validation')[0]['audio'];sf.write('${MODEL_DIR}/$AUDIO_FILE', sample['array'][:sample['sampling_rate']*30], sample['sampling_rate'])"
239253
fi
240254

255+
# Download test image for vision models
256+
if [ -n "${IMAGE_URL:-}" ]; then
257+
curl -L "$IMAGE_URL" -o "${MODEL_DIR}/test_image.jpg"
258+
fi
259+
241260
ls -al
242261
echo "::endgroup::"
243262

@@ -316,6 +335,12 @@ EOF
316335
RUNNER_ARGS="$RUNNER_ARGS --data_path ${MODEL_DIR}/aoti_cuda_blob.ptd"
317336
fi
318337
;;
338+
dinov2)
339+
RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --image_path ${MODEL_DIR}/test_image.jpg"
340+
if [ "$DEVICE" = "cuda" ]; then
341+
RUNNER_ARGS="$RUNNER_ARGS --data_path ${MODEL_DIR}/aoti_cuda_blob.ptd"
342+
fi
343+
;;
319344
voxtral_realtime)
320345
RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --preprocessor_path ${MODEL_DIR}/$PREPROCESSOR --audio_path ${MODEL_DIR}/$AUDIO_FILE --temperature 0"
321346
# Add CUDA data path if present

.ci/scripts/test_model_e2e_windows.ps1

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ param(
1313
[Parameter(Mandatory = $true)]
1414
[string]$QuantName,
1515
[string]$ModelDir = ".",
16-
[string]$ExpectedCudaVersion = ""
16+
[string]$ExpectedCudaVersion = "",
17+
[string]$Mode = ""
1718
)
1819

1920
Set-StrictMode -Version Latest
@@ -25,6 +26,15 @@ if ($Device -ne "cuda-windows") {
2526
throw "Unsupported device '$Device'. Expected 'cuda-windows'."
2627
}
2728

29+
if ($Mode -ne "") {
30+
if ($Mode -notin @("vr-streaming", "vr-offline")) {
31+
throw "Unsupported mode '$Mode'. Supported modes: vr-streaming, vr-offline"
32+
}
33+
if ($HfModel -ne "mistralai/Voxtral-Mini-4B-Realtime-2602") {
34+
throw "Mode '$Mode' can only be used with Voxtral Realtime model"
35+
}
36+
}
37+
2838
Write-Host "Testing model: $HfModel (quantization: $QuantName)"
2939

3040
$resolvedModelDir = (Resolve-Path -Path $ModelDir).Path
@@ -79,15 +89,28 @@ switch ($HfModel) {
7989
$runnerTarget = "voxtral_realtime_runner"
8090
$runnerPath = "voxtral_realtime"
8191
$runnerPreset = "voxtral-realtime-cuda"
82-
$expectedOutput = "Loading audio from"
92+
$expectedOutput = "Quilter"
8393
$preprocessor = "preprocessor.pte"
8494
$tokenizerUrl = ""
8595
$tokenizerFile = "tekken.json"
8696
$audioUrl = "https://github.com/voxserv/audio_quality_testing_samples/raw/refs/heads/master/testaudio/16000/test01_20s.wav"
8797
$audioFile = "poem.wav"
8898
}
99+
"facebook/dinov2-small-imagenet1k-1-layer" {
100+
$runnerTarget = "dinov2_runner"
101+
$runnerPath = "dinov2"
102+
$runnerPreset = "dinov2-cuda"
103+
$expectedOutput = "Samoyed"
104+
$preprocessor = ""
105+
$tokenizerUrl = ""
106+
$tokenizerFile = ""
107+
$audioUrl = ""
108+
$audioFile = ""
109+
$imageUrl = "https://github.com/pytorch/hub/raw/master/images/dog.jpg"
110+
$imageFile = "test_image.jpg"
111+
}
89112
default {
90-
throw "Unsupported model '$HfModel'. Supported: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt"
113+
throw "Unsupported model '$HfModel'. Supported: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
91114
}
92115
}
93116

@@ -162,6 +185,9 @@ try {
162185
if ($audioUrl -ne "") {
163186
Download-IfNeeded -Url $audioUrl -OutFile (Join-Path -Path $resolvedModelDir -ChildPath $audioFile)
164187
}
188+
if ((Get-Variable -Name imageUrl -ErrorAction SilentlyContinue) -and $imageUrl -ne "") {
189+
Download-IfNeeded -Url $imageUrl -OutFile (Join-Path -Path $resolvedModelDir -ChildPath $imageFile)
190+
}
165191
Get-ChildItem -Path $resolvedModelDir
166192
Write-Host "::endgroup::"
167193

@@ -207,6 +233,16 @@ try {
207233
"--audio_path", (Join-Path -Path $resolvedModelDir -ChildPath $audioFile),
208234
"--preprocessor_path", (Join-Path -Path $resolvedModelDir -ChildPath $preprocessor)
209235
)
236+
if ($Mode -ne "vr-offline") {
237+
$runnerArgs += "--streaming"
238+
}
239+
}
240+
"facebook/dinov2-small-imagenet1k-1-layer" {
241+
$runnerArgs = @(
242+
"--model_path", $modelPte,
243+
"--data_path", $cudaBlob,
244+
"--image_path", (Join-Path -Path $resolvedModelDir -ChildPath $imageFile)
245+
)
210246
}
211247
}
212248

.ci/scripts/unittest-linux-cmake.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ if ! python -c "import tosa_serializer" >/dev/null 2>&1; then
1919
TOSA_SERIALIZATION_DIR="${TOSA_TOOLS_DIR}/serialization"
2020
fi
2121

22+
# NOTE: Will be removed when tosa-tools is installed via pypi
23+
python -m pip install pybind11==2.10.4
2224
CMAKE_POLICY_VERSION_MINIMUM=3.5 BUILD_PYBIND=1 \
2325
python -m pip install --no-dependencies \
2426
"${TOSA_SERIALIZATION_DIR}"

.github/workflows/_unittest.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,11 @@ jobs:
5858
if: ${{ inputs.build-tool == 'cmake' }}
5959
uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
6060
with:
61-
submodules: 'recursive'
6261
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
6362
timeout: 120
6463
script: |
64+
git config --global http.sslBackend openssl
65+
git submodule update --init --recursive
6566
conda init powershell
6667
6768
powershell -Command "& {

.github/workflows/build-presets.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,10 +113,11 @@ jobs:
113113
with:
114114
job-name: build
115115
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
116-
submodules: recursive
117116
timeout: 90
118117
script: |
119118
set -eux
119+
git config --global http.sslBackend openssl
120+
git submodule update --init --recursive
120121
conda init powershell
121122
powershell -Command "& {
122123
Set-PSDebug -Trace 1

.github/workflows/cuda-windows.yml

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ jobs:
4747
- model_repo: "mistralai"
4848
model_name: "Voxtral-Mini-4B-Realtime-2602"
4949
quant: "quantized-int4-tile-packed"
50+
- model_repo: "facebook"
51+
model_name: "dinov2-small-imagenet1k-1-layer"
52+
quant: "non-quantized"
5053
with:
5154
timeout: 90
5255
secrets-env: EXECUTORCH_HF_TOKEN
@@ -83,12 +86,15 @@ jobs:
8386
PYTHON_EXECUTABLE=python ./install_executorch.sh
8487
echo "::endgroup::"
8588
86-
echo "::group::Setup Huggingface"
87-
pip install -U "huggingface_hub[cli]<1.0" accelerate
88-
huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
89-
OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
90-
pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
91-
echo "::endgroup::"
89+
# Setup Huggingface only for models that need it (not dinov2)
90+
if [ "${{ matrix.model_name }}" != "dinov2-small-imagenet1k-1-layer" ]; then
91+
echo "::group::Setup Huggingface"
92+
pip install -U "huggingface_hub[cli]<1.0" accelerate
93+
huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
94+
OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
95+
pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
96+
echo "::endgroup::"
97+
fi
9298
9399
VR_MODE=""
94100
if [ "${{ matrix.model_name }}" = "Voxtral-Mini-4B-Realtime-2602" ]; then
@@ -122,15 +128,19 @@ jobs:
122128
- model_repo: "mistralai"
123129
model_name: "Voxtral-Mini-4B-Realtime-2602"
124130
quant: "quantized-int4-tile-packed"
131+
- model_repo: "facebook"
132+
model_name: "dinov2-small-imagenet1k-1-layer"
133+
quant: "non-quantized"
125134
with:
126135
timeout: 240
127136
runner: windows.g5.4xlarge.nvidia.gpu
128137
gpu-arch-type: cuda
129138
gpu-arch-version: 12.8
130-
submodules: recursive
131139
download-artifact: ${{ matrix.model_repo }}-${{ matrix.model_name }}-cuda-windows-${{ matrix.quant }}
132140
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
133141
script: |
142+
git config --global http.sslBackend openssl
143+
git submodule update --init --recursive
134144
conda init powershell
135145
powershell -Command "& {
136146
Set-PSDebug -Trace 1

.github/workflows/cuda.yml

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,8 @@ jobs:
151151
name: "Qwen3-0.6B"
152152
- repo: "nvidia"
153153
name: "parakeet-tdt"
154+
- repo: "facebook"
155+
name: "dinov2-small-imagenet1k-1-layer"
154156
quant:
155157
- "non-quantized"
156158
- "quantized-int4-tile-packed"
@@ -179,6 +181,15 @@ jobs:
179181
repo: "nvidia"
180182
name: "diar_streaming_sortformer_4spk-v2"
181183
quant: "quantized-int4-weight-only"
184+
# DINOv2 currently supports only non-quantized export
185+
- model:
186+
repo: "facebook"
187+
name: "dinov2-small-imagenet1k-1-layer"
188+
quant: "quantized-int4-tile-packed"
189+
- model:
190+
repo: "facebook"
191+
name: "dinov2-small-imagenet1k-1-layer"
192+
quant: "quantized-int4-weight-only"
182193
with:
183194
timeout: 90
184195
secrets-env: EXECUTORCH_HF_TOKEN
@@ -198,8 +209,8 @@ jobs:
198209
./install_executorch.sh
199210
echo "::endgroup::"
200211
201-
# Setup Huggingface only for models that need it (not parakeet)
202-
if [ "${{ matrix.model.name }}" != "parakeet-tdt" ]; then
212+
# Setup Huggingface only for models that need it (not parakeet or dinov2)
213+
if [ "${{ matrix.model.name }}" != "parakeet-tdt" ] && [ "${{ matrix.model.name }}" != "dinov2-small-imagenet1k-1-layer" ]; then
203214
echo "::group::Setup Huggingface"
204215
pip install -U "huggingface_hub[cli]<1.0" accelerate
205216
huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
@@ -235,6 +246,8 @@ jobs:
235246
name: "gemma-3-4b-it"
236247
- repo: "nvidia"
237248
name: "parakeet-tdt"
249+
- repo: "facebook"
250+
name: "dinov2-small-imagenet1k-1-layer"
238251
quant:
239252
- "non-quantized"
240253
- "quantized-int4-tile-packed"
@@ -263,6 +276,15 @@ jobs:
263276
repo: "nvidia"
264277
name: "diar_streaming_sortformer_4spk-v2"
265278
quant: "quantized-int4-weight-only"
279+
# DINOv2 currently supports only non-quantized export
280+
- model:
281+
repo: "facebook"
282+
name: "dinov2-small-imagenet1k-1-layer"
283+
quant: "quantized-int4-tile-packed"
284+
- model:
285+
repo: "facebook"
286+
name: "dinov2-small-imagenet1k-1-layer"
287+
quant: "quantized-int4-weight-only"
266288
with:
267289
timeout: 90
268290
runner: linux.g5.4xlarge.nvidia.gpu

0 commit comments

Comments
 (0)