Skip to content

Commit b371966

Browse files
author
ssjia
committed
Update on "[ET-VK] Add ANY_STORAGE support to expand_copy"
Add a texture shader variant for expand_copy and a resize function for dynamic shape support. The texture shader maps each output texel coordinate to the corresponding input texel using modulo on the input sizes, matching the semantics of the existing buffer shader. Use meta_ubo() instead of buffer_meta_ubo() so the correct UBO type is selected based on storage type. Use extract_int_or_symint_list() for target sizes to handle symbolic integers. Register as ANY_STORAGE. Differential Revision: [D95970162](https://our.internmc.facebook.com/intern/diff/D95970162/) [ghstack-poisoned]
2 parents 0e98983 + 6857b33 commit b371966

File tree

176 files changed

+7707
-8532
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

176 files changed

+7707
-8532
lines changed

.ci/scripts/export_model_artifact.sh

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Arguments:
2424
- google/gemma-3-4b-it
2525
- nvidia/diar_streaming_sortformer_4spk-v2
2626
- nvidia/parakeet-tdt
27+
- facebook/dinov2-small-imagenet1k-1-layer
2728
2829
quant_name Quantization type (optional, default: non-quantized)
2930
Options:
@@ -167,6 +168,14 @@ case "$HF_MODEL" in
167168
PREPROCESSOR_FEATURE_SIZE=""
168169
PREPROCESSOR_OUTPUT=""
169170
;;
171+
facebook/dinov2-small-imagenet1k-1-layer)
172+
MODEL_NAME="dinov2"
173+
TASK=""
174+
MAX_SEQ_LEN=""
175+
EXTRA_PIP=""
176+
PREPROCESSOR_FEATURE_SIZE=""
177+
PREPROCESSOR_OUTPUT=""
178+
;;
170179
mistralai/Voxtral-Mini-4B-Realtime-2602)
171180
MODEL_NAME="voxtral_realtime"
172181
TASK=""
@@ -177,7 +186,7 @@ case "$HF_MODEL" in
177186
;;
178187
*)
179188
echo "Error: Unsupported model '$HF_MODEL'"
180-
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt"
189+
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
181190
exit 1
182191
;;
183192
esac
@@ -293,6 +302,23 @@ if [ "$MODEL_NAME" = "sortformer" ]; then
293302
exit 0
294303
fi
295304

305+
# DINOv2 uses a custom export script
306+
if [ "$MODEL_NAME" = "dinov2" ]; then
307+
pip install -r examples/models/dinov2/install_requirements.txt
308+
309+
python -m executorch.examples.models.dinov2.export_dinov2 \
310+
--backend "$DEVICE" \
311+
--output-dir "${OUTPUT_DIR}"
312+
313+
test -f "${OUTPUT_DIR}/model.pte"
314+
if [ "$DEVICE" = "cuda" ] || [ "$DEVICE" = "cuda-windows" ]; then
315+
test -f "${OUTPUT_DIR}/aoti_cuda_blob.ptd"
316+
fi
317+
ls -al "${OUTPUT_DIR}"
318+
echo "::endgroup::"
319+
exit 0
320+
fi
321+
296322
# Voxtral Realtime uses a custom export script
297323
if [ "$MODEL_NAME" = "voxtral_realtime" ]; then
298324
pip install safetensors huggingface_hub

.ci/scripts/test_backend.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ if [[ "$FLOW" == *qnn* ]]; then
4646
export LD_LIBRARY_PATH"=$QNN_X86_LIB_DIR:$QNN_SDK_ROOT/lib/x86_64-linux-clang/:${LD_LIBRARY_PATH:-}"
4747

4848
# TODO Get SDK root from install scripts
49-
EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_QNN=ON -DQNN_SDK_ROOT=$QNN_SDK_ROOT"
49+
EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_QNN=ON -DQNN_SDK_ROOT=$QNN_SDK_ROOT -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON"
5050
fi
5151

5252
if [[ "$FLOW" == *vulkan* ]]; then

.ci/scripts/test_model_e2e.sh

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Arguments:
2424
- google/gemma-3-4b-it
2525
- Qwen/Qwen3-0.6B
2626
- nvidia/parakeet-tdt
27+
- facebook/dinov2-small-imagenet1k-1-layer
2728
- mistralai/Voxtral-Mini-4B-Realtime-2602
2829
2930
quant_name Quantization type (required)
@@ -190,6 +191,19 @@ case "$HF_MODEL" in
190191
AUDIO_FILE="poem.wav"
191192
IMAGE_PATH=""
192193
;;
194+
facebook/dinov2-small-imagenet1k-1-layer)
195+
MODEL_NAME="dinov2"
196+
RUNNER_TARGET="dinov2_runner"
197+
RUNNER_PATH="dinov2"
198+
EXPECTED_OUTPUT="Samoyed"
199+
PREPROCESSOR=""
200+
TOKENIZER_URL=""
201+
TOKENIZER_FILE=""
202+
AUDIO_URL=""
203+
AUDIO_FILE=""
204+
IMAGE_URL="https://github.com/pytorch/hub/raw/master/images/dog.jpg"
205+
IMAGE_PATH=""
206+
;;
193207
mistralai/Voxtral-Mini-4B-Realtime-2602)
194208
MODEL_NAME="voxtral_realtime"
195209
RUNNER_TARGET="voxtral_realtime_runner"
@@ -204,7 +218,7 @@ case "$HF_MODEL" in
204218
;;
205219
*)
206220
echo "Error: Unsupported model '$HF_MODEL'"
207-
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/parakeet-tdt"
221+
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
208222
exit 1
209223
;;
210224
esac
@@ -218,7 +232,7 @@ echo "::group::Prepare $MODEL_NAME Artifacts"
218232

219233

220234
# Download tokenizer files (skip for models that bundle tokenizer in export or do not use one)
221-
if [ "$MODEL_NAME" != "parakeet" ] && [ "$MODEL_NAME" != "voxtral_realtime" ] && [ "$MODEL_NAME" != "sortformer" ]; then
235+
if [ "$MODEL_NAME" != "parakeet" ] && [ "$MODEL_NAME" != "voxtral_realtime" ] && [ "$MODEL_NAME" != "sortformer" ] && [ "$MODEL_NAME" != "dinov2" ]; then
222236
if [ "$TOKENIZER_FILE" != "" ]; then
223237
curl -L $TOKENIZER_URL/$TOKENIZER_FILE -o $MODEL_DIR/$TOKENIZER_FILE
224238
else
@@ -238,6 +252,11 @@ elif [[ "$MODEL_NAME" == *whisper* ]] || [ "$MODEL_NAME" = "voxtral_realtime" ];
238252
python -c "from datasets import load_dataset;import soundfile as sf;sample = load_dataset('distil-whisper/librispeech_long', 'clean', split='validation')[0]['audio'];sf.write('${MODEL_DIR}/$AUDIO_FILE', sample['array'][:sample['sampling_rate']*30], sample['sampling_rate'])"
239253
fi
240254

255+
# Download test image for vision models
256+
if [ -n "${IMAGE_URL:-}" ]; then
257+
curl -L "$IMAGE_URL" -o "${MODEL_DIR}/test_image.jpg"
258+
fi
259+
241260
ls -al
242261
echo "::endgroup::"
243262

@@ -316,6 +335,12 @@ EOF
316335
RUNNER_ARGS="$RUNNER_ARGS --data_path ${MODEL_DIR}/aoti_cuda_blob.ptd"
317336
fi
318337
;;
338+
dinov2)
339+
RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --image_path ${MODEL_DIR}/test_image.jpg"
340+
if [ "$DEVICE" = "cuda" ]; then
341+
RUNNER_ARGS="$RUNNER_ARGS --data_path ${MODEL_DIR}/aoti_cuda_blob.ptd"
342+
fi
343+
;;
319344
voxtral_realtime)
320345
RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --preprocessor_path ${MODEL_DIR}/$PREPROCESSOR --audio_path ${MODEL_DIR}/$AUDIO_FILE --temperature 0"
321346
# Add CUDA data path if present

.ci/scripts/test_model_e2e_windows.ps1

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ param(
1313
[Parameter(Mandatory = $true)]
1414
[string]$QuantName,
1515
[string]$ModelDir = ".",
16-
[string]$ExpectedCudaVersion = ""
16+
[string]$ExpectedCudaVersion = "",
17+
[string]$Mode = ""
1718
)
1819

1920
Set-StrictMode -Version Latest
@@ -25,6 +26,15 @@ if ($Device -ne "cuda-windows") {
2526
throw "Unsupported device '$Device'. Expected 'cuda-windows'."
2627
}
2728

29+
if ($Mode -ne "") {
30+
if ($Mode -notin @("vr-streaming", "vr-offline")) {
31+
throw "Unsupported mode '$Mode'. Supported modes: vr-streaming, vr-offline"
32+
}
33+
if ($HfModel -ne "mistralai/Voxtral-Mini-4B-Realtime-2602") {
34+
throw "Mode '$Mode' can only be used with Voxtral Realtime model"
35+
}
36+
}
37+
2838
Write-Host "Testing model: $HfModel (quantization: $QuantName)"
2939

3040
$resolvedModelDir = (Resolve-Path -Path $ModelDir).Path
@@ -79,15 +89,28 @@ switch ($HfModel) {
7989
$runnerTarget = "voxtral_realtime_runner"
8090
$runnerPath = "voxtral_realtime"
8191
$runnerPreset = "voxtral-realtime-cuda"
82-
$expectedOutput = "Loading audio from"
92+
$expectedOutput = "Quilter"
8393
$preprocessor = "preprocessor.pte"
8494
$tokenizerUrl = ""
8595
$tokenizerFile = "tekken.json"
8696
$audioUrl = "https://github.com/voxserv/audio_quality_testing_samples/raw/refs/heads/master/testaudio/16000/test01_20s.wav"
8797
$audioFile = "poem.wav"
8898
}
99+
"facebook/dinov2-small-imagenet1k-1-layer" {
100+
$runnerTarget = "dinov2_runner"
101+
$runnerPath = "dinov2"
102+
$runnerPreset = "dinov2-cuda"
103+
$expectedOutput = "Samoyed"
104+
$preprocessor = ""
105+
$tokenizerUrl = ""
106+
$tokenizerFile = ""
107+
$audioUrl = ""
108+
$audioFile = ""
109+
$imageUrl = "https://github.com/pytorch/hub/raw/master/images/dog.jpg"
110+
$imageFile = "test_image.jpg"
111+
}
89112
default {
90-
throw "Unsupported model '$HfModel'. Supported: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt"
113+
throw "Unsupported model '$HfModel'. Supported: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
91114
}
92115
}
93116

@@ -162,6 +185,9 @@ try {
162185
if ($audioUrl -ne "") {
163186
Download-IfNeeded -Url $audioUrl -OutFile (Join-Path -Path $resolvedModelDir -ChildPath $audioFile)
164187
}
188+
if ((Get-Variable -Name imageUrl -ErrorAction SilentlyContinue) -and $imageUrl -ne "") {
189+
Download-IfNeeded -Url $imageUrl -OutFile (Join-Path -Path $resolvedModelDir -ChildPath $imageFile)
190+
}
165191
Get-ChildItem -Path $resolvedModelDir
166192
Write-Host "::endgroup::"
167193

@@ -207,6 +233,16 @@ try {
207233
"--audio_path", (Join-Path -Path $resolvedModelDir -ChildPath $audioFile),
208234
"--preprocessor_path", (Join-Path -Path $resolvedModelDir -ChildPath $preprocessor)
209235
)
236+
if ($Mode -ne "vr-offline") {
237+
$runnerArgs += "--streaming"
238+
}
239+
}
240+
"facebook/dinov2-small-imagenet1k-1-layer" {
241+
$runnerArgs = @(
242+
"--model_path", $modelPte,
243+
"--data_path", $cudaBlob,
244+
"--image_path", (Join-Path -Path $resolvedModelDir -ChildPath $imageFile)
245+
)
210246
}
211247
}
212248

.ci/scripts/unittest-linux-cmake.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ if ! python -c "import tosa_serializer" >/dev/null 2>&1; then
1919
TOSA_SERIALIZATION_DIR="${TOSA_TOOLS_DIR}/serialization"
2020
fi
2121

22+
# NOTE: Will be removed when tosa-tools is installed via pypi
23+
python -m pip install pybind11==2.10.4
2224
CMAKE_POLICY_VERSION_MINIMUM=3.5 BUILD_PYBIND=1 \
2325
python -m pip install --no-dependencies \
2426
"${TOSA_SERIALIZATION_DIR}"

.github/workflows/_unittest.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,11 @@ jobs:
5858
if: ${{ inputs.build-tool == 'cmake' }}
5959
uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
6060
with:
61-
submodules: 'recursive'
6261
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
6362
timeout: 120
6463
script: |
64+
git config --global http.sslBackend openssl
65+
git submodule update --init --recursive
6566
conda init powershell
6667
6768
powershell -Command "& {

.github/workflows/build-presets.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,10 +113,11 @@ jobs:
113113
with:
114114
job-name: build
115115
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
116-
submodules: recursive
117116
timeout: 90
118117
script: |
119118
set -eux
119+
git config --global http.sslBackend openssl
120+
git submodule update --init --recursive
120121
conda init powershell
121122
powershell -Command "& {
122123
Set-PSDebug -Trace 1

.github/workflows/cuda-windows.yml

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ jobs:
4747
- model_repo: "mistralai"
4848
model_name: "Voxtral-Mini-4B-Realtime-2602"
4949
quant: "quantized-int4-tile-packed"
50+
- model_repo: "facebook"
51+
model_name: "dinov2-small-imagenet1k-1-layer"
52+
quant: "non-quantized"
5053
with:
5154
timeout: 90
5255
secrets-env: EXECUTORCH_HF_TOKEN
@@ -83,12 +86,15 @@ jobs:
8386
PYTHON_EXECUTABLE=python ./install_executorch.sh
8487
echo "::endgroup::"
8588
86-
echo "::group::Setup Huggingface"
87-
pip install -U "huggingface_hub[cli]<1.0" accelerate
88-
huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
89-
OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
90-
pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
91-
echo "::endgroup::"
89+
# Setup Huggingface only for models that need it (not dinov2)
90+
if [ "${{ matrix.model_name }}" != "dinov2-small-imagenet1k-1-layer" ]; then
91+
echo "::group::Setup Huggingface"
92+
pip install -U "huggingface_hub[cli]<1.0" accelerate
93+
huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
94+
OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
95+
pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
96+
echo "::endgroup::"
97+
fi
9298
9399
VR_MODE=""
94100
if [ "${{ matrix.model_name }}" = "Voxtral-Mini-4B-Realtime-2602" ]; then
@@ -122,15 +128,19 @@ jobs:
122128
- model_repo: "mistralai"
123129
model_name: "Voxtral-Mini-4B-Realtime-2602"
124130
quant: "quantized-int4-tile-packed"
131+
- model_repo: "facebook"
132+
model_name: "dinov2-small-imagenet1k-1-layer"
133+
quant: "non-quantized"
125134
with:
126135
timeout: 240
127136
runner: windows.g5.4xlarge.nvidia.gpu
128137
gpu-arch-type: cuda
129138
gpu-arch-version: 12.8
130-
submodules: recursive
131139
download-artifact: ${{ matrix.model_repo }}-${{ matrix.model_name }}-cuda-windows-${{ matrix.quant }}
132140
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
133141
script: |
142+
git config --global http.sslBackend openssl
143+
git submodule update --init --recursive
134144
conda init powershell
135145
powershell -Command "& {
136146
Set-PSDebug -Trace 1

.github/workflows/cuda.yml

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,8 @@ jobs:
151151
name: "Qwen3-0.6B"
152152
- repo: "nvidia"
153153
name: "parakeet-tdt"
154+
- repo: "facebook"
155+
name: "dinov2-small-imagenet1k-1-layer"
154156
quant:
155157
- "non-quantized"
156158
- "quantized-int4-tile-packed"
@@ -179,6 +181,15 @@ jobs:
179181
repo: "nvidia"
180182
name: "diar_streaming_sortformer_4spk-v2"
181183
quant: "quantized-int4-weight-only"
184+
# DINOv2 currently supports only non-quantized export
185+
- model:
186+
repo: "facebook"
187+
name: "dinov2-small-imagenet1k-1-layer"
188+
quant: "quantized-int4-tile-packed"
189+
- model:
190+
repo: "facebook"
191+
name: "dinov2-small-imagenet1k-1-layer"
192+
quant: "quantized-int4-weight-only"
182193
with:
183194
timeout: 90
184195
secrets-env: EXECUTORCH_HF_TOKEN
@@ -198,8 +209,8 @@ jobs:
198209
./install_executorch.sh
199210
echo "::endgroup::"
200211
201-
# Setup Huggingface only for models that need it (not parakeet)
202-
if [ "${{ matrix.model.name }}" != "parakeet-tdt" ]; then
212+
# Setup Huggingface only for models that need it (not parakeet or dinov2)
213+
if [ "${{ matrix.model.name }}" != "parakeet-tdt" ] && [ "${{ matrix.model.name }}" != "dinov2-small-imagenet1k-1-layer" ]; then
203214
echo "::group::Setup Huggingface"
204215
pip install -U "huggingface_hub[cli]<1.0" accelerate
205216
huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
@@ -235,6 +246,8 @@ jobs:
235246
name: "gemma-3-4b-it"
236247
- repo: "nvidia"
237248
name: "parakeet-tdt"
249+
- repo: "facebook"
250+
name: "dinov2-small-imagenet1k-1-layer"
238251
quant:
239252
- "non-quantized"
240253
- "quantized-int4-tile-packed"
@@ -263,6 +276,15 @@ jobs:
263276
repo: "nvidia"
264277
name: "diar_streaming_sortformer_4spk-v2"
265278
quant: "quantized-int4-weight-only"
279+
# DINOv2 currently supports only non-quantized export
280+
- model:
281+
repo: "facebook"
282+
name: "dinov2-small-imagenet1k-1-layer"
283+
quant: "quantized-int4-tile-packed"
284+
- model:
285+
repo: "facebook"
286+
name: "dinov2-small-imagenet1k-1-layer"
287+
quant: "quantized-int4-weight-only"
266288
with:
267289
timeout: 90
268290
runner: linux.g5.4xlarge.nvidia.gpu

0 commit comments

Comments
 (0)