pytorch
diff --git a/‎.ci/scripts/export_model_artifact.sh‎
Lines changed: 27 additions & 1 deletion b/‎.ci/scripts/export_model_artifact.sh‎
Lines changed: 27 additions & 1 deletion
diff --git a/‎.ci/scripts/test_backend.sh‎
Lines changed: 1 addition & 1 deletion b/‎.ci/scripts/test_backend.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/scripts/test_model_e2e.sh‎
Lines changed: 27 additions & 2 deletions b/‎.ci/scripts/test_model_e2e.sh‎
Lines changed: 27 additions & 2 deletions
diff --git a/‎.ci/scripts/test_model_e2e_windows.ps1‎
Lines changed: 39 additions & 3 deletions b/‎.ci/scripts/test_model_e2e_windows.ps1‎
Lines changed: 39 additions & 3 deletions
diff --git a/‎.ci/scripts/unittest-linux-cmake.sh‎
Lines changed: 2 additions & 0 deletions b/‎.ci/scripts/unittest-linux-cmake.sh‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.github/workflows/_unittest.yml‎
Lines changed: 2 additions & 1 deletion b/‎.github/workflows/_unittest.yml‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎.github/workflows/build-presets.yml‎
Lines changed: 2 additions & 1 deletion b/‎.github/workflows/build-presets.yml‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎.github/workflows/cuda-windows.yml‎
Lines changed: 17 additions & 7 deletions b/‎.github/workflows/cuda-windows.yml‎
Lines changed: 17 additions & 7 deletions
diff --git a/‎.github/workflows/cuda.yml‎
Lines changed: 24 additions & 2 deletions b/‎.github/workflows/cuda.yml‎
Lines changed: 24 additions & 2 deletions
@@ -24,6 +24,7 @@ Arguments:
                  - google/gemma-3-4b-it
                  - nvidia/diar_streaming_sortformer_4spk-v2
                  - nvidia/parakeet-tdt
+                 - facebook/dinov2-small-imagenet1k-1-layer
 
   quant_name   Quantization type (optional, default: non-quantized)
                Options:
@@ -167,6 +168,14 @@ case "$HF_MODEL" in
     PREPROCESSOR_FEATURE_SIZE=""
     PREPROCESSOR_OUTPUT=""
     ;;
+  # DINOv2: only MODEL_NAME matters for this entry. The dedicated
+  # MODEL_NAME=dinov2 export branch later in this script reads just DEVICE and
+  # OUTPUT_DIR and then exits, so the remaining settings are left empty.
+  facebook/dinov2-small-imagenet1k-1-layer)
+    MODEL_NAME="dinov2"
+    TASK=""
+    MAX_SEQ_LEN=""
+    EXTRA_PIP=""
+    PREPROCESSOR_FEATURE_SIZE=""
+    PREPROCESSOR_OUTPUT=""
+    ;;
   mistralai/Voxtral-Mini-4B-Realtime-2602)
     MODEL_NAME="voxtral_realtime"
     TASK=""
@@ -177,7 +186,7 @@ case "$HF_MODEL" in
     ;;
   *)
     echo "Error: Unsupported model '$HF_MODEL'"
-    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt"
+    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
     exit 1
     ;;
 esac
@@ -293,6 +302,23 @@ if [ "$MODEL_NAME" = "sortformer" ]; then
   exit 0
 fi
 
+# DINOv2 uses a custom export script: install its requirements, run the
+# exporter for the selected backend, check that the expected artifacts exist,
+# then exit so that none of the export logic below this branch runs.
+if [ "$MODEL_NAME" = "dinov2" ]; then
+  pip install -r examples/models/dinov2/install_requirements.txt
+
+  python -m executorch.examples.models.dinov2.export_dinov2 \
+      --backend "$DEVICE" \
+      --output-dir "${OUTPUT_DIR}"
+
+  # Artifact checks: model.pte is always expected; for the cuda and
+  # cuda-windows devices aoti_cuda_blob.ptd is expected as well.
+  # NOTE(review): a failing `test -f` only aborts the script if errexit
+  # (set -e) is active, which is not visible in this hunk - confirm.
+  test -f "${OUTPUT_DIR}/model.pte"
+  if [ "$DEVICE" = "cuda" ] || [ "$DEVICE" = "cuda-windows" ]; then
+    test -f "${OUTPUT_DIR}/aoti_cuda_blob.ptd"
+  fi
+  ls -al "${OUTPUT_DIR}"
+  # Presumably closes a log group opened earlier in the script - confirm.
+  echo "::endgroup::"
+  exit 0
+fi
+
 # Voxtral Realtime uses a custom export script
 if [ "$MODEL_NAME" = "voxtral_realtime" ]; then
   pip install safetensors huggingface_hub
 
@@ -46,7 +46,7 @@ if [[ "$FLOW" == *qnn* ]]; then
     export LD_LIBRARY_PATH"=$QNN_X86_LIB_DIR:$QNN_SDK_ROOT/lib/x86_64-linux-clang/:${LD_LIBRARY_PATH:-}"
 
     # TODO Get SDK root from install scripts
-    EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_QNN=ON -DQNN_SDK_ROOT=$QNN_SDK_ROOT"
+    EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_QNN=ON -DQNN_SDK_ROOT=$QNN_SDK_ROOT -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON"
 fi
 
 if [[ "$FLOW" == *vulkan* ]]; then
 
@@ -24,6 +24,7 @@ Arguments:
                 - google/gemma-3-4b-it
                 - Qwen/Qwen3-0.6B
                 - nvidia/parakeet-tdt
+                - facebook/dinov2-small-imagenet1k-1-layer
                 - mistralai/Voxtral-Mini-4B-Realtime-2602
 
   quant_name  Quantization type (required)
@@ -190,6 +191,19 @@ case "$HF_MODEL" in
     AUDIO_FILE="poem.wav"
     IMAGE_PATH=""
     ;;
+  # DINOv2: no preprocessor, tokenizer or audio input. The test image is
+  # downloaded from IMAGE_URL to ${MODEL_DIR}/test_image.jpg later in this
+  # script, and the dinov2 runner arguments point at that fixed file name, so
+  # IMAGE_PATH stays empty.
+  # NOTE(review): EXPECTED_OUTPUT is presumably matched against the runner
+  # output; the check itself is not visible in this hunk - confirm.
+  facebook/dinov2-small-imagenet1k-1-layer)
+    MODEL_NAME="dinov2"
+    RUNNER_TARGET="dinov2_runner"
+    RUNNER_PATH="dinov2"
+    EXPECTED_OUTPUT="Samoyed"
+    PREPROCESSOR=""
+    TOKENIZER_URL=""
+    TOKENIZER_FILE=""
+    AUDIO_URL=""
+    AUDIO_FILE=""
+    IMAGE_URL="https://github.com/pytorch/hub/raw/master/images/dog.jpg"
+    IMAGE_PATH=""
+    ;;
   mistralai/Voxtral-Mini-4B-Realtime-2602)
     MODEL_NAME="voxtral_realtime"
     RUNNER_TARGET="voxtral_realtime_runner"
@@ -204,7 +218,7 @@ case "$HF_MODEL" in
     ;;
   *)
     echo "Error: Unsupported model '$HF_MODEL'"
-    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/parakeet-tdt"
+    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
     exit 1
     ;;
 esac
@@ -218,7 +232,7 @@ echo "::group::Prepare $MODEL_NAME Artifacts"
 
 
 # Download tokenizer files (skip for models that bundle tokenizer in export or do not use one)
-if [ "$MODEL_NAME" != "parakeet" ] && [ "$MODEL_NAME" != "voxtral_realtime" ] && [ "$MODEL_NAME" != "sortformer" ]; then
+if [ "$MODEL_NAME" != "parakeet" ] && [ "$MODEL_NAME" != "voxtral_realtime" ] && [ "$MODEL_NAME" != "sortformer" ] && [ "$MODEL_NAME" != "dinov2" ]; then
   if [ "$TOKENIZER_FILE" != "" ]; then
     curl -L $TOKENIZER_URL/$TOKENIZER_FILE -o $MODEL_DIR/$TOKENIZER_FILE
   else
@@ -238,6 +252,11 @@ elif [[ "$MODEL_NAME" == *whisper* ]] || [ "$MODEL_NAME" = "voxtral_realtime" ];
   python -c "from datasets import load_dataset;import soundfile as sf;sample = load_dataset('distil-whisper/librispeech_long', 'clean', split='validation')[0]['audio'];sf.write('${MODEL_DIR}/$AUDIO_FILE', sample['array'][:sample['sampling_rate']*30], sample['sampling_rate'])"
 fi
 
+# Download test image for vision models.
+# ${IMAGE_URL:-} keeps the check safe when a model entry leaves IMAGE_URL unset.
+if [ -n "${IMAGE_URL:-}" ]; then
+  # -f makes curl exit non-zero on an HTTP error instead of saving the server's
+  # error page as test_image.jpg, so a bad download is reported at this step
+  # rather than as a confusing image failure in the runner.
+  curl -fL "$IMAGE_URL" -o "${MODEL_DIR}/test_image.jpg"
+fi
+
 ls -al
 echo "::endgroup::"
 
@@ -316,6 +335,12 @@ EOF
       RUNNER_ARGS="$RUNNER_ARGS --data_path ${MODEL_DIR}/aoti_cuda_blob.ptd"
     fi
     ;;
+  # DINOv2 runner: takes the exported model and the downloaded test image;
+  # the CUDA data blob is passed only when running on the cuda device.
+  dinov2)
+    RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --image_path ${MODEL_DIR}/test_image.jpg"
+    if [ "$DEVICE" = "cuda" ]; then
+      RUNNER_ARGS="$RUNNER_ARGS --data_path ${MODEL_DIR}/aoti_cuda_blob.ptd"
+    fi
+    ;;
   voxtral_realtime)
     RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --preprocessor_path ${MODEL_DIR}/$PREPROCESSOR --audio_path ${MODEL_DIR}/$AUDIO_FILE --temperature 0"
     # Add CUDA data path if present
 
@@ -13,7 +13,8 @@ param(
     [Parameter(Mandatory = $true)]
     [string]$QuantName,
     [string]$ModelDir = ".",
-    [string]$ExpectedCudaVersion = ""
+    [string]$ExpectedCudaVersion = "",
+    [string]$Mode = ""
 )
 
 Set-StrictMode -Version Latest
@@ -25,6 +26,15 @@ if ($Device -ne "cuda-windows") {
     throw "Unsupported device '$Device'. Expected 'cuda-windows'."
 }
 
+# Validate the optional -Mode argument before any work is done: an empty value
+# means "no mode requested"; otherwise it must be one of the supported modes
+# and the selected model must be Voxtral Realtime.
+if (-not [string]::IsNullOrEmpty($Mode)) {
+    $supportedModes = @("vr-streaming", "vr-offline")
+    if ($supportedModes -notcontains $Mode) {
+        throw "Unsupported mode '$Mode'. Supported modes: vr-streaming, vr-offline"
+    }
+
+    $isVoxtralRealtime = $HfModel -eq "mistralai/Voxtral-Mini-4B-Realtime-2602"
+    if (-not $isVoxtralRealtime) {
+        throw "Mode '$Mode' can only be used with Voxtral Realtime model"
+    }
+}
+
 Write-Host "Testing model: $HfModel (quantization: $QuantName)"
 
 $resolvedModelDir = (Resolve-Path -Path $ModelDir).Path
@@ -79,15 +89,28 @@ switch ($HfModel) {
         $runnerTarget = "voxtral_realtime_runner"
         $runnerPath = "voxtral_realtime"
         $runnerPreset = "voxtral-realtime-cuda"
-        $expectedOutput = "Loading audio from"
+        $expectedOutput = "Quilter"
         $preprocessor = "preprocessor.pte"
         $tokenizerUrl = ""
         $tokenizerFile = "tekken.json"
         $audioUrl = "https://github.com/voxserv/audio_quality_testing_samples/raw/refs/heads/master/testaudio/16000/test01_20s.wav"
         $audioFile = "poem.wav"
     }
+    # DINOv2: no preprocessor, tokenizer or audio input. This entry also sets
+    # $imageUrl/$imageFile, which the image download step and the dinov2 runner
+    # arguments later in this script read.
+    "facebook/dinov2-small-imagenet1k-1-layer" {
+        $runnerTarget = "dinov2_runner"
+        $runnerPath = "dinov2"
+        $runnerPreset = "dinov2-cuda"
+        $expectedOutput = "Samoyed"
+        $preprocessor = ""
+        $tokenizerUrl = ""
+        $tokenizerFile = ""
+        $audioUrl = ""
+        $audioFile = ""
+        $imageUrl = "https://github.com/pytorch/hub/raw/master/images/dog.jpg"
+        $imageFile = "test_image.jpg"
+    }
     default {
-        throw "Unsupported model '$HfModel'. Supported: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt"
+        throw "Unsupported model '$HfModel'. Supported: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
     }
 }
 
@@ -162,6 +185,9 @@ try {
     if ($audioUrl -ne "") {
         Download-IfNeeded -Url $audioUrl -OutFile (Join-Path -Path $resolvedModelDir -ChildPath $audioFile)
     }
+    # Not every model entry assigns $imageUrl (the dinov2 entry does), and the
+    # script runs under Set-StrictMode -Version Latest, so probe for the
+    # variable with Get-Variable before reading its value.
+    if ((Get-Variable -Name imageUrl -ErrorAction SilentlyContinue) -and $imageUrl -ne "") {
+        Download-IfNeeded -Url $imageUrl -OutFile (Join-Path -Path $resolvedModelDir -ChildPath $imageFile)
+    }
     Get-ChildItem -Path $resolvedModelDir
     Write-Host "::endgroup::"
 
@@ -207,6 +233,16 @@ try {
                 "--audio_path", (Join-Path -Path $resolvedModelDir -ChildPath $audioFile),
                 "--preprocessor_path", (Join-Path -Path $resolvedModelDir -ChildPath $preprocessor)
             )
+            if ($Mode -ne "vr-offline") {
+                $runnerArgs += "--streaming"
+            }
+        }
+        # DINOv2 runner: model, CUDA data blob and the downloaded test image.
+        # --data_path is passed unconditionally; this script rejects any
+        # device other than cuda-windows near its top.
+        "facebook/dinov2-small-imagenet1k-1-layer" {
+            $runnerArgs = @(
+                "--model_path", $modelPte,
+                "--data_path", $cudaBlob,
+                "--image_path", (Join-Path -Path $resolvedModelDir -ChildPath $imageFile)
+            )
+        }
     }
 
 
@@ -19,6 +19,8 @@ if ! python -c "import tosa_serializer" >/dev/null 2>&1; then
     TOSA_SERIALIZATION_DIR="${TOSA_TOOLS_DIR}/serialization"
   fi
 
+  # NOTE: Will be removed when tosa-tools is installed via pypi
+  python -m pip install pybind11==2.10.4
   CMAKE_POLICY_VERSION_MINIMUM=3.5 BUILD_PYBIND=1 \
     python -m pip install --no-dependencies \
     "${TOSA_SERIALIZATION_DIR}"
 
@@ -58,10 +58,11 @@ jobs:
     if: ${{ inputs.build-tool == 'cmake' }}
     uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
     with:
-      submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: 120
       script: |
+        git config --global http.sslBackend openssl
+        git submodule update --init --recursive 
         conda init powershell
 
         powershell -Command "& {
 
@@ -113,10 +113,11 @@ jobs:
     with:
       job-name: build
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      submodules: recursive
       timeout: 90
       script: |
         set -eux
+        git config --global http.sslBackend openssl
+        git submodule update --init --recursive
         conda init powershell
         powershell -Command "& {
           Set-PSDebug -Trace 1
 
@@ -47,6 +47,9 @@ jobs:
           - model_repo: "mistralai"
             model_name: "Voxtral-Mini-4B-Realtime-2602"
             quant: "quantized-int4-tile-packed"
+          - model_repo: "facebook"
+            model_name: "dinov2-small-imagenet1k-1-layer"
+            quant: "non-quantized"
     with:
       timeout: 90
       secrets-env: EXECUTORCH_HF_TOKEN
@@ -83,12 +86,15 @@ jobs:
         PYTHON_EXECUTABLE=python ./install_executorch.sh
         echo "::endgroup::"
 
-        echo "::group::Setup Huggingface"
-        pip install -U "huggingface_hub[cli]<1.0" accelerate
-        huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
-        OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
-        pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
-        echo "::endgroup::"
+        # Setup Huggingface only for models that need it (not dinov2)
+        if [ "${{ matrix.model_name }}" != "dinov2-small-imagenet1k-1-layer" ]; then
+          echo "::group::Setup Huggingface"
+          pip install -U "huggingface_hub[cli]<1.0" accelerate
+          huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
+          OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
+          pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
+          echo "::endgroup::"
+        fi
 
         VR_MODE=""
         if [ "${{ matrix.model_name }}" = "Voxtral-Mini-4B-Realtime-2602" ]; then
@@ -122,15 +128,19 @@ jobs:
           - model_repo: "mistralai"
             model_name: "Voxtral-Mini-4B-Realtime-2602"
             quant: "quantized-int4-tile-packed"
+          - model_repo: "facebook"
+            model_name: "dinov2-small-imagenet1k-1-layer"
+            quant: "non-quantized"
     with:
       timeout: 240
       runner: windows.g5.4xlarge.nvidia.gpu
       gpu-arch-type: cuda
       gpu-arch-version: 12.8
-      submodules: recursive
       download-artifact: ${{ matrix.model_repo }}-${{ matrix.model_name }}-cuda-windows-${{ matrix.quant }}
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       script: |
+        git config --global http.sslBackend openssl
+        git submodule update --init --recursive
         conda init powershell
         powershell -Command "& {
           Set-PSDebug -Trace 1
 
@@ -151,6 +151,8 @@ jobs:
             name: "Qwen3-0.6B"
           - repo: "nvidia"
             name: "parakeet-tdt"
+          - repo: "facebook"
+            name: "dinov2-small-imagenet1k-1-layer"
         quant:
           - "non-quantized"
           - "quantized-int4-tile-packed"
@@ -179,6 +181,15 @@ jobs:
               repo: "nvidia"
               name: "diar_streaming_sortformer_4spk-v2"
             quant: "quantized-int4-weight-only"
+          # DINOv2 currently supports only non-quantized export
+          - model:
+              repo: "facebook"
+              name: "dinov2-small-imagenet1k-1-layer"
+            quant: "quantized-int4-tile-packed"
+          - model:
+              repo: "facebook"
+              name: "dinov2-small-imagenet1k-1-layer"
+            quant: "quantized-int4-weight-only"
     with:
       timeout: 90
       secrets-env: EXECUTORCH_HF_TOKEN
@@ -198,8 +209,8 @@ jobs:
         ./install_executorch.sh
         echo "::endgroup::"
 
-        # Setup Huggingface only for models that need it (not parakeet)
-        if [ "${{ matrix.model.name }}" != "parakeet-tdt" ]; then
+        # Setup Huggingface only for models that need it (not parakeet or dinov2)
+        if [ "${{ matrix.model.name }}" != "parakeet-tdt" ] && [ "${{ matrix.model.name }}" != "dinov2-small-imagenet1k-1-layer" ]; then
           echo "::group::Setup Huggingface"
           pip install -U "huggingface_hub[cli]<1.0" accelerate
           huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
@@ -235,6 +246,8 @@ jobs:
             name: "gemma-3-4b-it"
           - repo: "nvidia"
             name: "parakeet-tdt"
+          - repo: "facebook"
+            name: "dinov2-small-imagenet1k-1-layer"
         quant:
           - "non-quantized"
           - "quantized-int4-tile-packed"
@@ -263,6 +276,15 @@ jobs:
               repo: "nvidia"
               name: "diar_streaming_sortformer_4spk-v2"
             quant: "quantized-int4-weight-only"
+          # DINOv2 currently supports only non-quantized export
+          - model:
+              repo: "facebook"
+              name: "dinov2-small-imagenet1k-1-layer"
+            quant: "quantized-int4-tile-packed"
+          - model:
+              repo: "facebook"
+              name: "dinov2-small-imagenet1k-1-layer"
+            quant: "quantized-int4-weight-only"
     with:
       timeout: 90
       runner: linux.g5.4xlarge.nvidia.gpu