diff --git a/Package.swift b/Package.swift
index 1322b918c07..ca8e2086a87 100644
--- a/Package.swift
+++ b/Package.swift
@@ -15,7 +15,7 @@
//
// For details on building frameworks locally or using prebuilt binaries,
// see the documentation:
-// https://pytorch.org/executorch/main/using-executorch-ios.html
+// https://pytorch.org/executorch/0.6/using-executorch-ios.html
import PackageDescription
diff --git a/README-wheel.md b/README-wheel.md
index 9f074ab5ee3..8cb489724fd 100644
--- a/README-wheel.md
+++ b/README-wheel.md
@@ -14,10 +14,10 @@ to run ExecuTorch `.pte` files, with some restrictions:
operators](https://pytorch.org/executorch/stable/ir-ops-set-definition.html)
are linked into the prebuilt module
* Only the [XNNPACK backend
- delegate](https://pytorch.org/executorch/main/native-delegates-executorch-xnnpack-delegate.html)
+ delegate](https://pytorch.org/executorch/0.6/backends-xnnpack)
is linked into the prebuilt module.
-* \[macOS only] [Core ML](https://pytorch.org/executorch/main/build-run-coreml.html)
- and [MPS](https://pytorch.org/executorch/main/build-run-mps.html) backend
+* \[macOS only] [Core ML](https://pytorch.org/executorch/0.6/backends-coreml)
+ and [MPS](https://pytorch.org/executorch/0.6/backends-mps) backend
delegates are also linked into the prebuilt module.
Please visit the [ExecuTorch website](https://pytorch.org/executorch/) for
@@ -30,7 +30,7 @@ tutorials and documentation. Here are some starting points:
* Learn how to use ExecuTorch to export and accelerate a large-language model
from scratch.
* [Exporting to
- ExecuTorch](https://pytorch.org/executorch/main/tutorials/export-to-executorch-tutorial.html)
+ ExecuTorch](https://pytorch.org/executorch/0.6/tutorials/export-to-executorch-tutorial.html)
* Learn the fundamentals of exporting a PyTorch `nn.Module` to ExecuTorch, and
optimizing its performance using quantization and hardware delegation.
* Running LLaMA on
diff --git a/backends/cadence/README.md b/backends/cadence/README.md
index 3cefb71d945..a88a8564a4b 100644
--- a/backends/cadence/README.md
+++ b/backends/cadence/README.md
@@ -6,7 +6,7 @@
## Tutorial
-Please follow the [tutorial](https://pytorch.org/executorch/main/backends-cadence) for more information on how to run models on Cadence/Xtensa DSPs.
+Please follow the [tutorial](https://pytorch.org/executorch/0.6/backends-cadence) for more information on how to run models on Cadence/Xtensa DSPs.
## Directory Structure
diff --git a/backends/qualcomm/README.md b/backends/qualcomm/README.md
index 2ec26fb937a..d2add8d7048 100644
--- a/backends/qualcomm/README.md
+++ b/backends/qualcomm/README.md
@@ -8,7 +8,7 @@ This backend is implemented on the top of
[Qualcomm AI Engine Direct SDK](https://developer.qualcomm.com/software/qualcomm-ai-engine-direct-sdk).
Please follow [tutorial](../../docs/source/backends-qualcomm.md) to setup environment, build, and run executorch models by this backend (Qualcomm AI Engine Direct is also referred to as QNN in the source and documentation).
-A website version of the tutorial is [here](https://pytorch.org/executorch/main/backends-qualcomm).
+A website version of the tutorial is [here](https://pytorch.org/executorch/0.6/backends-qualcomm).
## Delegate Options
diff --git a/backends/xnnpack/README.md b/backends/xnnpack/README.md
index 2328f8e4b90..79b002847ba 100644
--- a/backends/xnnpack/README.md
+++ b/backends/xnnpack/README.md
@@ -132,5 +132,5 @@ create an issue on [github](https://www.github.com/pytorch/executorch/issues).
## See Also
For more information about the XNNPACK Backend, please check out the following resources:
-- [XNNPACK Backend](https://pytorch.org/executorch/main/backends-xnnpack)
-- [XNNPACK Backend Internals](https://pytorch.org/executorch/main/backend-delegates-xnnpack-reference)
+- [XNNPACK Backend](https://pytorch.org/executorch/0.6/backends-xnnpack)
+- [XNNPACK Backend Internals](https://pytorch.org/executorch/0.6/backend-delegates-xnnpack-reference)
diff --git a/docs/source/index.md b/docs/source/index.md
index 01f883020f3..931fb8ea995 100644
--- a/docs/source/index.md
+++ b/docs/source/index.md
@@ -79,7 +79,7 @@ ExecuTorch provides support for:
- [Executorch Runtime API Reference](executorch-runtime-api-reference)
- [Runtime Python API Reference](runtime-python-api-reference)
- [API Life Cycle](api-life-cycle)
-- [Javadoc](https://pytorch.org/executorch/main/javadoc/)
+- [Javadoc](https://pytorch.org/executorch/0.6/javadoc/)
#### Quantization
- [Overview](quantization-overview)
#### Kernel Library
@@ -208,7 +208,7 @@ export-to-executorch-api-reference
executorch-runtime-api-reference
runtime-python-api-reference
api-life-cycle
-Javadoc <https://pytorch.org/executorch/main/javadoc/>
+Javadoc <https://pytorch.org/executorch/0.6/javadoc/>
```
```{toctree}
diff --git a/docs/source/llm/getting-started.md b/docs/source/llm/getting-started.md
index c2e01970a80..3bf9e77c8bb 100644
--- a/docs/source/llm/getting-started.md
+++ b/docs/source/llm/getting-started.md
@@ -159,7 +159,7 @@ example_inputs = (torch.randint(0, 100, (1, model.config.block_size), dtype=torc
# long as they adhere to the rules specified in the dynamic shape configuration.
# Here we set the range of 0th model input's 1st dimension as
# [0, model.config.block_size].
-# See https://pytorch.org/executorch/main/concepts#dynamic-shapes
+# See https://pytorch.org/executorch/0.6/concepts#dynamic-shapes
# for details about creating dynamic shapes.
dynamic_shape = (
{1: torch.export.Dim("token_dim", max=model.config.block_size)},
@@ -478,7 +478,7 @@ example_inputs = (
# long as they adhere to the rules specified in the dynamic shape configuration.
# Here we set the range of 0th model input's 1st dimension as
# [0, model.config.block_size].
-# See https://pytorch.org/executorch/main/concepts.html#dynamic-shapes
+# See https://pytorch.org/executorch/0.6/concepts.html#dynamic-shapes
# for details about creating dynamic shapes.
dynamic_shape = (
{1: torch.export.Dim("token_dim", max=model.config.block_size - 1)},
diff --git a/docs/source/memory-planning-inspection.md b/docs/source/memory-planning-inspection.md
index 47951a72038..45fa3ec0b45 100644
--- a/docs/source/memory-planning-inspection.md
+++ b/docs/source/memory-planning-inspection.md
@@ -1,9 +1,9 @@
# Memory Planning Inspection in ExecuTorch
-After the [Memory Planning](https://pytorch.org/executorch/main/concepts.html#memory-planning) pass of ExecuTorch, memory allocation information is stored on the nodes of the [`ExportedProgram`](https://pytorch.org/executorch/main/concepts.html#exportedprogram). Here, we present a tool designed to inspect memory allocation and visualize all active tensor objects.
+After the [Memory Planning](https://pytorch.org/executorch/0.6/concepts.html#memory-planning) pass of ExecuTorch, memory allocation information is stored on the nodes of the [`ExportedProgram`](https://pytorch.org/executorch/0.6/concepts.html#exportedprogram). Here, we present a tool designed to inspect memory allocation and visualize all active tensor objects.
## Usage
-User should add this code after they call [to_executorch()](https://pytorch.org/executorch/main/export-to-executorch-api-reference.html#executorch.exir.EdgeProgramManager.to_executorch), and it will write memory allocation information stored on the nodes to the file path "memory_profile.json". The file is compatible with the Chrome trace viewer; see below for more information about interpreting the results.
+Users should add this code after calling [to_executorch()](https://pytorch.org/executorch/0.6/export-to-executorch-api-reference.html#executorch.exir.EdgeProgramManager.to_executorch); it writes the memory allocation information stored on the nodes to the file "memory_profile.json". The file is compatible with the Chrome trace viewer; see below for more information about interpreting the results.
```python
from executorch.util.activation_memory_profiler import generate_memory_trace
@@ -13,7 +13,7 @@ generate_memory_trace(
enable_memory_offsets=True,
)
```
-* `prog` is an instance of [`ExecuTorchProgramManager`](https://pytorch.org/executorch/main/export-to-executorch-api-reference.html#executorch.exir.ExecutorchProgramManager), returned by [to_executorch()](https://pytorch.org/executorch/main/export-to-executorch-api-reference.html#executorch.exir.EdgeProgramManager.to_executorch).
+* `prog` is an instance of [`ExecuTorchProgramManager`](https://pytorch.org/executorch/0.6/export-to-executorch-api-reference.html#executorch.exir.ExecutorchProgramManager), returned by [to_executorch()](https://pytorch.org/executorch/0.6/export-to-executorch-api-reference.html#executorch.exir.EdgeProgramManager.to_executorch).
* Set `enable_memory_offsets` to `True` to show the location of each tensor on the memory space.
## Chrome Trace
@@ -27,4 +27,4 @@ Note that, since we are repurposing the Chrome trace tool, the axes in this cont
* The vertical axis has a 2-level hierarchy. The first level, "pid", represents memory space. For CPU, everything is allocated on one "space"; other backends may have multiple. In the second level, each row represents one time step. Since nodes will be executed sequentially, each node represents one time step, thus you will have as many nodes as there are rows.
## Further Reading
-* [Memory Planning](https://pytorch.org/executorch/main/compiler-memory-planning.html)
+* [Memory Planning](https://pytorch.org/executorch/0.6/compiler-memory-planning.html)
diff --git a/docs/source/new-contributor-guide.md b/docs/source/new-contributor-guide.md
index 13692da0234..3a220ff7ab1 100644
--- a/docs/source/new-contributor-guide.md
+++ b/docs/source/new-contributor-guide.md
@@ -129,7 +129,7 @@ Before you can start writing any code, you need to get a copy of ExecuTorch code
git push # push updated local main to your GitHub fork
```
-6. [Build the project](https://pytorch.org/executorch/main/using-executorch-building-from-source.html) and [run the tests](https://github.com/pytorch/executorch/blob/main/CONTRIBUTING.md#testing).
+6. [Build the project](https://pytorch.org/executorch/0.6/using-executorch-building-from-source.html) and [run the tests](https://github.com/pytorch/executorch/blob/main/CONTRIBUTING.md#testing).
Unfortunately, this step is too long to detail here. If you get stuck at any point, please feel free to ask for help on our [Discord server](https://discord.com/invite/Dh43CKSAdc) — we're always eager to help newcomers get onboarded.
diff --git a/docs/source/using-executorch-android.md b/docs/source/using-executorch-android.md
index 2b0d04da6c7..13a459c3fc9 100644
--- a/docs/source/using-executorch-android.md
+++ b/docs/source/using-executorch-android.md
@@ -2,7 +2,7 @@
To use from Android, ExecuTorch provides Java/Kotlin API bindings and Android platform integration, available as an AAR file.
-Note: This page covers Android app integration through the AAR library. The ExecuTorch C++ APIs can also be used from Android native, and the documentation can be found on [this page about cross compilation](https://pytorch.org/executorch/main/using-executorch-building-from-source.html#cross-compilation).
+Note: This page covers Android app integration through the AAR library. The ExecuTorch C++ APIs can also be used from Android native, and the documentation can be found on [this page about cross compilation](https://pytorch.org/executorch/0.6/using-executorch-building-from-source.html#cross-compilation).
## Installation
@@ -41,8 +41,8 @@ dependencies {
Note: If you want to use release v0.5.0, please use dependency `org.pytorch:executorch-android:0.5.1`.
Click the screenshot below to watch the *demo video* on how to add the package and run a simple ExecuTorch model with Android Studio.
-
-
+
+
## Using AAR file directly
@@ -130,17 +130,17 @@ Set environment variable `EXECUTORCH_CMAKE_BUILD_TYPE` to `Release` or `Debug` b
#### Using MediaTek backend
-To use [MediaTek backend](https://pytorch.org/executorch/main/backends-mediatek.html),
+To use [MediaTek backend](https://pytorch.org/executorch/0.6/backends-mediatek.html),
after installing and setting up the SDK, set `NEURON_BUFFER_ALLOCATOR_LIB` and `NEURON_USDK_ADAPTER_LIB` to the corresponding path.
#### Using Qualcomm AI Engine Backend
-To use [Qualcomm AI Engine Backend](https://pytorch.org/executorch/main/backends-qualcomm.html#qualcomm-ai-engine-backend),
+To use [Qualcomm AI Engine Backend](https://pytorch.org/executorch/0.6/backends-qualcomm.html#qualcomm-ai-engine-backend),
after installing and setting up the SDK, set `QNN_SDK_ROOT` to the corresponding path.
#### Using Vulkan Backend
-To use [Vulkan Backend](https://pytorch.org/executorch/main/backends-vulkan.html#vulkan-backend),
+To use [Vulkan Backend](https://pytorch.org/executorch/0.6/backends-vulkan.html#vulkan-backend),
set `EXECUTORCH_BUILD_VULKAN` to `ON`.
## Android Backends
@@ -195,4 +195,4 @@ using ExecuTorch AAR package.
## Java API reference
-Please see [Java API reference](https://pytorch.org/executorch/main/javadoc/).
+Please see [Java API reference](https://pytorch.org/executorch/0.6/javadoc/).
diff --git a/docs/source/using-executorch-ios.md b/docs/source/using-executorch-ios.md
index 83745d99ee5..3bae66734c1 100644
--- a/docs/source/using-executorch-ios.md
+++ b/docs/source/using-executorch-ios.md
@@ -35,8 +35,8 @@ Then select which ExecuTorch framework should link against which target.
Click the screenshot below to watch the *demo video* on how to add the package and run a simple ExecuTorch model on iOS.
-
-
+
+
#### CLI
@@ -293,7 +293,7 @@ From existing memory buffers:
From `NSData` / `Data`:
- `init(data:shape:dataType:...)`: Creates a tensor using an `NSData` object, referencing its bytes without copying.
-
+
From scalar arrays:
- `init(_:shape:dataType:...)`: Creates a tensor from an array of `NSNumber` scalars. Convenience initializers exist to infer shape or data type.
diff --git a/examples/README.md b/examples/README.md
index dbb3bf0dd3b..9a78937a851 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -9,7 +9,7 @@ ExecuTorch's extensive support spans from simple modules like "Add" to comprehen
## Directory structure
```
examples
-├── llm_manual # A storage place for the files that [LLM Maunal](https://pytorch.org/executorch/main/llm/getting-started.html) needs
+├── llm_manual # A storage place for the files that [LLM Manual](https://pytorch.org/executorch/0.6/llm/getting-started.html) needs
├── models # Contains a set of popular and representative PyTorch models
├── portable # Contains end-to-end demos for ExecuTorch in portable mode
├── selective_build # Contains demos of selective build for optimizing the binary size of the ExecuTorch runtime
diff --git a/examples/arm/README.md b/examples/arm/README.md
index 14e5139adb1..546915ccab1 100644
--- a/examples/arm/README.md
+++ b/examples/arm/README.md
@@ -24,7 +24,7 @@ To run these scripts. On a Linux system, in a terminal, with a working internet
$ cd
$ executorch/examples/arm/setup.sh --i-agree-to-the-contained-eula [optional-scratch-dir]
-# Step [2] - Setup Patch to tools, The `setup.sh` script has generated a script that you need to source everytime you restart you shell.
+# Step [2] - Set up path to tools. The `setup.sh` script has generated a script that you need to source every time you restart your shell.
$ source executorch/examples/arm/ethos-u-scratch/setup_path.sh
# Step [3] - build + run ExecuTorch and executor_runner baremetal application
@@ -34,6 +34,6 @@ $ executorch/examples/arm/run.sh --model_name=mv2 --target=ethos-u85-128 [--scra
### Online Tutorial
-We also have a [tutorial](https://pytorch.org/executorch/main/backends-arm-ethos-u) explaining the steps performed in these
+We also have a [tutorial](https://pytorch.org/executorch/0.6/backends-arm-ethos-u) explaining the steps performed in these
scripts, expected results, possible problems and more. It is a step-by-step guide
you can follow to better understand this delegate.
diff --git a/examples/demo-apps/apple_ios/LLaMA/README.md b/examples/demo-apps/apple_ios/LLaMA/README.md
index c39aa99bb77..6fa663d9fce 100644
--- a/examples/demo-apps/apple_ios/LLaMA/README.md
+++ b/examples/demo-apps/apple_ios/LLaMA/README.md
@@ -56,7 +56,7 @@ Link your binary with the ExecuTorch runtime and any backends or kernels used by
Note: To access logs, link against the Debug build of the ExecuTorch runtime, i.e., the executorch_debug framework. For optimal performance, always link against the Release version of the deliverables (those without the _debug suffix), which have all logging overhead removed.
-For more details integrating and Running ExecuTorch on Apple Platforms, checkout this [link](https://pytorch.org/executorch/main/using-executorch-ios).
+For more details on integrating and running ExecuTorch on Apple platforms, check out this [link](https://pytorch.org/executorch/0.6/using-executorch-ios).
### XCode
* Open XCode and select "Open an existing project" to open `examples/demo-apps/apple_ios/LLama`.
diff --git a/examples/demo-apps/apple_ios/LLaMA/docs/delegates/mps_README.md b/examples/demo-apps/apple_ios/LLaMA/docs/delegates/mps_README.md
index 67f6216237d..63c3bbcbe1a 100644
--- a/examples/demo-apps/apple_ios/LLaMA/docs/delegates/mps_README.md
+++ b/examples/demo-apps/apple_ios/LLaMA/docs/delegates/mps_README.md
@@ -85,7 +85,7 @@ Link your binary with the ExecuTorch runtime and any backends or kernels used by
Note: To access logs, link against the Debug build of the ExecuTorch runtime, i.e., the executorch_debug framework. For optimal performance, always link against the Release version of the deliverables (those without the _debug suffix), which have all logging overhead removed.
-For more details integrating and Running ExecuTorch on Apple Platforms, checkout this [link](https://pytorch.org/executorch/main/using-executorch-ios.html).
+For more details on integrating and running ExecuTorch on Apple platforms, check out this [link](https://pytorch.org/executorch/0.6/using-executorch-ios.html).
diff --git a/examples/demo-apps/apple_ios/LLaMA/docs/delegates/xnnpack_README.md b/examples/demo-apps/apple_ios/LLaMA/docs/delegates/xnnpack_README.md
index 8d3b2e83dbb..343e05912ff 100644
--- a/examples/demo-apps/apple_ios/LLaMA/docs/delegates/xnnpack_README.md
+++ b/examples/demo-apps/apple_ios/LLaMA/docs/delegates/xnnpack_README.md
@@ -164,7 +164,7 @@ If you cannot add the package into your app target (it's greyed out), it might h
- More details on integrating and Running ExecuTorch on Apple Platforms, check out the detailed guide [here](https://pytorch.org/executorch/main/using-executorch-ios#local-build).
+ For more details on integrating and running ExecuTorch on Apple platforms, check out the detailed guide [here](https://pytorch.org/executorch/0.6/using-executorch-ios#local-build).
### 3. Configure Build Schemes
@@ -176,7 +176,7 @@ Navigate to `Product --> Scheme --> Edit Scheme --> Info --> Build Configuration
We recommend that you only use the Debug build scheme during development, where you might need to access additional logs. Debug build has logging overhead and will impact inferencing performance, while release build has compiler optimizations enabled and all logging overhead removed.
-For more details integrating and Running ExecuTorch on Apple Platforms or building the package locally, checkout this [link](https://pytorch.org/executorch/main/using-executorch-ios).
+For more details on integrating and running ExecuTorch on Apple platforms or building the package locally, check out this [link](https://pytorch.org/executorch/0.6/using-executorch-ios).
### 4. Build and Run the project
diff --git a/examples/llm_manual/README.md b/examples/llm_manual/README.md
index 6318bbe7e84..497d12687cd 100644
--- a/examples/llm_manual/README.md
+++ b/examples/llm_manual/README.md
@@ -1,3 +1,3 @@
# LLM Manual
-This repository is a storage place for the files that [LLM Manual](https://pytorch.org/executorch/main/llm/getting-started) needs. Please refer to the documentation website for more information.
+This repository is a storage place for the files that [LLM Manual](https://pytorch.org/executorch/0.6/llm/getting-started) needs. Please refer to the documentation website for more information.
diff --git a/examples/models/deepseek-r1-distill-llama-8B/README.md b/examples/models/deepseek-r1-distill-llama-8B/README.md
index 3a7a723c73b..13df9a64309 100644
--- a/examples/models/deepseek-r1-distill-llama-8B/README.md
+++ b/examples/models/deepseek-r1-distill-llama-8B/README.md
@@ -3,7 +3,7 @@ This example demonstrates how to run [Deepseek R1 Distill Llama 8B](https://hugg
# Instructions
## Step 1: Setup
-1. Follow the [tutorial](https://pytorch.org/executorch/main/getting-started-setup) to set up ExecuTorch. For installation run `./install_executorch.sh`
+1. Follow the [tutorial](https://pytorch.org/executorch/0.6/getting-started-setup) to set up ExecuTorch. For installation run `./install_executorch.sh`
2. Run the installation step for Llama specific requirements
```
diff --git a/examples/models/efficient_sam/README.md b/examples/models/efficient_sam/README.md
index c2ba18b2e16..2e92bf38134 100644
--- a/examples/models/efficient_sam/README.md
+++ b/examples/models/efficient_sam/README.md
@@ -6,13 +6,13 @@ This example demonstrates how to export the [EfficientSAM](https://github.com/yf
## 1. Setup
-Follow the [tutorial](https://pytorch.org/executorch/main/getting-started-setup#) to set up ExecuTorch.
+Follow the [tutorial](https://pytorch.org/executorch/0.6/getting-started-setup#) to set up ExecuTorch.
## 2. Exports
### Exporting to Core ML
-Make sure to install the [required dependencies](https://pytorch.org/executorch/main/backends-coreml#development-requirements) for Core ML export.
+Make sure to install the [required dependencies](https://pytorch.org/executorch/0.6/backends-coreml#development-requirements) for Core ML export.
To export the model to Core ML, run the following command:
@@ -32,7 +32,7 @@ python -m examples.xnnpack.aot_compiler -m efficient_sam
# Performance
-Tests were conducted on an Apple M1 Pro chip using the instructions for building and running Executorch with [Core ML](https://pytorch.org/executorch/main/backends-coreml#runtime-integration) and [XNNPACK](https://pytorch.org/executorch/main/tutorial-xnnpack-delegate-lowering#running-the-xnnpack-model-with-cmake) backends.
+Tests were conducted on an Apple M1 Pro chip using the instructions for building and running ExecuTorch with [Core ML](https://pytorch.org/executorch/0.6/backends-coreml#runtime-integration) and [XNNPACK](https://pytorch.org/executorch/0.6/tutorial-xnnpack-delegate-lowering#running-the-xnnpack-model-with-cmake) backends.
| Backend Configuration | Average Inference Time (seconds) |
| ---------------------- | -------------------------------- |
diff --git a/examples/models/llama/README.md b/examples/models/llama/README.md
index 8efceae1959..9ab52716774 100644
--- a/examples/models/llama/README.md
+++ b/examples/models/llama/README.md
@@ -148,7 +148,7 @@ Llama 3 8B performance was measured on the Samsung Galaxy S22, S24, and OnePlus
## Step 1: Setup
> :warning: **double check your python environment**: make sure `conda activate ` is run before all the bash and python scripts.
-1. Follow the [tutorial](https://pytorch.org/executorch/main/getting-started-setup) to set up ExecuTorch. For installation run `./install_executorch.sh --pybind xnnpack`
+1. Follow the [tutorial](https://pytorch.org/executorch/0.6/getting-started-setup) to set up ExecuTorch. For installation run `./install_executorch.sh --pybind xnnpack`
2. Run `examples/models/llama/install_requirements.sh` to install a few dependencies.
@@ -379,10 +379,10 @@ adb shell "cd /data/local/tmp/llama && ./llama_main --model_path --t
### iOS
-Please refer to [this tutorial](https://pytorch.org/executorch/main/llm/llama-demo-ios) to for full instructions on building the iOS LLAMA Demo App. Rename `tokenizer.model` file to `tokenizer.bin` because the demo app looks for the tokenizer file with .bin extension.
+Please refer to [this tutorial](https://pytorch.org/executorch/0.6/llm/llama-demo-ios) for full instructions on building the iOS LLAMA Demo App. Rename the `tokenizer.model` file to `tokenizer.bin` because the demo app looks for a tokenizer file with the .bin extension.
### Android
-Please refer to [this tutorial](https://pytorch.org/executorch/main/llm/llama-demo-android) to for full instructions on building the Android LLAMA Demo App.
+Please refer to [this tutorial](https://pytorch.org/executorch/0.6/llm/llama-demo-android) for full instructions on building the Android LLAMA Demo App.
## Running with low-bit kernels
diff --git a/examples/models/llama/UTILS.md b/examples/models/llama/UTILS.md
index 5f760ad7670..b4a8ddc363f 100644
--- a/examples/models/llama/UTILS.md
+++ b/examples/models/llama/UTILS.md
@@ -25,7 +25,7 @@ From `executorch` root:
## Smaller model delegated to other backends
Currently we supported lowering the stories model to other backends, including, CoreML, MPS and QNN. Please refer to the instruction
-for each backend ([CoreML](https://pytorch.org/executorch/main/backends-coreml), [MPS](https://pytorch.org/executorch/main/backends-mps), [QNN](https://pytorch.org/executorch/main/backends-qualcomm)) before trying to lower them. After the backend library is installed, the script to export a lowered model is
+for each backend ([CoreML](https://pytorch.org/executorch/0.6/backends-coreml), [MPS](https://pytorch.org/executorch/0.6/backends-mps), [QNN](https://pytorch.org/executorch/0.6/backends-qualcomm)) before trying to lower them. After the backend library is installed, the script to export a lowered model is
- Lower to CoreML: `python -m examples.models.llama.export_llama -kv --disable_dynamic_shape --coreml -c stories110M.pt -p params.json `
- MPS: `python -m examples.models.llama.export_llama -kv --disable_dynamic_shape --mps -c stories110M.pt -p params.json `
diff --git a/examples/models/llama/non_cpu_backends.md b/examples/models/llama/non_cpu_backends.md
index 1ee594ebd83..e0e7996198e 100644
--- a/examples/models/llama/non_cpu_backends.md
+++ b/examples/models/llama/non_cpu_backends.md
@@ -10,7 +10,7 @@ Export:
python -m examples.models.llama2.export_llama --checkpoint llama3.pt --params params.json -kv --disable_dynamic_shape --mps --use_sdpa_with_kv_cache -d fp32 -qmode 8da4w -G 32 --embedding-quantize 4,32
```
-After exporting the MPS model .pte file, the [iOS LLAMA](https://pytorch.org/executorch/main/llm/llama-demo-ios.html) app can support running the model. ` --embedding-quantize 4,32` is an optional args for quantizing embedding to reduce the model size.
+After exporting the MPS model .pte file, the [iOS LLAMA](https://pytorch.org/executorch/0.6/llm/llama-demo-ios.html) app can run the model. `--embedding-quantize 4,32` is an optional argument for quantizing the embedding to reduce the model size.
### CoreML
Export:
diff --git a/examples/models/phi-3-mini-lora/README.md b/examples/models/phi-3-mini-lora/README.md
index 62efda6c3dc..72ab495095c 100644
--- a/examples/models/phi-3-mini-lora/README.md
+++ b/examples/models/phi-3-mini-lora/README.md
@@ -16,7 +16,7 @@ To see how you can use the model exported for training in a fully involved finet
python export_model.py
```
-2. Run the inference model using an example runtime. For more detailed steps on this, check out [Building from Source](https://pytorch.org/executorch/main/using-executorch-building-from-source).
+2. Run the inference model using an example runtime. For more detailed steps on this, check out [Building from Source](https://pytorch.org/executorch/0.6/using-executorch-building-from-source).
```
# Clean and configure the CMake build system. Compiled programs will appear in the executorch/cmake-out directory we create here.
diff --git a/examples/models/phi-3-mini/README.md b/examples/models/phi-3-mini/README.md
index f52f2a3a06d..1f7fcce2ee4 100644
--- a/examples/models/phi-3-mini/README.md
+++ b/examples/models/phi-3-mini/README.md
@@ -3,7 +3,7 @@ This example demonstrates how to run a [Phi-3-mini](https://huggingface.co/micro
# Instructions
## Step 1: Setup
-1. Follow the [tutorial](https://pytorch.org/executorch/main/getting-started-setup) to set up ExecuTorch. For installation run `./install_executorch.sh --pybind xnnpack`
+1. Follow the [tutorial](https://pytorch.org/executorch/0.6/getting-started-setup) to set up ExecuTorch. For installation run `./install_executorch.sh --pybind xnnpack`
2. Currently, we support transformers v4.44.2. Install transformers with the following command:
```
pip uninstall -y transformers ; pip install transformers==4.44.2
diff --git a/examples/qualcomm/oss_scripts/llama/README.md b/examples/qualcomm/oss_scripts/llama/README.md
index 855fcef8d91..6048e49d16c 100644
--- a/examples/qualcomm/oss_scripts/llama/README.md
+++ b/examples/qualcomm/oss_scripts/llama/README.md
@@ -12,7 +12,7 @@ KV Cache Mode: In KV Cache mode, the model takes in a single previous token and
Hybrid Mode: Hybrid mode leverages the strengths of both AR-N model and KV cache modes to optimize token generation speed. Initially, it uses AR-N model to efficiently generate the prompt's key-value (KV) cache. Then, the mode switches to KV cache mode, which excels at generating subsequent tokens.
- AR-N model: The auto-regression (AR) length determines the number of tokens to consume and the number of logits to produce. Use it to process the prompt and generate the key-value (kv) cache, which serves as a prompt processor in hybrid mode.
- - Prompt processing with AR-N model:
+ - Prompt processing with AR-N model:
Prompt processing is done using a for-loop. An N-token block is taken, and the KV cache is updated for that block. This process is repeated until all tokens are consumed, with the last block potentially requiring padding. For flexibility, the AR-N model can handle any input length less than the maximum sequence length. For TTFT, the input length (or number of blocks) will vary depending on the actual input length, rather than always being the same.
@@ -27,8 +27,8 @@ Hybrid Mode: Hybrid mode leverages the strengths of both AR-N model and KV cache
### Step 1: Setup
-1. Follow the [tutorial](https://pytorch.org/executorch/main/getting-started-setup) to set up ExecuTorch.
-2. Follow the [tutorial](https://pytorch.org/executorch/main/backends-qualcomm) to build Qualcomm AI Engine Direct Backend.
+1. Follow the [tutorial](https://pytorch.org/executorch/0.6/getting-started-setup) to set up ExecuTorch.
+2. Follow the [tutorial](https://pytorch.org/executorch/0.6/backends-qualcomm) to build Qualcomm AI Engine Direct Backend.
### Step 2: Prepare Model
diff --git a/examples/qualcomm/qaihub_scripts/llama/README.md b/examples/qualcomm/qaihub_scripts/llama/README.md
index fbbf8827b4b..976530946bd 100644
--- a/examples/qualcomm/qaihub_scripts/llama/README.md
+++ b/examples/qualcomm/qaihub_scripts/llama/README.md
@@ -11,8 +11,8 @@ Note that the pre-compiled context binaries could not be futher fine-tuned for o
### Instructions
#### Step 1: Setup
-1. Follow the [tutorial](https://pytorch.org/executorch/main/getting-started-setup) to set up ExecuTorch.
-2. Follow the [tutorial](https://pytorch.org/executorch/main/backends-qualcomm) to build Qualcomm AI Engine Direct Backend.
+1. Follow the [tutorial](https://pytorch.org/executorch/0.6/getting-started-setup) to set up ExecuTorch.
+2. Follow the [tutorial](https://pytorch.org/executorch/0.6/backends-qualcomm) to build Qualcomm AI Engine Direct Backend.
#### Step2: Prepare Model
1. Create account for https://aihub.qualcomm.com/
@@ -39,8 +39,8 @@ Note that the pre-compiled context binaries could not be futher fine-tuned for o
### Instructions
#### Step 1: Setup
-1. Follow the [tutorial](https://pytorch.org/executorch/main/getting-started-setup) to set up ExecuTorch.
-2. Follow the [tutorial](https://pytorch.org/executorch/main/backends-qualcomm) to build Qualcomm AI Engine Direct Backend.
+1. Follow the [tutorial](https://pytorch.org/executorch/0.6/getting-started-setup) to set up ExecuTorch.
+2. Follow the [tutorial](https://pytorch.org/executorch/0.6/backends-qualcomm) to build Qualcomm AI Engine Direct Backend.
#### Step2: Prepare Model
1. Create account for https://aihub.qualcomm.com/
@@ -54,4 +54,4 @@ Please refer to [Check context binary version](../../README.md#check-context-bin
```bash
# AIHUB_CONTEXT_BINARIES: ${PATH_TO_AIHUB_WORKSPACE}/build/llama_v3_8b_chat_quantized
python examples/qualcomm/qaihub_scripts/llama/llama3/qaihub_llama3_8b.py -b build-android -s ${SERIAL_NUM} -m ${SOC_MODEL} --context_binaries ${AIHUB_CONTEXT_BINARIES} --tokenizer_model tokenizer.model --prompt "What is baseball?"
-```
\ No newline at end of file
+```
diff --git a/examples/qualcomm/qaihub_scripts/stable_diffusion/README.md b/examples/qualcomm/qaihub_scripts/stable_diffusion/README.md
index d2649cf72c2..336cf46063c 100644
--- a/examples/qualcomm/qaihub_scripts/stable_diffusion/README.md
+++ b/examples/qualcomm/qaihub_scripts/stable_diffusion/README.md
@@ -10,8 +10,8 @@ The model architecture, scheduler, and time embedding are from the [stabilityai/
### Instructions
#### Step 1: Setup
-1. Follow the [tutorial](https://pytorch.org/executorch/main/getting-started-setup) to set up ExecuTorch.
-2. Follow the [tutorial](https://pytorch.org/executorch/main/backends-qualcomm) to build Qualcomm AI Engine Direct Backend.
+1. Follow the [tutorial](https://pytorch.org/executorch/0.6/getting-started-setup) to set up ExecuTorch.
+2. Follow the [tutorial](https://pytorch.org/executorch/0.6/backends-qualcomm) to build Qualcomm AI Engine Direct Backend.
#### Step2: Prepare Model
1. Download the context binaries for TextEncoder, UNet, and VAEDecoder under https://huggingface.co/qualcomm/Stable-Diffusion-v2.1/tree/main
diff --git a/examples/xnnpack/README.md b/examples/xnnpack/README.md
index f6c292fa3d1..86f3f9da5d3 100644
--- a/examples/xnnpack/README.md
+++ b/examples/xnnpack/README.md
@@ -1,8 +1,8 @@
# XNNPACK Backend
[XNNPACK](https://github.com/google/XNNPACK) is a library of optimized neural network operators for ARM and x86 CPU platforms. Our delegate lowers models to run using these highly optimized CPU operators. You can try out lowering and running some example models in the demo. Please refer to the following docs for information on the XNNPACK Delegate
-- [XNNPACK Backend Delegate Overview](https://pytorch.org/executorch/main/backends-xnnpack)
-- [XNNPACK Delegate Export Tutorial](https://pytorch.org/executorch/main/tutorial-xnnpack-delegate-lowering)
+- [XNNPACK Backend Delegate Overview](https://pytorch.org/executorch/0.6/backends-xnnpack)
+- [XNNPACK Delegate Export Tutorial](https://pytorch.org/executorch/0.6/tutorial-xnnpack-delegate-lowering)
## Directory structure
diff --git a/extension/benchmark/apple/Benchmark/README.md b/extension/benchmark/apple/Benchmark/README.md
index e19a62bcae5..133ab0fce32 100644
--- a/extension/benchmark/apple/Benchmark/README.md
+++ b/extension/benchmark/apple/Benchmark/README.md
@@ -33,7 +33,7 @@ This command performs a shallow clone to speed up the process.
The Benchmark App is configured to use a Swift PM package that provides the prebuilt ExecuTorch frameworks.
-By default, the app relies on the package referencing locally built binaries. To ensure it functions correctly, you must first build the frameworks by following the [guide](https://pytorch.org/executorch/main/using-executorch-ios#building-from-source).
+By default, the app relies on the package referencing locally built binaries. To ensure it functions correctly, you must first build the frameworks by following the [guide](https://pytorch.org/executorch/0.6/using-executorch-ios#building-from-source).
## Adding Models and Resources
diff --git a/extension/llm/export/partitioner_lib.py b/extension/llm/export/partitioner_lib.py
index 20604bbf635..d0991d5f1de 100644
--- a/extension/llm/export/partitioner_lib.py
+++ b/extension/llm/export/partitioner_lib.py
@@ -57,7 +57,7 @@ def get_mps_partitioner(use_kv_cache: bool = False):
)
except ImportError:
raise ImportError(
- "Please install the MPS backend follwing https://pytorch.org/executorch/main/backends-mps"
+            "Please install the MPS backend following https://pytorch.org/executorch/0.6/backends-mps"
)
compile_specs = [CompileSpec("use_fp16", bytes([True]))]
@@ -81,7 +81,7 @@ def get_coreml_partitioner(
)
except ImportError:
raise ImportError(
- "Please install the CoreML backend follwing https://pytorch.org/executorch/main/backends-coreml"
+            "Please install the CoreML backend following https://pytorch.org/executorch/0.6/backends-coreml"
+            "; for buck users, please add example dependencies: //executorch/backends/apple/coreml:backend, etc."
)
@@ -195,7 +195,7 @@ def get_qnn_partitioner(
)
except ImportError:
raise ImportError(
- "Please install the Qualcomm backend following https://pytorch.org/executorch/main/backends-qualcomm"
+ "Please install the Qualcomm backend following https://pytorch.org/executorch/0.6/backends-qualcomm"
)
use_fp16 = True
diff --git a/extension/llm/export/quantizer_lib.py b/extension/llm/export/quantizer_lib.py
index 38137405f79..e10fa482544 100644
--- a/extension/llm/export/quantizer_lib.py
+++ b/extension/llm/export/quantizer_lib.py
@@ -158,7 +158,7 @@ def get_qnn_quantizer(
except ImportError:
raise ImportError(
- "Please install the Qualcomm backend follwing https://pytorch.org/executorch/main/backends-qualcomm"
+            "Please install the Qualcomm backend following https://pytorch.org/executorch/0.6/backends-qualcomm"
)
backend, quant_config = pt2e_quantize.split("_")
@@ -217,7 +217,7 @@ def get_coreml_quantizer(pt2e_quantize: str):
from executorch.backends.apple.coreml.quantizer import CoreMLQuantizer
except ImportError:
raise ImportError(
- "Please install the CoreML backend follwing https://pytorch.org/executorch/main/backends-coreml"
+            "Please install the CoreML backend following https://pytorch.org/executorch/0.6/backends-coreml"
)
if pt2e_quantize == "coreml_8a_c8w":