
Commit f136f86

Update base for Update on "Dont quantize the current token for attention"
Differential Revision: [D63497872](https://our.internmc.facebook.com/intern/diff/D63497872/)

[ghstack-poisoned]

Merge commit f136f86 (2 parents: 7e7edaf + c726a9b)


190 files changed: +9896 −1407 lines


.ci/scripts/gather_test_models.py

Lines changed: 11 additions & 9 deletions
@@ -20,16 +20,16 @@
 CUSTOM_RUNNERS = {
     "linux": {
         # This one runs OOM on smaller runner, the root cause is unclear (T163016365)
-        "w2l": "linux.12xlarge",
-        "ic4": "linux.12xlarge",
-        "resnet50": "linux.12xlarge",
-        "llava": "linux.12xlarge",
-        "llama3_2_vision_encoder": "linux.12xlarge",
-        # "llama3_2_text_decoder": "linux.12xlarge", # TODO: re-enable test when Huy's change is in / model gets smaller.
+        "w2l": "linux.4xlarge.memory",
+        "ic4": "linux.4xlarge.memory",
+        "resnet50": "linux.4xlarge.memory",
+        "llava": "linux.4xlarge.memory",
+        "llama3_2_vision_encoder": "linux.4xlarge.memory",
+        "llama3_2_text_decoder": "linux.4xlarge.memory",
         # This one causes timeout on smaller runner, the root cause is unclear (T161064121)
-        "dl3": "linux.12xlarge",
-        "emformer_join": "linux.12xlarge",
-        "emformer_predict": "linux.12xlarge",
+        "dl3": "linux.4xlarge.memory",
+        "emformer_join": "linux.4xlarge.memory",
+        "emformer_predict": "linux.4xlarge.memory",
     }
 }

@@ -39,10 +39,12 @@
     "linux": {
         "mobilebert": 90,
         "emformer_predict": 360,
+        "llama3_2_text_decoder": 360,
     },
     "macos": {
         "mobilebert": 90,
         "emformer_predict": 360,
+        "llama3_2_text_decoder": 360,
     },
 }

.ci/scripts/setup-macos.sh

Lines changed: 3 additions & 0 deletions
@@ -49,6 +49,9 @@ install_buck() {

   rm "${BUCK2}"
   popd
+
+  # Kill all running buck2 daemon for a fresh start
+  buck2 killall || true
 }

 function write_sccache_stub() {
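
The added cleanup relies on a common shell idiom: these CI scripts run with `set -e`, where any command exiting non-zero aborts the script, so a best-effort command is suffixed with `|| true` to discard its exit status. A minimal, self-contained sketch of the idiom (illustration only, not code from this repository):

    #!/usr/bin/env bash
    set -euo pipefail

    # `false` always exits non-zero; under `set -e` it would abort the script.
    # `|| true` discards the failure so execution continues -- the same reason
    # `buck2 killall` is allowed to fail when no daemon is running.
    false || true
    echo "still running"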

.ci/scripts/test_llama.sh

Lines changed: 18 additions & 4 deletions
@@ -27,6 +27,10 @@ while [[ $# -gt 0 ]]; do
       MODE="$2" # portable or xnnpack+custom or xnnpack+custom+qe
       shift 2
       ;;
+    -pt2e_quantize)
+      PT2E_QUANTIZE="$2"
+      shift 2
+      ;;
     -upload)
       UPLOAD_DIR="$2"
       shift 2
@@ -44,6 +48,12 @@ MODE=${MODE:-"xnnpack+custom"}
 # Default UPLOAD_DIR to empty string if not set
 UPLOAD_DIR="${UPLOAD_DIR:-}"

+# Default PT2E_QUANTIZE to empty string if not set
+PT2E_QUANTIZE="${PT2E_QUANTIZE:-}"
+
+# Default CMake Build Type to release mode
+CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release}
+
 if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
   echo "Expecting atleast 4 positional arguments"
   echo "Usage: [...]"
@@ -136,7 +146,7 @@ cmake_install_executorch_libraries() {
   rm -rf cmake-out
   retry cmake \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
-    -DCMAKE_BUILD_TYPE=Debug \
+    -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
@@ -150,22 +160,22 @@ cmake_install_executorch_libraries() {
     -DQNN_SDK_ROOT="$QNN_SDK_ROOT" \
     -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
     -Bcmake-out .
-  cmake --build cmake-out -j9 --target install --config Debug
+  cmake --build cmake-out -j9 --target install --config "$CMAKE_BUILD_TYPE"
 }

 cmake_build_llama_runner() {
   echo "Building llama runner"
   dir="examples/models/llama"
   retry cmake \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
-    -DCMAKE_BUILD_TYPE=Debug \
+    -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
     -DEXECUTORCH_BUILD_KERNELS_CUSTOM="$CUSTOM" \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
     -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
     -Bcmake-out/${dir} \
     ${dir}
-  cmake --build cmake-out/${dir} -j9 --config Debug
+  cmake --build cmake-out/${dir} -j9 --config "$CMAKE_BUILD_TYPE"

 }

@@ -234,6 +244,10 @@ if [[ "${COREML}" == "ON" ]]; then
 fi
 if [[ "${QNN}" == "ON" ]]; then
   EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
+  echo "PT2E_QUANTIZE is ${PT2E_QUANTIZE}"
+  if [[ "${PT2E_QUANTIZE}" == "qnn_16a16w" ]]; then
+    EXPORT_ARGS+=" --tokenizer_path tokenizer.model --pt2e_quantize qnn_16a16w --calibration_tasks wikitext --calibration_limit 1 --calibration_seq_length 128 --calibration_data Once "
+  fi
 fi
 # Add dynamically linked library location
 $PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}
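
The new option handling follows standard shell patterns: an optional -pt2e_quantize flag is folded into the existing case-based argument loop, unset variables fall back to defaults via ${VAR:-default}, and extra export arguments are appended only on the QNN 16a16w path. A condensed sketch of the pattern (most flags and the calibration arguments elided; not the full script):

    #!/usr/bin/env bash
    # Sketch of the option handling added to test_llama.sh.
    while [[ $# -gt 0 ]]; do
      case "$1" in
        -pt2e_quantize) PT2E_QUANTIZE="$2"; shift 2 ;;
        *) shift ;;  # other flags elided in this sketch
      esac
    done

    PT2E_QUANTIZE="${PT2E_QUANTIZE:-}"               # empty string if the flag was omitted
    CMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE:-Release}"  # env-overridable; Release by default

    EXPORT_ARGS="-kv -v --qnn --disable_dynamic_shape"
    if [[ "${PT2E_QUANTIZE}" == "qnn_16a16w" ]]; then
      EXPORT_ARGS+=" --pt2e_quantize qnn_16a16w"     # calibration flags elided
    fi
    echo "build type: ${CMAKE_BUILD_TYPE}; export args: ${EXPORT_ARGS}"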

.ci/scripts/test_llava.sh

Lines changed: 8 additions & 8 deletions
@@ -8,11 +8,11 @@
 set -exu
 # shellcheck source=/dev/null

-BUILD_TYPE=${1:-Debug}
 TARGET_OS=${2:-Native}
 BUILD_DIR=${3:-cmake-out}
+CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release}

-echo "Building with BUILD_TYPE: $BUILD_TYPE, TARGET_OS: $TARGET_OS, BUILD_DIR: $BUILD_DIR"
+echo "Building with CMAKE_BUILD_TYPE: $CMAKE_BUILD_TYPE, TARGET_OS: $TARGET_OS, BUILD_DIR: $BUILD_DIR"

 if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
   PYTHON_EXECUTABLE=python3
@@ -32,7 +32,7 @@ if hash nproc &> /dev/null; then NPROC=$(nproc); fi

 EXECUTORCH_COMMON_CMAKE_ARGS=" \
   -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
-  -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
+  -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
   -DEXECUTORCH_ENABLE_LOGGING=ON \
   -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
   -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
@@ -49,7 +49,7 @@ cmake_install_executorch_libraries() {
   ${EXECUTORCH_COMMON_CMAKE_ARGS} \
   -B${BUILD_DIR} .

-  cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${BUILD_TYPE}
+  cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${CMAKE_BUILD_TYPE}
 }

 cmake_install_executorch_libraries_for_android() {
@@ -59,14 +59,14 @@ cmake_install_executorch_libraries_for_android() {
   ${EXECUTORCH_COMMON_CMAKE_ARGS} \
   -B${BUILD_DIR} .

-  cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${BUILD_TYPE}
+  cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${CMAKE_BUILD_TYPE}
 }


 LLAVA_COMMON_CMAKE_ARGS=" \
   -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
   -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
-  -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
+  -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
   -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
   -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
   -DEXECUTORCH_BUILD_XNNPACK=ON"
@@ -81,7 +81,7 @@ cmake_build_llava_runner() {
   -B${BUILD_DIR}/${dir} \
   ${dir}

-  cmake --build ${BUILD_DIR}/${dir} -j${NPROC} --config ${BUILD_TYPE}
+  cmake --build ${BUILD_DIR}/${dir} -j${NPROC} --config ${CMAKE_BUILD_TYPE}
 }


@@ -98,7 +98,7 @@ cmake_build_llava_runner_for_android() {
   -B${BUILD_DIR}/${dir} \
   ${dir}

-  cmake --build ${BUILD_DIR}/${dir} -j${NPROC} --config ${BUILD_TYPE}
+  cmake --build ${BUILD_DIR}/${dir} -j${NPROC} --config ${CMAKE_BUILD_TYPE}
 }

 # only export the one without custom op for now since it's
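
Since the build type now comes from the environment instead of the first positional argument, a Debug build is requested by setting the variable at invocation time. Something like the following (invocation shape inferred from the diff, not from documentation):

    # Release is the default; override through the environment.
    CMAKE_BUILD_TYPE=Debug bash .ci/scripts/test_llava.sh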

.github/workflows/apple.yml

Lines changed: 6 additions & 0 deletions
@@ -42,6 +42,8 @@ jobs:

   build-demo-ios:
     name: build-demo-ios
+    # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
+    if: ${{ !github.event.pull_request.head.repo.fork }}
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     secrets: inherit
     with:
@@ -190,6 +192,8 @@ jobs:
         ) done

   upload-frameworks-ios:
+    # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
+    if: ${{ !github.event.pull_request.head.repo.fork }}
     runs-on: ubuntu-22.04
     needs: [build-frameworks-ios, set-version]
     timeout-minutes: 30
@@ -278,6 +282,8 @@ jobs:

   build-benchmark-app:
     name: build-benchmark-app
+    # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
+    if: ${{ !github.event.pull_request.head.repo.fork }}
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     secrets: inherit
     with:

.github/workflows/build-wheels-linux.yml

Lines changed: 1 addition & 0 deletions
@@ -27,6 +27,7 @@ jobs:
       test-infra-ref: main
       with-cuda: disabled
       with-rocm: disabled
+      python-versions: '["3.10", "3.11", "3.12"]'

   build:
     needs: generate-matrix

.github/workflows/build-wheels-m1.yml

Lines changed: 1 addition & 0 deletions
@@ -27,6 +27,7 @@ jobs:
       test-infra-ref: main
       with-cuda: disabled
       with-rocm: disabled
+      python-versions: '["3.10", "3.11", "3.12"]'

   build:
     needs: generate-matrix

.github/workflows/ghstack_land.yml

Lines changed: 1 addition & 15 deletions
@@ -3,21 +3,7 @@ on:
   pull_request:
     types: [closed]
     branches:
-      - 'gh/cccclai/[0-9]+/base'
-      - 'gh/dbort/[0-9]+/base'
-      - 'gh/dvorjackz/[0-9]+/base'
-      - 'gh/guangy10/[0-9]+/base'
-      - 'gh/helunwencser/[0-9]+/base'
-      - 'gh/jorgep31415/[0-9]+/base'
-      - 'gh/kimishpatel/[0-9]+/base'
-      - 'gh/kirklandsign/[0-9]+/base'
-      - 'gh/larryliu0820/[0-9]+/base'
-      - 'gh/lucylq/[0-9]+/base'
-      - 'gh/manuelcandales/[0-9]+/base'
-      - 'gh/mcr229/[0-9]+/base'
-      - 'gh/swolchok/[0-9]+/base'
-      - 'gh/SS-JIA/[0-9]+/base'
-      - 'gh/trivedivivek/[0-9]+/base'
+      - 'gh/*/[0-9]+/base'

 jobs:
   ghstack_merge_to_main:
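
The fifteen per-user entries collapse into a single wildcard that covers any user's ghstack base branches. GitHub's branch-filter syntax is similar to, but not identical to, shell globbing; a rough bash analogue of the consolidated filter's intent (in bash [[ ]] patterns `*` also matches `/`, so this is only an approximation):

    #!/usr/bin/env bash
    for branch in gh/cccclai/123/base gh/newcontributor/7/base release/0.4; do
      if [[ "$branch" == gh/*/[0-9]*/base ]]; then
        echo "would trigger: $branch"   # any ghstack base branch
      else
        echo "ignored:       $branch"   # everything else
      fi
    done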

.github/workflows/pull.yml

Lines changed: 4 additions & 2 deletions
@@ -332,7 +332,7 @@ jobs:
       docker-image: executorch-ubuntu-22.04-clang12

   unittest-arm:
-    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-arm-sdk
@@ -368,6 +368,7 @@ jobs:
     strategy:
       matrix:
         dtype: [fp32]
+        pt2e_quantize: [qnn_16a16w, qnn_8a8w]
         mode: [qnn]
       fail-fast: false
     with:
@@ -384,6 +385,7 @@ jobs:
       DTYPE=${{ matrix.dtype }}
       BUILD_TOOL="cmake"
       MODE=${{ matrix.mode }}
+      PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}

       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
       PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
@@ -393,7 +395,7 @@ jobs:
       # Install requirements for export_llama
       PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
       # Test llama2
-      PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}"
+      PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"

   test-phi-3-mini-runner-linux:
     name: test-phi-3-mini-runner-linux
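
With the added axis, the 1 × 2 × 1 matrix (dtype × pt2e_quantize × mode) expands to two CI jobs, each ending in an invocation of the form:

    # One job per pt2e_quantize value (qnn_16a16w shown; the other job uses qnn_8a8w).
    PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh \
      -model stories110M -build_tool cmake -mode qnn -dtype fp32 \
      -pt2e_quantize qnn_16a16w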

.github/workflows/trunk.yml

Lines changed: 41 additions & 3 deletions
@@ -131,7 +131,7 @@ jobs:

   test-arm-backend-delegation:
     name: test-arm-backend-delegation
-    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-arm-sdk
@@ -157,7 +157,7 @@

   test-arm-reference-delegation:
     name: test-arm-reference-delegation
-    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-arm-sdk
@@ -290,7 +290,7 @@ jobs:
       # ${CONDA_RUN} python -m unittest examples.models.llava.test.test_llava

       # # run e2e (export, tokenizer and runner)
-      # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llava.sh Release
+      # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llava.sh

   test-qnn-model:
     name: test-qnn-model
@@ -351,6 +351,8 @@ jobs:
       done

   test-huggingface-transformers:
+    # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
+    if: ${{ !github.event.pull_request.head.repo.fork }}
     name: test-huggingface-transformers
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     secrets: inherit
@@ -441,3 +443,39 @@ jobs:

       cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
       echo "::endgroup::"
+
+
+  test-llama-runner-qnn-linux:
+    name: test-llama-runner-qnn-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      matrix:
+        dtype: [fp32]
+        pt2e_quantize: [qnn_16a16w, qnn_8a8w]
+        mode: [qnn]
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-qnn-sdk
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 900
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        BUILD_TOOL="cmake"
+        DTYPE=${{ matrix.dtype }}
+        MODE=${{ matrix.mode }}
+        PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+        PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+
+        # Setup executorch
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
+        # Install requirements for export_llama
+        PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
+        # Test llama2
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
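
One detail worth noting in the new job's script: the reusable Linux job starts in the base conda environment rather than the one baked into the Docker image, so the script activates the image's environment by taking the last entry from conda's environment list. In isolation the lookup works like this (paths are illustrative):

    # `conda env list --json` prints {"envs": ["/opt/conda", "/opt/conda/envs/ci", ...]};
    # jq's ".envs | .[-1]" selects the last path, assumed to be the image-provided env.
    CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
    conda activate "${CONDA_ENV}"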

.gitmodules

Lines changed: 3 additions & 0 deletions
@@ -64,6 +64,9 @@
 [submodule "third-party/pybind11"]
     path = third-party/pybind11
     url = https://github.com/pybind/pybind11.git
+[submodule "backends/cadence/fusion_g3/third-party/nnlib/nnlib-FusionG3"]
+    path = backends/cadence/fusion_g3/third-party/nnlib/nnlib-FusionG3
+    url = https://github.com/foss-xtensa/nnlib-FusionG3/
 [submodule "third-party/ao"]
     path = third-party/ao
     url = https://github.com/pytorch/ao.git
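
After pulling a commit that registers a new submodule, an existing checkout still has to fetch it. The standard git invocation (generic git usage, not specific to this repository):

    # Initialize and clone just the newly added submodule.
    git submodule update --init \
      backends/cadence/fusion_g3/third-party/nnlib/nnlib-FusionG3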
