
Commit 4bc228e

Update on "[ET-VK] Adding PushConstantDataInfo input to PrepackNode class."

This diff adds a new input, PushConstantDataInfo, to the PrepackNode class; it is used to pass push constant data to the shader.

Differential Revision: [D70102043](https://our.internmc.facebook.com/intern/diff/D70102043/)

[ghstack-poisoned]

2 parents: 82291ac + af5590d

File tree

69 files changed: +1586 / -440 lines


.ci/docker/ci_commit_pins/pytorch.txt

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-27e35de6c288bffad1b4d18b393579c1d1a95547
+08434df1f2f88c9770e59246caa2ff9c6f613270

.ci/scripts/test_model.sh

Lines changed: 2 additions & 1 deletion
@@ -100,13 +100,14 @@ test_model() {
     rm "./${MODEL_NAME}.pte"
     return # Skip running with portable executor runnner since portable doesn't support Qwen's biased linears.
   fi
-  if [[ "${MODEL_NAME}" == "phi4_mini" ]]; then
+  if [[ "${MODEL_NAME}" == "phi-4-mini" ]]; then
     # Install requirements for export_llama
     bash examples/models/llama/install_requirements.sh
     # Test export_llama script: python3 -m examples.models.llama.export_llama.
     "${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/phi-4-mini/config.json
     run_portable_executor_runner
     rm "./${MODEL_NAME}.pte"
+    return
   fi

   # Export a basic .pte and run the model.

.github/workflows/android-perf.yml

Lines changed: 57 additions & 63 deletions
@@ -96,6 +96,63 @@ jobs:

       PYTHONPATH="${PWD}" python .ci/scripts/gather_benchmark_configs.py $ARGS

+  prepare-test-specs:
+    runs-on: linux.2xlarge
+    needs: set-parameters
+    strategy:
+      matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }}
+      fail-fast: false
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Prepare the spec
+        id: prepare
+        shell: bash
+        env:
+          BENCHMARK_CONFIG: ${{ toJSON(matrix) }}
+        working-directory: extension/benchmark/android/benchmark
+        run: |
+          set -eux
+
+          # The model will be exported in the next step to this S3 path
+          MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip"
+          # We could write a script to properly use jinja here, but there is only one variable,
+          # so let's just sed it
+          sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' android-llm-device-farm-test-spec.yml.j2
+
+          BENCHMARK_CONFIG_ID=$(echo "${{ matrix.model }}_${{ matrix.config }}" | sed -e 's/[^A-Za-z0-9._-]/_/g')
+          # The config for this benchmark runs, we save it in the test spec so that it can be fetched
+          # later by the upload script
+          sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' android-llm-device-farm-test-spec.yml.j2
+
+          cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml
+          # Just print the test spec for debugging
+          cat android-llm-device-farm-test-spec.yml
+
+          # Save the benchmark configs so that we can use it later in the dashboard
+          echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json"
+          echo "benchmark-config-id=${BENCHMARK_CONFIG_ID}" >> $GITHUB_OUTPUT
+
+      - name: Upload the spec
+        uses: seemethere/upload-artifact-s3@v5
+        with:
+          s3-bucket: gha-artifacts
+          s3-prefix: |
+            ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}
+          retention-days: 1
+          if-no-files-found: error
+          path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml
+
+      - name: Update the benchmark configs
+        uses: seemethere/upload-artifact-s3@v5
+        with:
+          s3-bucket: gha-artifacts
+          s3-prefix: |
+            ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
+          retention-days: 1
+          if-no-files-found: error
+          path: extension/benchmark/android/benchmark/${{ steps.prepare.outputs.benchmark-config-id }}.json
+
   export-models:
     name: export-models
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
@@ -278,69 +335,6 @@ jobs:
       fi
       echo "::endgroup::"

-  prepare-test-specs:
-    runs-on: linux.2xlarge
-    needs:
-      - set-parameters
-      - export-models
-    strategy:
-      matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }}
-      fail-fast: false
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Prepare the spec
-        id: prepare
-        shell: bash
-        env:
-          BENCHMARK_CONFIG: ${{ toJSON(matrix) }}
-        working-directory: extension/benchmark/android/benchmark
-        run: |
-          set -eux
-
-          # The model will be exported in the next step to this S3 path
-          MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip"
-
-          # Check if the model artifact exists, fail this step skip generating test-spec.
-          curl -s --head -f ${MODEL_PATH}
-
-          # We could write a script to properly use jinja here, but there is only one variable,
-          # so let's just sed it
-          sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' android-llm-device-farm-test-spec.yml.j2
-
-          BENCHMARK_CONFIG_ID=$(echo "${{ matrix.model }}_${{ matrix.config }}" | sed -e 's/[^A-Za-z0-9._-]/_/g')
-          # The config for this benchmark runs, we save it in the test spec so that it can be fetched
-          # later by the upload script
-          sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' android-llm-device-farm-test-spec.yml.j2
-
-          cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml
-          # Just print the test spec for debugging
-          cat android-llm-device-farm-test-spec.yml
-
-          # Save the benchmark configs so that we can use it later in the dashboard
-          echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json"
-          echo "benchmark-config-id=${BENCHMARK_CONFIG_ID}" >> $GITHUB_OUTPUT
-
-      - name: Upload the spec
-        uses: seemethere/upload-artifact-s3@v5
-        with:
-          s3-bucket: gha-artifacts
-          s3-prefix: |
-            ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}
-          retention-days: 1
-          if-no-files-found: error
-          path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml
-
-      - name: Update the benchmark configs
-        uses: seemethere/upload-artifact-s3@v5
-        with:
-          s3-bucket: gha-artifacts
-          s3-prefix: |
-            ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
-          retention-days: 1
-          if-no-files-found: error
-          path: extension/benchmark/android/benchmark/${{ steps.prepare.outputs.benchmark-config-id }}.json
-
   build-benchmark-app:
     name: build-benchmark-app
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main

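Both perf workflow changes (this one and the apple-perf.yml change below) move the prepare-test-specs job ahead of export-models, drop the curl check on the exported model artifact, and keep the same sed-based templating: the two {{ ... }} placeholders in the .j2 spec are substituted in place, and the matrix entry is sanitized into a benchmark config ID. A minimal Python sketch of that substitution logic, for readers who want to reason about it outside of bash (the function names and sample values are illustrative only; the workflow itself uses sed, not Python):

import re


def sanitize_config_id(model: str, config: str) -> str:
    # Mirrors: sed -e 's/[^A-Za-z0-9._-]/_/g' applied to "<model>_<config>"
    return re.sub(r"[^A-Za-z0-9._-]", "_", f"{model}_{config}")


def render_spec(template_text: str, model_path: str, benchmark_config_id: str) -> str:
    # Mirrors the two in-place sed replacements on the *.yml.j2 template
    rendered = template_text.replace("{{ model_path }}", model_path)
    return rendered.replace("{{ benchmark_config_id }}", benchmark_config_id)


if __name__ == "__main__":
    # Hypothetical matrix values; the real job reads ${{ matrix.model }} / ${{ matrix.config }}.
    config_id = sanitize_config_id("llama", "xnnpack+custom+qe")
    print(config_id)  # -> llama_xnnpack_custom_qe
    print(render_spec(
        "model: {{ model_path }}  # {{ benchmark_config_id }}",
        "https://gha-artifacts.s3.amazonaws.com/.../model.zip",
        config_id,
    ))

Because the relocated job now needs only set-parameters and no longer checks that MODEL_PATH exists, it can run in parallel with export-models.
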
.github/workflows/apple-perf.yml

Lines changed: 57 additions & 62 deletions
@@ -98,6 +98,63 @@ jobs:

       echo "benchmark_configs is: ${{ steps.set-parameters.outputs.benchmark_configs }}"

+  prepare-test-specs:
+    runs-on: linux.2xlarge
+    needs: set-parameters
+    strategy:
+      matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }}
+      fail-fast: false
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Prepare the spec
+        id: prepare
+        shell: bash
+        env:
+          BENCHMARK_CONFIG: ${{ toJSON(matrix) }}
+        working-directory: extension/benchmark/apple/Benchmark
+        run: |
+          set -eux
+
+          # The model will be exported in the next step to this S3 path
+          MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip"
+          # We could write a script to properly use jinja here, but there is only one variable,
+          # so let's just sed it
+          sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' default-ios-device-farm-appium-test-spec.yml.j2
+
+          BENCHMARK_CONFIG_ID=$(echo "${{ matrix.model }}_${{ matrix.config }}" | sed -e 's/[^A-Za-z0-9._-]/_/g')
+          # The config for this benchmark runs, we save it in the test spec so that it can be fetched
+          # later by the upload script
+          sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' default-ios-device-farm-appium-test-spec.yml.j2
+
+          cp default-ios-device-farm-appium-test-spec.yml.j2 default-ios-device-farm-appium-test-spec.yml
+          # Just print the test spec for debugging
+          cat default-ios-device-farm-appium-test-spec.yml
+
+          # Save the benchmark configs so that we can use it later in the dashboard
+          echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json"
+          echo "benchmark-config-id=${BENCHMARK_CONFIG_ID}" >> $GITHUB_OUTPUT
+
+      - name: Upload the spec
+        uses: seemethere/upload-artifact-s3@v5
+        with:
+          s3-bucket: gha-artifacts
+          s3-prefix: |
+            ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}
+          retention-days: 1
+          if-no-files-found: error
+          path: extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml
+
+      - name: Update the benchmark configs
+        uses: seemethere/upload-artifact-s3@v5
+        with:
+          s3-bucket: gha-artifacts
+          s3-prefix: |
+            ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
+          retention-days: 1
+          if-no-files-found: error
+          path: extension/benchmark/apple/Benchmark/${{ steps.prepare.outputs.benchmark-config-id }}.json
+
   export-models:
     name: export-models
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
@@ -287,68 +344,6 @@ jobs:
      fi
      echo "::endgroup::"

-  prepare-test-specs:
-    runs-on: linux.2xlarge
-    needs:
-      - set-parameters
-      - export-models
-    strategy:
-      matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }}
-      fail-fast: false
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Prepare the spec
-        id: prepare
-        shell: bash
-        env:
-          BENCHMARK_CONFIG: ${{ toJSON(matrix) }}
-        working-directory: extension/benchmark/apple/Benchmark
-        run: |
-          set -eux
-
-          # The model will be exported in the next step to this S3 path
-          MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}/model.zip"
-          # Check if the model artifact exists, fail this step skip generating test-spec.
-          curl -s --head -f ${MODEL_PATH}
-          # We could write a script to properly use jinja here, but there is only one variable,
-          # so let's just sed it
-          sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' default-ios-device-farm-appium-test-spec.yml.j2
-
-          BENCHMARK_CONFIG_ID=$(echo "${{ matrix.model }}_${{ matrix.config }}" | sed -e 's/[^A-Za-z0-9._-]/_/g')
-          # The config for this benchmark runs, we save it in the test spec so that it can be fetched
-          # later by the upload script
-          sed -i -e 's,{{ benchmark_config_id }},'"${BENCHMARK_CONFIG_ID}"',g' default-ios-device-farm-appium-test-spec.yml.j2
-
-          cp default-ios-device-farm-appium-test-spec.yml.j2 default-ios-device-farm-appium-test-spec.yml
-          # Just print the test spec for debugging
-          cat default-ios-device-farm-appium-test-spec.yml
-
-          # Save the benchmark configs so that we can use it later in the dashboard
-          echo "${BENCHMARK_CONFIG}" > "${BENCHMARK_CONFIG_ID}.json"
-          echo "benchmark-config-id=${BENCHMARK_CONFIG_ID}" >> $GITHUB_OUTPUT
-
-      - name: Upload the spec
-        uses: seemethere/upload-artifact-s3@v5
-        with:
-          s3-bucket: gha-artifacts
-          s3-prefix: |
-            ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.config }}
-          retention-days: 1
-          if-no-files-found: error
-          path: extension/benchmark/apple/Benchmark/default-ios-device-farm-appium-test-spec.yml
-
-      - name: Update the benchmark configs
-        uses: seemethere/upload-artifact-s3@v5
-        with:
-          s3-bucket: gha-artifacts
-          s3-prefix: |
-            ${{ github.repository }}/${{ github.run_id }}/artifacts/benchmark-configs/
-          retention-days: 1
-          if-no-files-found: error
-          path: extension/benchmark/apple/Benchmark/${{ steps.prepare.outputs.benchmark-config-id }}.json
-
-
   build-benchmark-app:
     name: build-benchmark-app
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main

.github/workflows/update-viablestrict.yml

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ concurrency:
 jobs:
   do_update_viablestrict:
     if: ${{ github.repository_owner == 'pytorch' }}
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     environment: ${{ (github.event_name == 'schedule') && 'update-viable-strict' || '' }}
     steps:
       - name: Update viable/strict

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -749,9 +749,9 @@ endif()

 if(EXECUTORCH_BUILD_PTHREADPOOL
    AND EXECUTORCH_BUILD_CPUINFO
-   AND CMAKE_CXX_STANDARD GREATER_EQUAL 14
 )
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/threadpool)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/parallel)
 endif()

 if(EXECUTORCH_BUILD_PYBIND)

backends/arm/_passes/arm_pass_manager.py

Lines changed: 9 additions & 2 deletions
@@ -51,6 +51,7 @@
     RetraceFoldedDtypesPass,
 )
 from executorch.backends.arm._passes.fuse_batchnorm2d_pass import FuseBatchnorm2DPass
+from executorch.backends.arm._passes.fuse_constant_ops_pass import FuseConstantOpsPass
 from executorch.backends.arm._passes.fuse_quantized_activation_pass import (  # type: ignore[import-not-found]
     FuseQuantizedActivationPass,
 )
@@ -78,6 +79,7 @@
     UnsqueezeScalarPlaceholdersPass,
 )
 from executorch.backends.arm.tosa_specification import TosaSpecification
+from executorch.backends.transforms.fuse_view_copy import FuseViewCopyTransform

 from executorch.backends.transforms.replace_scalar_with_tensor import (
     ReplaceScalarWithTensorArgPass,
@@ -114,7 +116,6 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
         self.add_pass(QuantizeOperatorArguments())
         self.add_pass(FoldAndAnnotateQParamsPass())  # type: ignore[call-arg]
         self.add_pass(RetraceFoldedDtypesPass())
-        self.add_pass(InsertTableOpsPass(exported_program))

         self.add_pass(RemoveClonePass())
         self.add_pass(SizeAdjustConv2DPass())
@@ -128,8 +129,12 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
         self.add_pass(DecomposeSelectPass())
         self.add_pass(ConvertSqueezesToViewPass())

+        self.add_pass(FuseViewCopyTransform())
+        self.add_pass(FuseConstantOpsPass(exported_program))
+        self.add_pass(InsertTableOpsPass(exported_program))
         self.add_pass(AnnotateChannelsLastDimOrder())
         self.add_pass(InsertRescalePass())
+
         return self._transform(exported_program.graph_module)

     def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
@@ -155,7 +160,6 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
         self.add_pass(QuantizeOperatorArguments())
         self.add_pass(FoldAndAnnotateQParamsPass())  # type: ignore[call-arg]
         self.add_pass(RetraceFoldedDtypesPass())
-        self.add_pass(InsertTableOpsPass(exported_program))

         self.add_pass(RemoveClonePass())
         self.add_pass(SizeAdjustConv2DPass())
@@ -169,6 +173,9 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
         self.add_pass(DecomposeSelectPass())
         self.add_pass(ConvertSqueezesToViewPass())

+        self.add_pass(FuseViewCopyTransform())
+        self.add_pass(FuseConstantOpsPass(exported_program))
+        self.add_pass(InsertTableOpsPass(exported_program))
         self.add_pass(AnnotateChannelsLastDimOrder())
         self.add_pass(InsertRescalePass())


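The net effect of the arm_pass_manager.py hunks above is easier to see as a single ordering: InsertTableOpsPass no longer runs right after RetraceFoldedDtypesPass; instead, both TOSA 0.80 pipelines (BI and MI) now end with view-copy fusion, constant-op fusion, table-op insertion, and then the existing dim-order and rescale passes. A simplified sketch of that tail ordering, using plain strings as stand-ins for the real pass objects (importing them needs a full ExecuTorch checkout, so this is illustration only):

from typing import List


class PassOrderSketch:
    """Stand-in for ArmPassManager: records the order in which passes are registered."""

    def __init__(self) -> None:
        self.passes: List[str] = []

    def add_pass(self, name: str) -> None:
        self.passes.append(name)


pm = PassOrderSketch()
# ... earlier passes elided (quantization folding, RetraceFoldedDtypesPass, etc.) ...
pm.add_pass("DecomposeSelectPass")
pm.add_pass("ConvertSqueezesToViewPass")
# New in this commit: fusion passes run before table ops are inserted.
pm.add_pass("FuseViewCopyTransform")
pm.add_pass("FuseConstantOpsPass")
pm.add_pass("InsertTableOpsPass")  # moved here from right after RetraceFoldedDtypesPass
pm.add_pass("AnnotateChannelsLastDimOrder")
pm.add_pass("InsertRescalePass")
print(pm.passes)
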
backends/arm/_passes/arm_pass_utils.py

Lines changed: 25 additions & 0 deletions
@@ -26,6 +26,7 @@
 )
 from torch._ops import OpOverload
 from torch._subclasses.fake_tensor import FakeTensor
+from torch.export.graph_signature import InputKind


 def is_get_attr_node(node: torch.fx.Node) -> bool:
@@ -44,6 +45,30 @@ def is_param_node(exp_prog: ExportedProgram, node: torch.fx.Node) -> bool:
     )


+def get_constant_placeholder_kind(
+    exp_prog: ExportedProgram, node: torch.fx.Node
+) -> InputKind:
+    if is_param(exp_prog, node):
+        return InputKind.PARAMETER
+    if is_buffer(exp_prog, node):
+        return InputKind.BUFFER
+    if is_lifted_tensor_constant(exp_prog, node):
+        return InputKind.CONSTANT_TENSOR
+
+    raise RuntimeError("Node is neither PARAMETER, BUFFER nor CONSTANT_TENSOR")
+
+
+def is_persistent_buffer(exp_prog: ExportedProgram, node: torch.fx.Node) -> bool | None:
+    if is_buffer(exp_prog, node):
+        buffer_name = exp_prog.graph_signature.inputs_to_buffers[node.name]
+        if buffer_name in exp_prog.graph_signature.non_persistent_buffers:
+            return False
+        else:
+            return True
+
+    return None
+
+
 def get_param_tensor(
     exp_prog: ExportedProgram, node: torch.fx.Node
 ) -> Optional[torch.Tensor]:

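The two helpers added to arm_pass_utils.py classify a constant placeholder by its InputKind and report whether a buffer placeholder is persistent (returning None for non-buffers). A minimal usage sketch, assuming an environment with torch and the executorch package installed; TinyModule and the user-input filtering below are illustrative, not part of the commit:

import torch
from torch.export import export

# The helpers added in this diff.
from executorch.backends.arm._passes.arm_pass_utils import (
    get_constant_placeholder_kind,
    is_persistent_buffer,
)


class TinyModule(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.weight = torch.nn.Parameter(torch.ones(4))
        self.register_buffer("scale", torch.tensor(2.0))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x * self.weight * self.scale


exported = export(TinyModule(), (torch.randn(4),))

for node in exported.graph.nodes:
    if node.op != "placeholder":
        continue
    if node.name in exported.graph_signature.user_inputs:
        continue  # only parameter/buffer/lifted-constant placeholders are classified
    kind = get_constant_placeholder_kind(exported, node)  # InputKind.PARAMETER or InputKind.BUFFER here
    persistent = is_persistent_buffer(exported, node)  # True for the registered buffer, None for the parameter
    print(node.name, kind, persistent)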