
Commit ab6f19a

larryliu0820 authored and facebook-github-bot committed
Use new API to register custom ops for llama model (pytorch#2916)
Summary: Retry of D55713944

Differential Revision: D55856491
1 parent fcefd10 commit ab6f19a

26 files changed: +455, -317 lines

.ci/scripts/test_llama.sh

Lines changed: 25 additions & 12 deletions
@@ -37,6 +37,18 @@ if [[ -z "${MODE:-}" ]]; then
   exit 1
 fi
 
+if [[ "${MODE}" =~ xnnpack.* ]]; then
+  XNNPACK=ON
+else
+  XNNPACK=OFF
+fi
+
+if [[ "${MODE}" =~ .*custom.* ]]; then
+  CUSTOM=ON
+else
+  CUSTOM=OFF
+fi
+
 if [[ -z "${BUCK:-}" ]]; then
   BUCK=buck2
 fi
@@ -47,38 +59,35 @@ fi
 
 which "${PYTHON_EXECUTABLE}"
 
-
 cmake_install_executorch_libraries() {
   echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
   rm -rf cmake-out
-  if [[ "${MODE}" == "xnnpack" ]]; then
-    XNNPACK=ON
-  else
-    XNNPACK=OFF
-  fi
   retry cmake -DBUCK2="$BUCK" \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
-    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_BUILD_TYPE=Debug \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
     -DEXECUTORCH_BUILD_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
     -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
     -Bcmake-out .
-  cmake --build cmake-out -j9 --target install --config Release
+  cmake --build cmake-out -j9 --target install --config Debug
 }
 
 cmake_build_llama_runner() {
   echo "Building llama runner"
   dir="examples/models/llama2"
   retry cmake -DBUCK2="$BUCK" \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
-    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_BUILD_TYPE=Debug \
+    -DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
     -DEXECUTORCH_BUILD_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
     -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
     -Bcmake-out/${dir} \
     ${dir}
-  cmake --build cmake-out/${dir} -j9 --config Release
+  cmake --build cmake-out/${dir} -j9 --config Debug
 
 }
 
@@ -113,13 +122,18 @@ else
   exit 1
 fi
 
+# Install custom ops before exporting
+echo "Installing executorch libraries"
+cmake_install_executorch_libraries
+
 # Export model.
 EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
 echo "Exporting ${EXPORTED_MODEL_NAME}"
 EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME}"
-if [[ "${MODE}" == "xnnpack" ]]; then
+if [[ "${MODE}" == "xnnpack+kv+custom" ]]; then
   EXPORT_ARGS="${EXPORT_ARGS} -kv --use_sdpa_with_kv_cache -X -qmode 8da4w -G 128"
 fi
+# Add dynamically linked library location
 $PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}
 
 # Create tokenizer.bin.
@@ -135,7 +149,6 @@ if [[ "${BUILD_TOOL}" == "buck2" ]]; then
   # shellcheck source=/dev/null
   $BUCK run examples/models/llama2:main -- ${RUNTIME_ARGS} > result.txt
 elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
-  cmake_install_executorch_libraries
   cmake_build_llama_runner
   # Run llama runner
   NOW=$(date +"%H:%M:%S")
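
For reference, a minimal sketch of how the new MODE matching above resolves the two build flags for the CI matrix values touched by this PR (the loop is illustrative; only the regexes and mode strings come from the diff):

    for MODE in portable xnnpack+kv+custom; do
      if [[ "${MODE}" =~ xnnpack.* ]]; then XNNPACK=ON; else XNNPACK=OFF; fi
      if [[ "${MODE}" =~ .*custom.* ]]; then CUSTOM=ON; else CUSTOM=OFF; fi
      echo "MODE=${MODE} -> XNNPACK=${XNNPACK} CUSTOM=${CUSTOM}"
    done
    # portable           -> XNNPACK=OFF CUSTOM=OFF
    # xnnpack+kv+custom  -> XNNPACK=ON  CUSTOM=ON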

.github/workflows/pull.yml

Lines changed: 1 addition & 1 deletion
@@ -90,7 +90,7 @@ jobs:
       matrix:
         dtype: [fp32]
         build-tool: [buck2, cmake]
-        mode: [portable, xnnpack]
+        mode: [portable, xnnpack+kv+custom]
       fail-fast: false
     with:
       runner: linux.2xlarge

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
@@ -254,7 +254,7 @@ jobs:
       matrix:
         dtype: [fp32]
         build-tool: [buck2, cmake]
-        mode: [portable, xnnpack]
+        mode: [portable, xnnpack+kv+custom]
       fail-fast: false
     with:
       runner: macos-m1-stable

CMakeLists.txt

Lines changed: 63 additions & 32 deletions
@@ -144,6 +144,8 @@ option(EXECUTORCH_BUILD_COREML "Build the Core ML backend" OFF)
 
 option(EXECUTORCH_BUILD_CUSTOM "Build the custom kernels" OFF)
 
+option(EXECUTORCH_BUILD_CUSTOM_OPS_AOT "Build the custom ops lib for AOT" OFF)
+
 option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "Build the Data Loader extension"
        OFF)

@@ -175,17 +177,29 @@ option(EXECUTORCH_BUILD_VULKAN "Build the Vulkan backend" OFF)
 #
 # pthreadpool: build pthreadpool library. Disable on unsupported platforms
 #
-cmake_dependent_option(EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library."
-                       ON "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)
+cmake_dependent_option(
+  EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library." ON
+  "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)
 
 #
 # cpuinfo: build cpuinfo library. Disable on unsupported platforms
 #
 cmake_dependent_option(EXECUTORCH_BUILD_CPUINFO "Build cpuinfo library." ON
                        "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)
 
+if(EXECUTORCH_BUILD_CUSTOM_OPS_AOT)
+  set(EXECUTORCH_BUILD_CUSTOM ON)
+endif()
+
+if(EXECUTORCH_BUILD_CUSTOM)
+  set(EXECUTORCH_BUILD_OPTIMIZED ON)
+endif()
+
 if(EXECUTORCH_BUILD_CPUINFO)
   # --- cpuinfo
+  set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG
+      ${CMAKE_POSITION_INDEPENDENT_CODE})
+  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
   set(CPUINFO_SOURCE_DIR "backends/xnnpack/third-party/cpuinfo")
   set(CPUINFO_BUILD_TOOLS
       OFF
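
For context, the new option chaining above means a single flag is enough to opt into the whole custom-ops path. A minimal configure sketch, assuming a cmake-out build directory (only the EXECUTORCH_BUILD_* names come from this diff; everything else is illustrative):

    cmake -DEXECUTORCH_BUILD_CUSTOM_OPS_AOT=ON -DCMAKE_BUILD_TYPE=Release -Bcmake-out .
    # Per the logic above, EXECUTORCH_BUILD_CUSTOM_OPS_AOT=ON forces EXECUTORCH_BUILD_CUSTOM=ON,
    # which in turn forces EXECUTORCH_BUILD_OPTIMIZED=ON.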
@@ -207,10 +221,15 @@ if(EXECUTORCH_BUILD_CPUINFO)
       CACHE STRING "")
   set(CLOG_SOURCE_DIR "${CPUINFO_SOURCE_DIR}/deps/clog")
   add_subdirectory("${CPUINFO_SOURCE_DIR}")
+  set(CMAKE_POSITION_INDEPENDENT_CODE
+      ${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG})
 endif()
 
 if(EXECUTORCH_BUILD_PTHREADPOOL)
   # --- pthreadpool
+  set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG
+      ${CMAKE_POSITION_INDEPENDENT_CODE})
+  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
   set(PTHREADPOOL_SOURCE_DIR "backends/xnnpack/third-party/pthreadpool")
   set(PTHREADPOOL_BUILD_TESTS
       OFF
@@ -230,6 +249,8 @@ if(EXECUTORCH_BUILD_PTHREADPOOL)
         CACHE STRING "")
   endif()
   add_subdirectory("${PTHREADPOOL_SOURCE_DIR}")
+  set(CMAKE_POSITION_INDEPENDENT_CODE
+      ${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG})
 endif()
 
 if(NOT PYTHON_EXECUTABLE)
@@ -352,23 +373,28 @@ add_subdirectory(schema)
 # Only contains primitive operators; does not contain portable kernels or other
 # full operators. Does not contain any backends.
 #
-
-add_library(executorch ${_executorch__srcs})
-target_link_libraries(executorch PRIVATE program_schema)
-target_link_options_shared_lib(executorch)
+add_library(executorch_no_prim_ops ${_executorch_no_prim_ops__srcs})
+target_link_libraries(executorch_no_prim_ops PRIVATE program_schema)
 # Check if dl exists for this toolchain and only then link it.
 find_library(DL_LIBRARY_EXISTS NAMES dl)
 # Check if the library was found
 if(DL_LIBRARY_EXISTS)
-  target_link_libraries(executorch PRIVATE dl) # For dladdr()
+  target_link_libraries(executorch_no_prim_ops PRIVATE dl) # For dladdr()
 endif()
-target_include_directories(executorch PUBLIC ${_common_include_directories})
-target_compile_options(executorch PUBLIC ${_common_compile_options})
+target_include_directories(executorch_no_prim_ops PUBLIC ${_common_include_directories})
+target_compile_options(executorch_no_prim_ops PUBLIC ${_common_compile_options})
 if(MAX_KERNEL_NUM)
-  target_compile_definitions(executorch
+  target_compile_definitions(executorch_no_prim_ops
                              PRIVATE MAX_KERNEL_NUM=${MAX_KERNEL_NUM})
 endif()
 
+add_library(executorch ${_executorch__srcs})
+target_link_libraries(executorch PRIVATE executorch_no_prim_ops)
+target_link_libraries(executorch INTERFACE program_schema)
+target_include_directories(executorch PUBLIC ${_common_include_directories})
+target_compile_options(executorch PUBLIC ${_common_compile_options})
+target_link_options_shared_lib(executorch)
+
 #
 # portable_ops_lib: A library to register core ATen ops using portable kernels,
 # see kernels/portable/CMakeLists.txt.
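
The hunk above splits the core runtime into executorch_no_prim_ops and a thin executorch target that wraps it and keeps the primitive-operator registration. Assuming default static-library naming and the cmake-out install prefix used by the CI script, a quick post-install sanity check might look like the sketch below (the paths and archive names are assumptions, not stated in this PR):

    # Hypothetical check after `cmake --build cmake-out --target install`
    ls cmake-out/lib/libexecutorch.a cmake-out/lib/libexecutorch_no_prim_ops.a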
@@ -504,25 +530,42 @@ if(EXECUTORCH_BUILD_PYBIND)
     add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/sdk)
   endif()
 
+  # find pytorch lib, to allow pybind to take at::Tensor as input/output
+  find_package(Torch CONFIG REQUIRED)
+  find_library(TORCH_PYTHON_LIBRARY torch_python
+               PATHS "${TORCH_INSTALL_PREFIX}/lib")
+
+  set(_dep_libs
+      ${TORCH_PYTHON_LIBRARY}
+      bundled_program
+      etdump
+      executorch
+      extension_data_loader
+      portable_ops_lib
+      util
+      torch)
+
   if(EXECUTORCH_BUILD_COREML)
-    set(PYBIND_LINK_COREML "coremldelegate")
+    list(APPEND _dep_libs coremldelegate)
   endif()
 
   if(EXECUTORCH_BUILD_MPS)
-    set(PYBIND_LINK_MPS "mpsdelegate")
+    list(APPEND _dep_libs mpsdelegate)
   endif()
 
   if(EXECUTORCH_BUILD_XNNPACK)
-    # need to explicitly specify XNNPACK here
-    # otherwise uses XNNPACK symbols from libtorch_cpu
-    set(PYBIND_LINK_XNNPACK xnnpack_backend XNNPACK)
+    # need to explicitly specify XNNPACK here otherwise uses XNNPACK symbols
+    # from libtorch_cpu
+    list(APPEND _dep_libs xnnpack_backend XNNPACK)
   endif()
 
-  # find pytorch lib, to allow pybind to take at::Tensor as input/output
-  find_package(Torch CONFIG REQUIRED)
-  find_library(TORCH_PYTHON_LIBRARY torch_python
-               PATHS "${TORCH_INSTALL_PREFIX}/lib")
+  if(EXECUTORCH_BUILD_CUSTOM)
+    list(APPEND _dep_libs custom_ops)
+  endif()
 
+  if(EXECUTORCH_BUILD_CUSTOM_OPS_AOT)
+    list(APPEND _dep_libs custom_ops_aot_lib)
+  endif()
   # compile options for pybind
 
   set(_pybind_compile_options -Wno-deprecated-declarations -fPIC -frtti
@@ -544,19 +587,7 @@ if(EXECUTORCH_BUILD_PYBIND)
     PUBLIC EXECUTORCH_PYTHON_MODULE_NAME=portable_lib)
   target_include_directories(portable_lib PRIVATE ${TORCH_INCLUDE_DIRS})
   target_compile_options(portable_lib PUBLIC ${_pybind_compile_options})
-  target_link_libraries(
-    portable_lib
-    PUBLIC ${TORCH_PYTHON_LIBRARY}
-           bundled_program
-           etdump
-           executorch
-           extension_data_loader
-           portable_ops_lib
-           util
-           torch
-           ${PYBIND_LINK_COREML}
-           ${PYBIND_LINK_MPS}
-           ${PYBIND_LINK_XNNPACK})
+  target_link_libraries(portable_lib PUBLIC ${_dep_libs})
 
   install(TARGETS portable_lib
           LIBRARY DESTINATION executorch/extension/pybindings)
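
With the change above, the pybind module links one accumulated _dep_libs list instead of ad-hoc PYBIND_LINK_* variables, so the custom-ops libraries ride along whenever the corresponding options are enabled. A hedged configure/build sketch (the EXECUTORCH_BUILD_* flags and the portable_lib target name come from this diff; the build directory and job count are illustrative):

    cmake -DEXECUTORCH_BUILD_PYBIND=ON \
          -DEXECUTORCH_BUILD_CUSTOM_OPS_AOT=ON \
          -Bcmake-out .
    cmake --build cmake-out -j9 --target portable_lib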

build/cmake_deps.toml

Lines changed: 12 additions & 0 deletions
@@ -19,6 +19,18 @@ excludes = [
 buck_targets = [
   "//runtime/executor:program",
 ]
+deps = [
+  "executorch_no_prim_ops",
+]
+filters = [
+  ".cpp$",
+]
+
+
+[targets.executorch_no_prim_ops]
+buck_targets = [
+  "//runtime/executor:program_no_prim_ops",
+]
 deps = [
   "program_schema",
 ]

examples/demo-apps/android/LlamaDemo/setup.sh

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@ cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_CUSTOM=ON \
     -DCMAKE_BUILD_TYPE=Release \
     -B"${CMAKE_OUT}"
