Commit 3b385c4

larryliu0820 authored and facebook-github-bot committed
Use new API to register custom ops for llama model (#2916)
Summary: Retry of D55713944

Differential Revision: D55856491
1 parent 599cfde commit 3b385c4

24 files changed: +400 -293 lines

.ci/scripts/test_llama.sh

Lines changed: 25 additions & 12 deletions
@@ -37,6 +37,18 @@ if [[ -z "${MODE:-}" ]]; then
   exit 1
 fi
 
+if [[ "${MODE}" =~ xnnpack.* ]]; then
+  XNNPACK=ON
+else
+  XNNPACK=OFF
+fi
+
+if [[ "${MODE}" =~ .*custom.* ]]; then
+  CUSTOM=ON
+else
+  CUSTOM=OFF
+fi
+
 if [[ -z "${BUCK:-}" ]]; then
   BUCK=buck2
 fi
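The two pattern tests above derive independent build flags from the composite MODE value. A quick sketch of how they classify the CI matrix entries (plain bash, no ExecuTorch-specific assumptions):

for MODE in portable xnnpack xnnpack+kv+custom; do
  [[ "${MODE}" =~ xnnpack.* ]] && XNNPACK=ON || XNNPACK=OFF
  [[ "${MODE}" =~ .*custom.* ]] && CUSTOM=ON || CUSTOM=OFF
  echo "MODE=${MODE} -> XNNPACK=${XNNPACK} CUSTOM=${CUSTOM}"
done
# portable          -> XNNPACK=OFF CUSTOM=OFF
# xnnpack           -> XNNPACK=ON  CUSTOM=OFF
# xnnpack+kv+custom -> XNNPACK=ON  CUSTOM=ON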
@@ -47,38 +59,35 @@ fi
 
 which "${PYTHON_EXECUTABLE}"
 
-
 cmake_install_executorch_libraries() {
   echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
   rm -rf cmake-out
-  if [[ "${MODE}" == "xnnpack" ]]; then
-    XNNPACK=ON
-  else
-    XNNPACK=OFF
-  fi
   retry cmake -DBUCK2="$BUCK" \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
-    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_BUILD_TYPE=Debug \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
     -DEXECUTORCH_BUILD_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
     -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
     -Bcmake-out .
-  cmake --build cmake-out -j9 --target install --config Release
+  cmake --build cmake-out -j9 --target install --config Debug
 }
 
 cmake_build_llama_runner() {
   echo "Building llama runner"
   dir="examples/models/llama2"
   retry cmake -DBUCK2="$BUCK" \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
-    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_BUILD_TYPE=Debug \
+    -DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
     -DEXECUTORCH_BUILD_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
     -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
     -Bcmake-out/${dir} \
     ${dir}
-  cmake --build cmake-out/${dir} -j9 --config Release
+  cmake --build cmake-out/${dir} -j9 --config Debug
 
 }
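For MODE=xnnpack+kv+custom, the install step above therefore expands to roughly the following configure and build (a sketch with $CUSTOM and $XNNPACK already substituted; buck2 and a Python interpreter are assumed on PATH):

cmake -DBUCK2=buck2 \
      -DCMAKE_INSTALL_PREFIX=cmake-out \
      -DCMAKE_BUILD_TYPE=Debug \
      -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
      -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
      -DEXECUTORCH_BUILD_CUSTOM=ON \
      -DEXECUTORCH_BUILD_OPTIMIZED=ON \
      -DEXECUTORCH_BUILD_XNNPACK=ON \
      -DPYTHON_EXECUTABLE=python \
      -Bcmake-out .
cmake --build cmake-out -j9 --target install --config Debug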

@@ -113,13 +122,18 @@ else
   exit 1
 fi
 
+# Install custom ops before exporting
+echo "Installing executorch libraries"
+cmake_install_executorch_libraries
+
 # Export model.
 EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
 echo "Exporting ${EXPORTED_MODEL_NAME}"
 EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME}"
-if [[ "${MODE}" == "xnnpack" ]]; then
+if [[ "${MODE}" == "xnnpack+kv+custom" ]]; then
   EXPORT_ARGS="${EXPORT_ARGS} -kv --use_sdpa_with_kv_cache -X -qmode 8da4w -G 128"
 fi
+# Add dynamically linked library location
 $PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}
 
 # Create tokenizer.bin.
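With MODE=xnnpack+kv+custom and the fp32 matrix entry, the export call above resolves to roughly this (a sketch; the params file name is assumed):

python -m examples.models.llama2.export_llama \
  -c stories110M.pt -p params.json -d fp32 -n stories110M.pte \
  -kv --use_sdpa_with_kv_cache -X -qmode 8da4w -G 128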
@@ -135,7 +149,6 @@ if [[ "${BUILD_TOOL}" == "buck2" ]]; then
   # shellcheck source=/dev/null
   $BUCK run examples/models/llama2:main -- ${RUNTIME_ARGS} > result.txt
 elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
-  cmake_install_executorch_libraries
   cmake_build_llama_runner
   # Run llama runner
   NOW=$(date +"%H:%M:%S")

.github/workflows/pull.yml

Lines changed: 1 addition & 1 deletion
@@ -90,7 +90,7 @@ jobs:
       matrix:
         dtype: [fp32]
         build-tool: [buck2, cmake]
-        mode: [portable, xnnpack]
+        mode: [portable, xnnpack+kv+custom]
       fail-fast: false
     with:
       runner: linux.2xlarge

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
@@ -254,7 +254,7 @@ jobs:
       matrix:
         dtype: [fp32]
         build-tool: [buck2, cmake]
-        mode: [portable, xnnpack]
+        mode: [portable, xnnpack+kv+custom]
      fail-fast: false
     with:
       runner: macos-m1-stable
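Both workflows feed the new matrix value to the test script as MODE. A hedged local reproduction of one matrix cell (environment-variable passing assumed; the script may also accept positional arguments):

MODE=xnnpack+kv+custom DTYPE=fp32 BUILD_TOOL=cmake \
  bash .ci/scripts/test_llama.sh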

CMakeLists.txt

Lines changed: 47 additions & 25 deletions
@@ -144,6 +144,8 @@ option(EXECUTORCH_BUILD_COREML "Build the Core ML backend" OFF)
 
 option(EXECUTORCH_BUILD_CUSTOM "Build the custom kernels" OFF)
 
+option(EXECUTORCH_BUILD_CUSTOM_OPS_AOT "Build the custom ops lib for AOT" OFF)
+
 option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "Build the Data Loader extension"
        OFF)

@@ -175,17 +177,29 @@ option(EXECUTORCH_BUILD_VULKAN "Build the Vulkan backend" OFF)
 #
 # pthreadpool: build pthreadpool library. Disable on unsupported platforms
 #
-cmake_dependent_option(EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library."
-                       ON "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)
+cmake_dependent_option(
+  EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library." ON
+  "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)
 
 #
 # cpuinfo: build cpuinfo library. Disable on unsupported platforms
 #
 cmake_dependent_option(EXECUTORCH_BUILD_CPUINFO "Build cpuinfo library." ON
                        "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)
 
+if(EXECUTORCH_BUILD_CUSTOM_OPS_AOT)
+  set(EXECUTORCH_BUILD_CUSTOM ON)
+endif()
+
+if(EXECUTORCH_BUILD_CUSTOM)
+  set(EXECUTORCH_BUILD_OPTIMIZED ON)
+endif()
+
 if(EXECUTORCH_BUILD_CPUINFO)
   # --- cpuinfo
+  set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG
+      ${CMAKE_POSITION_INDEPENDENT_CODE})
+  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
   set(CPUINFO_SOURCE_DIR "backends/xnnpack/third-party/cpuinfo")
   set(CPUINFO_BUILD_TOOLS
       OFF
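The two if() blocks make EXECUTORCH_BUILD_CUSTOM_OPS_AOT imply EXECUTORCH_BUILD_CUSTOM, which in turn forces EXECUTORCH_BUILD_OPTIMIZED, so one flag pulls in the whole chain. A hedged way to exercise this (library file names are inferred from the target names in this diff, not confirmed by it):

cmake -DEXECUTORCH_BUILD_CUSTOM_OPS_AOT=ON \
      -DCMAKE_INSTALL_PREFIX=cmake-out \
      -Bcmake-out .
cmake --build cmake-out -j9
# The optimized kernels and custom ops libraries should now be among the
# build outputs:
find cmake-out -name "libcustom_ops*" -o -name "liboptimized_kernels*"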
@@ -207,10 +221,15 @@ if(EXECUTORCH_BUILD_CPUINFO)
       CACHE STRING "")
   set(CLOG_SOURCE_DIR "${CPUINFO_SOURCE_DIR}/deps/clog")
   add_subdirectory("${CPUINFO_SOURCE_DIR}")
+  set(CMAKE_POSITION_INDEPENDENT_CODE
+      ${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG})
 endif()
 
 if(EXECUTORCH_BUILD_PTHREADPOOL)
   # --- pthreadpool
+  set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG
+      ${CMAKE_POSITION_INDEPENDENT_CODE})
+  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
   set(PTHREADPOOL_SOURCE_DIR "backends/xnnpack/third-party/pthreadpool")
   set(PTHREADPOOL_BUILD_TESTS
       OFF
@@ -230,6 +249,8 @@ if(EXECUTORCH_BUILD_PTHREADPOOL)
         CACHE STRING "")
   endif()
   add_subdirectory("${PTHREADPOOL_SOURCE_DIR}")
+  set(CMAKE_POSITION_INDEPENDENT_CODE
+      ${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG})
 endif()
 
 if(NOT PYTHON_EXECUTABLE)
@@ -504,25 +525,38 @@ if(EXECUTORCH_BUILD_PYBIND)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/sdk)
 endif()
 
+# find pytorch lib, to allow pybind to take at::Tensor as input/output
+find_package(Torch CONFIG REQUIRED)
+find_library(TORCH_PYTHON_LIBRARY torch_python
+             PATHS "${TORCH_INSTALL_PREFIX}/lib")
+
+set(_dep_libs
+    ${TORCH_PYTHON_LIBRARY}
+    bundled_program
+    etdump
+    executorch
+    extension_data_loader
+    portable_ops_lib
+    util
+    torch)
+
 if(EXECUTORCH_BUILD_COREML)
-  set(PYBIND_LINK_COREML "coremldelegate")
+  list(APPEND _dep_libs coremldelegate)
 endif()
 
 if(EXECUTORCH_BUILD_MPS)
-  set(PYBIND_LINK_MPS "mpsdelegate")
+  list(APPEND _dep_libs mpsdelegate)
 endif()
 
 if(EXECUTORCH_BUILD_XNNPACK)
-  # need to explicitly specify XNNPACK here
-  # otherwise uses XNNPACK symbols from libtorch_cpu
-  set(PYBIND_LINK_XNNPACK xnnpack_backend XNNPACK)
+  # need to explicitly specify XNNPACK here otherwise uses XNNPACK symbols
+  # from libtorch_cpu
+  list(APPEND _dep_libs xnnpack_backend XNNPACK)
 endif()
 
-# find pytorch lib, to allow pybind to take at::Tensor as input/output
-find_package(Torch CONFIG REQUIRED)
-find_library(TORCH_PYTHON_LIBRARY torch_python
-             PATHS "${TORCH_INSTALL_PREFIX}/lib")
-
+if(EXECUTORCH_BUILD_CUSTOM)
+  list(APPEND _dep_libs custom_ops custom_ops_aot_lib)
+endif()
 # compile options for pybind
 
 set(_pybind_compile_options -Wno-deprecated-declarations -fPIC -frtti
@@ -544,19 +578,7 @@ if(EXECUTORCH_BUILD_PYBIND)
       PUBLIC EXECUTORCH_PYTHON_MODULE_NAME=portable_lib)
 target_include_directories(portable_lib PRIVATE ${TORCH_INCLUDE_DIRS})
 target_compile_options(portable_lib PUBLIC ${_pybind_compile_options})
-target_link_libraries(
-  portable_lib
-  PUBLIC ${TORCH_PYTHON_LIBRARY}
-         bundled_program
-         etdump
-         executorch
-         extension_data_loader
-         portable_ops_lib
-         util
-         torch
-         ${PYBIND_LINK_COREML}
-         ${PYBIND_LINK_MPS}
-         ${PYBIND_LINK_XNNPACK})
+target_link_libraries(portable_lib PUBLIC ${_dep_libs})
 
 install(TARGETS portable_lib
         LIBRARY DESTINATION executorch/extension/pybindings)
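With the link line factored into _dep_libs, linking the custom ops into the Python bindings becomes a flag combination; a sketch (flag and target names taken from this diff, a Torch installation discoverable by find_package assumed):

cmake -DEXECUTORCH_BUILD_PYBIND=ON \
      -DEXECUTORCH_BUILD_CUSTOM=ON \
      -DCMAKE_INSTALL_PREFIX=cmake-out \
      -Bcmake-out .
cmake --build cmake-out -j9 --target portable_lib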

examples/demo-apps/android/LlamaDemo/setup.sh

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@ cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_CUSTOM=ON \
     -DCMAKE_BUILD_TYPE=Release \
     -B"${CMAKE_OUT}"
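The Android demo picks the new flag up through its setup script; a hedged invocation (the Android cross-compilation environment, e.g. an exported NDK path, is assumed to be configured per the demo's instructions):

bash examples/demo-apps/android/LlamaDemo/setup.sh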

examples/models/llama2/CMakeLists.txt

Lines changed: 43 additions & 23 deletions
@@ -49,56 +49,72 @@ set(_common_compile_options -Wno-deprecated-declarations -fPIC)
 # Let files say "include <executorch/path/to/header.h>".
 set(_common_include_directories ${EXECUTORCH_ROOT}/..)
 
-# For some reason android build is not able to find where gflags is
-# and hence cannot find corresponding .cmake file
+# For some reason android build is not able to find where gflags is and hence
+# cannot find corresponding .cmake file
 set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags)
 find_package(gflags REQUIRED)
 
 #
 # llama_main: test binary to run llama, with tokenizer and sampler integrated
 #
-add_executable(llama_main main.cpp
-  ${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/threadpool/cpuinfo_utils.cpp)
-if(CMAKE_BUILD_TYPE EQUAL "RELEASE")
-  target_link_options(llama_main PRIVATE "LINKER:--gc-sections")
-endif()
 
-# find `executorch` libraries
-# Same as for gflags
+# find `executorch` libraries Same as for gflags
 set(executorch_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../lib/cmake/ExecuTorch)
 find_package(executorch CONFIG REQUIRED)
 if(CMAKE_TOOLCHAIN_IOS OR ANDROID)
   target_link_options_shared_lib(executorch)
 endif()
 
 # custom ops library
-add_subdirectory(custom_ops)
+if(EXECUTORCH_BUILD_CUSTOM)
+  add_subdirectory(custom_ops)
+endif()
 
 # llama_runner library
 add_subdirectory(runner)
 
-target_include_directories(llama_main PUBLIC
-  ${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/third-party/cpuinfo/include)
-target_include_directories(llama_main PUBLIC
-  ${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/third-party/pthreadpool/include)
-
 set(link_libraries)
+set(_srcs main.cpp)
 
 if(EXECUTORCH_BUILD_OPTIMIZED)
-  list(APPEND link_libraries optimized_native_cpu_ops_lib optimized_kernels
-    portable_kernels cpublas eigen_blas)
+  list(
+    APPEND
+    link_libraries
+    optimized_native_cpu_ops_lib
+    optimized_kernels
+    portable_kernels
+    cpublas
+    eigen_blas)
   target_link_options_shared_lib(optimized_native_cpu_ops_lib)
 else()
   list(APPEND link_libraries portable_ops_lib portable_kernels)
   target_link_options_shared_lib(portable_ops_lib)
 endif()
 
-target_link_libraries(llama_main PUBLIC gflags llama_runner custom_ops_lib)
+if(EXECUTORCH_BUILD_CUSTOM)
+  target_link_options_shared_lib(custom_ops)
+  list(APPEND link_libraries custom_ops)
+endif()
 
 # XNNPACK pthreadpool cpuinfo
 if(TARGET xnnpack_backend)
   set(xnnpack_backend_libs xnnpack_backend XNNPACK pthreadpool cpuinfo)
   list(APPEND link_libraries ${xnnpack_backend_libs})
+  # HACK: main only includes these when the xnnpack backend is available, so
+  # that we have all the threadpool sources under xnnpack.
+  list(APPEND _common_compile_options -DET_USE_THREADPOOL)
+  list(
+    APPEND
+    _srcs
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/threadpool/cpuinfo_utils.cpp
+  )
+  list(
+    APPEND
+    _common_include_directories
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/third-party/cpuinfo/include
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/third-party/pthreadpool/include
+  )
+  # end of hack
   target_link_options_shared_lib(xnnpack_backend)
 endif()

@@ -114,15 +130,19 @@ if(TARGET qnn_executorch_backend)
   target_link_options_shared_lib(qnn_executorch_backend)
 endif()
 
-# This one is needed for cpuinfo where it uses android
-# specific log lib
+# This one is needed for cpuinfo where it uses android specific log lib
 if(ANDROID)
   list(APPEND link_libraries log)
 endif()
 
-target_compile_options(llama_main PUBLIC ${_common_compile_options}
-                       -DET_USE_THREADPOOL)
-target_link_libraries(llama_main PUBLIC ${link_libraries})
+add_executable(llama_main ${_srcs})
+if(CMAKE_BUILD_TYPE EQUAL "RELEASE")
+  target_link_options(llama_main PRIVATE "LINKER:--gc-sections")
+endif()
+
+target_include_directories(llama_main PUBLIC ${_common_include_directories})
+target_link_libraries(llama_main PUBLIC gflags llama_runner ${link_libraries})
+target_compile_options(llama_main PUBLIC ${_common_compile_options})
 
 if(APPLE)
   target_link_options_shared_lib(executorch)
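Taken together, llama_main now compiles main.cpp plus, only when the XNNPACK backend is present, the threadpool sources, and links custom_ops only when EXECUTORCH_BUILD_CUSTOM is set. A sketch of a standalone build of this example (paths relative to the repo root; the top-level install step above is assumed to have run):

cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
      -DEXECUTORCH_BUILD_CUSTOM=ON \
      -DEXECUTORCH_BUILD_OPTIMIZED=ON \
      -Bcmake-out/examples/models/llama2 \
      examples/models/llama2
cmake --build cmake-out/examples/models/llama2 -j9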

examples/models/llama2/TARGETS

Lines changed: 2 additions & 1 deletion
@@ -18,7 +18,7 @@ runtime.python_library(
     ],
     deps = [
         "//caffe2:torch",
-        "//executorch/examples/models/llama2/custom_ops:llama_custom_ops_aot_lib",
+        "//executorch/examples/models/llama2/custom_ops:custom_ops_aot_py",
     ],
 )
 
@@ -52,6 +52,7 @@ runtime.python_binary(
     main_module = "executorch.examples.models.llama2.export_llama",
     # visibility = ["//executorch/examples/..."],
     preload_deps = [
+        "//executorch/examples/models/llama2/custom_ops:custom_ops_aot_lib",
         "//executorch/kernels/quantized:aot_lib",
     ],
     deps = [
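Moving custom_ops_aot_lib into preload_deps means the shared library is loaded before the exporter's Python code runs, so the custom ops are already registered when the model is traced. A hedged Buck invocation (the binary target name is assumed from main_module; the params file name is illustrative):

buck2 run //executorch/examples/models/llama2:export_llama -- \
  -c stories110M.pt -p params.json -d fp32 -n stories110M.pte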
