examples/models/llama2/CMakeLists.txt (2 additions, 2 deletions)

@@ -21,7 +21,7 @@ project(llama_runner)
 # Duplicating options as root CMakeLists.txt
 option(EXECUTORCH_BUILD_OPTIMIZED "Build the optimized kernels" OFF)

-option(EXECUTORCH_BUILD_RE2 "Build RE2" OFF)
+option(EXECUTORCH_USE_TIKTOKEN "Use Tiktoken as a tokenizer" OFF)

 include(CMakeDependentOption)
 #
@@ -88,7 +88,7 @@ endif()

 # llama_runner library
 add_subdirectory(runner)
-if(EXECUTORCH_BUILD_RE2)
+if(EXECUTORCH_USE_TIKTOKEN)
   # find RE2 for tokenizer
   set(ABSL_ENABLE_INSTALL ON)
   set(_pic_flag
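
Note: with this change the tokenizer is chosen when the build is configured rather than at runtime. Assuming an otherwise standard CMake configure step for this example (the exact invocation depends on the rest of the build setup), Tiktoken would be enabled with something like `cmake -DEXECUTORCH_USE_TIKTOKEN=ON ...`.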

examples/models/llama2/main.cpp (1 addition, 9 deletions)

@@ -39,11 +39,6 @@ DEFINE_int32(
     -1,
     "Number of CPU threads for inference. Defaults to -1, which implies we'll use a heuristic to derive the # of performant cores for a specific device.");

-DEFINE_bool(
-    use_tiktoken,
-    false,
-    "Use Tiktoken tokenizer instead of the default BPE tokenizer.");
-
 int32_t main(int32_t argc, char** argv) {
   gflags::ParseCommandLineFlags(&argc, &argv, true);

@@ -62,8 +57,6 @@ int32_t main(int32_t argc, char** argv) {

   int32_t cpu_threads = FLAGS_cpu_threads;

-  bool use_tiktoken = FLAGS_use_tiktoken;
-
 #if defined(ET_USE_THREADPOOL)
   uint32_t num_performant_cores = cpu_threads == -1
       ? torch::executorch::cpuinfo::get_num_performant_cores()
@@ -76,8 +69,7 @@ int32_t main(int32_t argc, char** argv) {
   }
 #endif
   // create llama runner
-  ::torch::executor::Runner runner(
-      model_path, tokenizer_path, temperature, use_tiktoken);
+  ::torch::executor::Runner runner(model_path, tokenizer_path, temperature);

   // generate
   runner.generate(prompt, seq_len);
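
Note: the runtime `--use_tiktoken` gflags option is deleted outright rather than deprecated, so the tokenizer can no longer be switched per invocation; a binary built without the new option always uses the default BPE tokenizer.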

examples/models/llama2/runner/CMakeLists.txt (6 additions, 0 deletions)

@@ -39,6 +39,11 @@ list(TRANSFORM _llama_runner__srcs PREPEND "${EXECUTORCH_ROOT}/")
 target_include_directories(extension_module
                            INTERFACE ${_common_include_directories})

+if(EXECUTORCH_USE_TIKTOKEN)
+  list(APPEND _llama_runner__srcs ${CMAKE_CURRENT_SOURCE_DIR}/../tokenizer/tiktoken.cpp)
+  set(_preprocessor_flag -DET_USE_TIKTOKEN)
+endif()
+
 if(CMAKE_TOOLCHAIN_IOS OR ANDROID OR APPLE)
   # Building a share library on iOS requires code signing
   # On Android we see duplicated registration when using shared lib
@@ -55,3 +60,4 @@ target_link_libraries(
 target_include_directories(llama_runner
                            INTERFACE ${_common_include_directories}
                                      ${EXECUTORCH_ROOT})
+target_compile_options(llama_runner PUBLIC ${_preprocessor_flag})
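
Note: this hunk is where the CMake-level `EXECUTORCH_USE_TIKTOKEN` option becomes the `ET_USE_TIKTOKEN` compile definition that runner.cpp checks below. When the option is off, `_preprocessor_flag` is never set, so the `target_compile_options` call adds nothing.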

examples/models/llama2/runner/runner.cpp (9 additions, 9 deletions)

@@ -11,7 +11,9 @@

 #include <executorch/examples/models/llama2/runner/runner.h>
 #include <executorch/examples/models/llama2/tokenizer/bpe_tokenizer.h>
+#if defined(ET_USE_TIKTOKEN)
 #include <executorch/examples/models/llama2/tokenizer/tiktoken.h>
+#endif
 #include <executorch/extension/evalue_util/print_evalue.h>
 #include <executorch/extension/runner_util/managed_tensor.h>
@@ -38,10 +40,8 @@ std::string statsToJsonString(const Runner::Stats& stats);
 Runner::Runner(
     const std::string& model_path,
     const std::string& tokenizer_path,
-    const float temperature,
-    bool use_tiktoken)
-    : use_tiktoken_(use_tiktoken),
-      module_(std::make_unique<Module>(
+    const float temperature)
+    : module_(std::make_unique<Module>(
           model_path,
           Module::MlockConfig::UseMlockIgnoreErrors)),
       tokenizer_path_(tokenizer_path),
@@ -80,11 +80,11 @@ Error Runner::load() {
   append_eos_ = getMetadataHelper("append_eos_to_prompt", false);

   // Load tokenizer
-  if (use_tiktoken_) {
-    tokenizer_ = std::make_unique<Tiktoken>(vocab_size_, bos_id_, eos_id_);
-  } else {
-    tokenizer_ = std::make_unique<BPETokenizer>(vocab_size_, bos_id_, eos_id_);
-  }
+#if defined(ET_USE_TIKTOKEN)
+  tokenizer_ = std::make_unique<Tiktoken>(vocab_size_, bos_id_, eos_id_);
+#else
+  tokenizer_ = std::make_unique<BPETokenizer>(vocab_size_, bos_id_, eos_id_);
+#endif
   tokenizer_->load(tokenizer_path_);
   if (tokenizer_->bos_tok() != bos_id_) {
     ET_LOG(
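
For illustration, here is a minimal self-contained sketch of the compile-time dispatch pattern this file now uses. The class names mirror the real tokenizers, but the bodies are simplified stand-ins, not the actual ExecuTorch API:

// Sketch of the compile-time tokenizer selection pattern used above.
// Tokenizer, BPETokenizer, and Tiktoken are simplified stand-ins for
// the real classes, not the actual ExecuTorch API.
#include <cstdio>
#include <memory>

struct Tokenizer {
  virtual ~Tokenizer() = default;
  virtual const char* name() const = 0;
};

struct BPETokenizer final : Tokenizer {
  const char* name() const override { return "BPE"; }
};

struct Tiktoken final : Tokenizer {
  const char* name() const override { return "Tiktoken"; }
};

std::unique_ptr<Tokenizer> make_tokenizer() {
#if defined(ET_USE_TIKTOKEN)
  // Compiled in only when the build defines ET_USE_TIKTOKEN.
  return std::make_unique<Tiktoken>();
#else
  return std::make_unique<BPETokenizer>();
#endif
}

int main() {
  std::printf("tokenizer: %s\n", make_tokenizer()->name());
  return 0;
}

Compiling the sketch with -DET_USE_TIKTOKEN on the compiler command line flips the branch; unlike the deleted use_tiktoken_ member, the selection is fixed when the binary is built.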

examples/models/llama2/runner/runner.h (1 addition, 2 deletions)

@@ -29,8 +29,7 @@ class Runner {
   explicit Runner(
       const std::string& model_path,
       const std::string& tokenizer_path,
-      const float temperature = 0.8f,
-      bool use_tiktoken = false);
+      const float temperature = 0.8f);

   struct Stats {
     // Scaling factor for timestamps - in this case, we use ms.

examples/models/llama2/runner/targets.bzl (5 additions, 2 deletions)

@@ -30,14 +30,17 @@ def define_common_targets():
             exported_deps = [
                 "//executorch/backends/xnnpack:xnnpack_backend",
                 "//executorch/examples/models/llama2/sampler:sampler" + aten_suffix,
-                "//executorch/examples/models/llama2/tokenizer:tokenizer",
                 "//executorch/extension/evalue_util:print_evalue" + aten_suffix,
                 "//executorch/extension/runner_util:managed_tensor" + aten_suffix,
                 "//executorch/extension/module:module" + aten_suffix,
                 "//executorch/kernels/quantized:generated_lib" + aten_suffix,
                 "//executorch/runtime/core/exec_aten:lib" + aten_suffix,
                 "//executorch/runtime/core/exec_aten/util:tensor_util" + aten_suffix,
-            ] + (_get_operator_lib(aten)) + ([
+            ] + ([
+                "//executorch/examples/models/llama2/tokenizer:tiktoken",
+            ] if native.read_config("llama", "use_tiktoken", "0") == "1" else [
+                "//executorch/examples/models/llama2/tokenizer:bpe_tokenizer",
+            ]) + (_get_operator_lib(aten)) + ([
                 # Vulkan API currently cannot build on some platforms (e.g. Apple, FBCODE)
                 # Therefore enable it explicitly for now to avoid failing tests
                 "//executorch/backends/vulkan:vulkan_backend_lib",
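
Note: on the Buck side the switch is a config value rather than a CMake option. Assuming a standard buck2 invocation (target path elided), something like `buck2 build -c llama.use_tiktoken=1 <target>` pulls in the `:tiktoken` dependency; `read_config("llama", "use_tiktoken", "0")` defaults to "0", which keeps `:bpe_tokenizer`.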

examples/models/llama2/tokenizer/targets.bzl (18 additions, 2 deletions)

@@ -2,14 +2,30 @@ load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

 def define_common_targets():
     runtime.cxx_library(
-        name = "tokenizer",
+        name = "bpe_tokenizer",
         srcs = [
             "bpe_tokenizer.cpp",
-            "tiktoken.cpp",
         ],
         exported_headers = [
             "tokenizer.h",
             "bpe_tokenizer.h",
+        ],
+        exported_deps = [
+            "//executorch/runtime/core/exec_aten:lib",
+            "//executorch/runtime/core/exec_aten/util:scalar_type_util",
+        ],
+        visibility = [
+            "@EXECUTORCH_CLIENTS",
+        ],
+    )
+
+    runtime.cxx_library(
+        name = "tiktoken",
+        srcs = [
+            "tiktoken.cpp",
+        ],
+        exported_headers = [
+            "tokenizer.h",
             "tiktoken.h",
             "base64.h",
         ],
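
Note: the former monolithic `:tokenizer` target is split in two here, so any build file that depended on `//executorch/examples/models/llama2/tokenizer:tokenizer` must now pick `:bpe_tokenizer` or `:tiktoken` explicitly, as the runner's targets.bzl above does.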