Skip to content

Commit 72500a0

Browse files
authored
Tokenizer test
Differential Revision: D69860352
Pull Request resolved: #21
1 parent c340b78 commit 72500a0

6 files changed

+102
-24
lines changed

targets.bzl

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ def define_common_targets():
1515
]),
1616
visibility = [
1717
"@EXECUTORCH_CLIENTS",
18+
"//pytorch/tokenizers/...",
1819
],
1920
header_namespace = "",
2021
)
@@ -29,12 +30,14 @@ def define_common_targets():
2930
],
3031
visibility = [
3132
"@EXECUTORCH_CLIENTS",
33+
"//pytorch/tokenizers/...",
3234
],
3335
compiler_flags = [
3436
"-D_USE_INTERNAL_STRING_VIEW",
3537
],
3638
external_deps = [
3739
"sentencepiece",
40+
"abseil-cpp",
3841
],
3942
)
4043

@@ -49,6 +52,7 @@ def define_common_targets():
4952
],
5053
visibility = [
5154
"@EXECUTORCH_CLIENTS",
55+
"//pytorch/tokenizers/...",
5256
],
5357
compiler_flags = [
5458
"-D_USE_INTERNAL_STRING_VIEW",
@@ -84,6 +88,7 @@ def define_common_targets():
8488
],
8589
visibility = [
8690
"@EXECUTORCH_CLIENTS",
91+
"//pytorch/tokenizers/...",
8792
],
8893
compiler_flags = [
8994
"-D_USE_INTERNAL_STRING_VIEW",
@@ -104,5 +109,6 @@ def define_common_targets():
104109
],
105110
visibility = [
106111
"@EXECUTORCH_CLIENTS",
112+
"//pytorch/tokenizers/...",
107113
],
108114
)

test/TARGETS

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Any targets that should be shared between fbcode and xplat must be defined in
2+
# targets.bzl. This file can contain fbcode-only targets.
3+
4+
load(":targets.bzl", "define_common_targets")
5+
6+
oncall("executorch")
7+
8+
define_common_targets()

test/targets.bzl

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
load(
2+
"@fbsource//tools/build_defs:default_platform_defs.bzl",
3+
"ANDROID",
4+
"CXX",
5+
)
6+
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
7+
8+
def define_common_targets():
9+
"""Defines targets that should be shared between fbcode and xplat.
10+
11+
The directory containing this targets.bzl file should also contain both
12+
TARGETS and BUCK files that call this function.
13+
"""
14+
runtime.cxx_test(
15+
name = "test_base64",
16+
srcs = [
17+
"test_base64.cpp",
18+
],
19+
deps = [
20+
"//pytorch/tokenizers:headers",
21+
],
22+
)
23+
24+
runtime.cxx_test(
25+
name = "test_llama2c_tokenizer",
26+
srcs = [
27+
"test_llama2c_tokenizer.cpp",
28+
],
29+
deps = [
30+
"//pytorch/tokenizers:llama2c_tokenizer",
31+
],
32+
env = {
33+
"RESOURCES_PATH": "$(location :resources)/resources",
34+
},
35+
platforms = [CXX, ANDROID], # Cannot bundle resources on Apple platform.
36+
)
37+
38+
runtime.cxx_test(
39+
name = "test_pre_tokenizer",
40+
srcs = [
41+
"test_pre_tokenizer.cpp",
42+
],
43+
deps = [
44+
"//pytorch/tokenizers:headers",
45+
"//pytorch/tokenizers:hf_tokenizer",
46+
],
47+
)
48+
49+
runtime.cxx_test(
50+
name = "test_sentencepiece",
51+
srcs = [
52+
"test_sentencepiece.cpp",
53+
],
54+
deps = ["//pytorch/tokenizers:sentencepiece"],
55+
external_deps = [
56+
"sentencepiece",
57+
"abseil-cpp",
58+
],
59+
env = {
60+
"RESOURCES_PATH": "$(location :resources)/resources",
61+
},
62+
platforms = [CXX, ANDROID], # Cannot bundle resources on Apple platform.
63+
)
64+
65+
runtime.cxx_test(
66+
name = "test_tiktoken",
67+
srcs = [
68+
"test_tiktoken.cpp",
69+
],
70+
deps = [
71+
"//pytorch/tokenizers:tiktoken",
72+
],
73+
env = {
74+
"RESOURCES_PATH": "$(location :resources)/resources",
75+
},
76+
platforms = [CXX, ANDROID], # Cannot bundle resources on Apple platform.
77+
external_deps = [
78+
"re2",
79+
],
80+
)
81+
82+
runtime.filegroup(
83+
name = "resources",
84+
srcs = native.glob([
85+
"resources/**",
86+
]),
87+
)

test/test_llama2c_tokenizer.cpp

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,4 @@
1-
/*
2-
* Copyright (c) Meta Platforms, Inc. and affiliates.
3-
* All rights reserved.
4-
*
5-
* This source code is licensed under the BSD-style license found in the
6-
* LICENSE file in the root directory of this source tree.
7-
*/
1+
// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
82

93
#ifdef TOKENIZERS_FB_BUCK
104
#include <TestResourceUtils/TestResourceUtils.h>

test/test_sentencepiece.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,24 +7,15 @@
77
*/
88
// @lint-ignore-every LICENSELINT
99

10-
#ifdef TOKENIZERS_FB_BUCK
11-
#include <TestResourceUtils/TestResourceUtils.h>
12-
#endif
1310
#include <gtest/gtest.h>
1411
#include <pytorch/tokenizers/sentencepiece.h>
1512

1613
namespace tokenizers {
1714

1815
namespace {
1916
static inline std::string _get_resource_path(const std::string& name) {
20-
#ifdef TOKENIZERS_FB_BUCK
21-
return facebook::xplat::testing::getPathForTestResource(
22-
"test/resources/" + name);
23-
#else
2417
return std::getenv("RESOURCES_PATH") + std::string("/") + name;
25-
#endif
2618
}
27-
2819
} // namespace
2920

3021
TEST(SPTokenizerTest, TestEncodeWithoutLoad) {

test/test_tiktoken.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,6 @@
77
*/
88
// @lint-ignore-every LICENSELINT
99

10-
#ifdef TOKENIZERS_FB_BUCK
11-
#include <TestResourceUtils/TestResourceUtils.h>
12-
#endif
1310
#include <gtest/gtest.h>
1411
#include <pytorch/tokenizers/tiktoken.h>
1512

@@ -45,12 +42,7 @@ static inline std::unique_ptr<std::vector<std::string>> _get_special_tokens() {
4542
}
4643

4744
static inline std::string _get_resource_path(const std::string& name) {
48-
#ifdef TOKENIZERS_FB_BUCK
49-
return facebook::xplat::testing::getPathForTestResource(
50-
"test/resources/" + name);
51-
#else
5245
return std::getenv("RESOURCES_PATH") + std::string("/") + name;
53-
#endif
5446
}
5547

5648
} // namespace

0 commit comments

Comments
 (0)