1 file changed
+1
-1
lines changed
Submodule tokenizers updated 28 files
- .gitmodules-3
- CMakeLists.txt+2-46
- include/pytorch/tokenizers/bpe_tokenizer_base.h+9-12
- include/pytorch/tokenizers/hf_tokenizer.h+5-2
- include/pytorch/tokenizers/pcre2_regex.h-52
- include/pytorch/tokenizers/pre_tokenizer.h+8-11
- include/pytorch/tokenizers/re2_regex.h-44
- include/pytorch/tokenizers/regex.h-48
- include/pytorch/tokenizers/result.h-17
- include/pytorch/tokenizers/std_regex.h-40
- include/pytorch/tokenizers/tiktoken.h+15-5
- include/pytorch/tokenizers/token_decoder.h+2-2
- include/pytorch/tokenizers/tokenizer.h+1-1
- src/bpe_tokenizer_base.cpp+27-20
- src/hf_tokenizer.cpp+6-4
- src/pcre2_regex.cpp-109
- src/pre_tokenizer.cpp+15-17
- src/re2_regex.cpp-36
- src/regex.cpp-73
- src/std_regex.cpp-30
- src/tiktoken.cpp+100-15
- src/token_decoder.cpp+4-3
- targets.bzl+13-33
- test/test_base64.cpp+1-1
- test/test_pre_tokenizer.cpp+2-1
- test/test_regex.cpp-107
- third-party/pcre2-1
- third-party/targets.bzl-74
0 commit comments