1
+ use base64;
1
2
use fancy_regex:: Regex ;
2
3
use mlua:: prelude:: * ;
3
4
use rustc_hash:: FxHashMap as HashMap ;
@@ -6,7 +7,6 @@ use std::fs::File;
6
7
use std:: io:: { BufRead , BufReader } ;
7
8
use std:: sync:: { Arc , Mutex } ;
8
9
use std:: thread;
9
- use base64;
10
10
11
11
#[ cfg( feature = "multithreading" ) ]
12
12
const MAX_NUM_THREADS : usize = 128 ;
@@ -203,7 +203,7 @@ pub fn tiktoken_core(lua: &mlua::Lua) -> LuaResult<LuaTable> {
203
203
Ok ( ( ) )
204
204
} ,
205
205
) ?;
206
- let _encode = lua. create_function ( move |_, text : String | encode ( & * state2, text) ) ?;
206
+ let _encode = lua. create_function ( move |_, text : mlua :: String | encode ( & * state2, text) ) ?;
207
207
208
208
let exports = lua. create_table ( ) ?;
209
209
exports. set ( "new" , _new) ?;
@@ -261,7 +261,8 @@ fn new(
261
261
} ) ;
262
262
}
263
263
264
- fn encode ( state : & State , text : String ) -> LuaResult < ( Vec < usize > , usize , usize ) > {
264
+ fn encode ( state : & State , text : mlua:: String ) -> LuaResult < ( Vec < usize > , usize , usize ) > {
265
+ let encoded_str = String :: from_utf8_lossy ( text. as_bytes ( ) ) ;
265
266
let allowed_special = HashSet :: new ( ) ;
266
267
let max_tokens = None ;
267
268
Ok ( state
@@ -270,7 +271,7 @@ fn encode(state: &State, text: String) -> LuaResult<(Vec<usize>, usize, usize)>
270
271
. unwrap ( )
271
272
. as_ref ( )
272
273
. unwrap ( )
273
- . _encode_native ( & text , & allowed_special, max_tokens) )
274
+ . _encode_native ( & encoded_str , & allowed_special, max_tokens) )
274
275
}
275
276
276
277
pub struct CoreBPENative {
0 commit comments