@@ -24,6 +24,7 @@ func llama_batch_add(_ batch: inout llama_batch, _ id: llama_token, _ pos: llama
 actor LlamaContext {
     private var model: OpaquePointer
     private var context: OpaquePointer
+    private var vocab: OpaquePointer
     private var sampling: UnsafeMutablePointer<llama_sampler>
     private var batch: llama_batch
     private var tokens_list: [llama_token]
@@ -47,6 +48,7 @@ actor LlamaContext {
         self.sampling = llama_sampler_chain_init(sparams)
         llama_sampler_chain_add(self.sampling, llama_sampler_init_temp(0.4))
         llama_sampler_chain_add(self.sampling, llama_sampler_init_dist(1234))
+        vocab = llama_model_get_vocab(model)
     }
 
     deinit {
@@ -79,7 +81,7 @@ actor LlamaContext {
         ctx_params.n_threads = Int32(n_threads)
         ctx_params.n_threads_batch = Int32(n_threads)
 
-        let context = llama_new_context_with_model(model, ctx_params)
+        let context = llama_init_from_model(model, ctx_params)
         guard let context else {
             print("Could not load context!")
             throw LlamaError.couldNotInitializeContext
@@ -151,7 +153,7 @@ actor LlamaContext {
 
         new_token_id = llama_sampler_sample(sampling, context, batch.n_tokens - 1)
 
-        if llama_vocab_is_eog(model, new_token_id) || n_cur == n_len {
+        if llama_vocab_is_eog(vocab, new_token_id) || n_cur == n_len {
             print("\n")
             is_done = true
             let new_token_str = String(cString: temporary_invalid_cchars + [0])
@@ -297,7 +299,7 @@ actor LlamaContext {
         let utf8Count = text.utf8.count
         let n_tokens = utf8Count + (add_bos ? 1 : 0) + 1
         let tokens = UnsafeMutablePointer<llama_token>.allocate(capacity: n_tokens)
-        let tokenCount = llama_tokenize(model, text, Int32(utf8Count), tokens, Int32(n_tokens), add_bos, false)
+        let tokenCount = llama_tokenize(vocab, text, Int32(utf8Count), tokens, Int32(n_tokens), add_bos, false)
 
         var swiftTokens: [llama_token] = []
         for i in 0..<tokenCount {
@@ -316,15 +318,15 @@ actor LlamaContext {
         defer {
             result.deallocate()
         }
-        let nTokens = llama_token_to_piece(model, token, result, 8, 0, false)
+        let nTokens = llama_token_to_piece(vocab, token, result, 8, 0, false)
 
         if nTokens < 0 {
             let newResult = UnsafeMutablePointer<Int8>.allocate(capacity: Int(-nTokens))
             newResult.initialize(repeating: Int8(0), count: Int(-nTokens))
             defer {
                 newResult.deallocate()
             }
-            let nNewTokens = llama_token_to_piece(model, token, newResult, -nTokens, 0, false)
+            let nNewTokens = llama_token_to_piece(vocab, token, newResult, -nTokens, 0, false)
             let bufferPointer = UnsafeBufferPointer(start: newResult, count: Int(nNewTokens))
             return Array(bufferPointer)
         } else {
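
For anyone updating their own bindings against this change, a minimal sketch of the migrated call pattern follows. It is an illustration, not code from this commit: `migratedUsage` is a hypothetical helper, a `model` pointer is assumed to be loaded elsewhere, and `LlamaError` is assumed to be defined as in this example.

```swift
import llama

// Hypothetical helper for illustration; `model` is assumed loaded already.
func migratedUsage(model: OpaquePointer) throws {
    // The vocab handle is now obtained once from the model...
    let vocab = llama_model_get_vocab(model)

    // ...and llama_init_from_model replaces the deprecated
    // llama_new_context_with_model; the context parameters are unchanged.
    let ctx_params = llama_context_default_params()
    guard let context = llama_init_from_model(model, ctx_params) else {
        throw LlamaError.couldNotInitializeContext
    }
    defer { llama_free(context) }

    // Tokenization and the end-of-generation check take the vocab handle
    // where they previously took the model pointer.
    let text = "Hello"
    let maxTokens = text.utf8.count + 2
    let tokens = UnsafeMutablePointer<llama_token>.allocate(capacity: maxTokens)
    defer { tokens.deallocate() }
    let count = llama_tokenize(vocab, text, Int32(text.utf8.count), tokens, Int32(maxTokens), true, false)
    if count > 0 && llama_vocab_is_eog(vocab, tokens[Int(count) - 1]) {
        print("last token is end-of-generation")
    }
}
```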