1
1
use lorax_client:: {
2
- Batch , NextTokenChooserParameters , Request , ShardInfo , ShardedClient ,
3
- StoppingCriteriaParameters ,
2
+ input_chunk , Batch , InputChunk , NextTokenChooserParameters , Request , ShardInfo , ShardedClient ,
3
+ StoppingCriteriaParameters , TokenizedInputs ,
4
4
} ;
5
5
use std:: sync:: atomic:: { AtomicBool , Ordering } ;
6
6
use std:: sync:: Arc ;
@@ -40,7 +40,12 @@ impl Health {
40
40
let generation_liveness_request = Request {
41
41
id : LIVENESS_ID ,
42
42
inputs : "liveness" . to_string ( ) ,
43
- tokenized_inputs : None ,
43
+ tokenized_inputs : Some ( TokenizedInputs {
44
+ ids : vec ! [ 75 ] ,
45
+ input_chunks : vec ! [ InputChunk {
46
+ chunk: Some ( input_chunk:: Chunk :: Text ( "liveness" . to_string( ) ) ) ,
47
+ } ] ,
48
+ } ) ,
44
49
truncate : 10 ,
45
50
prefill_logprobs : false ,
46
51
parameters : Some ( NextTokenChooserParameters {
@@ -66,7 +71,7 @@ impl Health {
66
71
adapter_index : 0 ,
67
72
// Block 0 is reserved for health checks
68
73
blocks : vec ! [ 0 ] ,
69
- slots : ( 0 ..16 ) . collect ( ) ,
74
+ slots : ( 0 ..self . shard_info . block_size ) . collect ( ) ,
70
75
cache_len : 0 ,
71
76
chunk_len : None ,
72
77
} ;
@@ -84,15 +89,20 @@ impl Health {
84
89
pub ( crate ) async fn check_classification ( & mut self ) -> bool {
85
90
let classify_request = Request {
86
91
id : LIVENESS_ID ,
87
- inputs : "San Francisco" . to_string ( ) ,
88
- tokenized_inputs : None ,
92
+ inputs : "liveness" . to_string ( ) ,
93
+ tokenized_inputs : Some ( TokenizedInputs {
94
+ ids : vec ! [ 75 ] ,
95
+ input_chunks : vec ! [ InputChunk {
96
+ chunk: Some ( input_chunk:: Chunk :: Text ( "liveness" . to_string( ) ) ) ,
97
+ } ] ,
98
+ } ) ,
89
99
truncate : 10 ,
90
100
prefill_logprobs : false ,
91
101
parameters : None ,
92
102
stopping_parameters : None ,
93
103
adapter_index : 0 ,
94
104
blocks : vec ! [ 0 ] ,
95
- slots : ( 0 ..16 ) . collect ( ) ,
105
+ slots : ( 0 ..self . shard_info . block_size ) . collect ( ) ,
96
106
cache_len : 0 ,
97
107
chunk_len : None ,
98
108
} ;
@@ -109,15 +119,20 @@ impl Health {
109
119
pub ( crate ) async fn check_embeddings ( & mut self ) -> bool {
110
120
let embed_request = Request {
111
121
id : LIVENESS_ID ,
112
- inputs : "San Francisco" . to_string ( ) ,
113
- tokenized_inputs : None ,
122
+ inputs : "liveness" . to_string ( ) ,
123
+ tokenized_inputs : Some ( TokenizedInputs {
124
+ ids : vec ! [ 75 ] ,
125
+ input_chunks : vec ! [ InputChunk {
126
+ chunk: Some ( input_chunk:: Chunk :: Text ( "liveness" . to_string( ) ) ) ,
127
+ } ] ,
128
+ } ) ,
114
129
truncate : 10 ,
115
130
prefill_logprobs : false ,
116
131
parameters : None ,
117
132
stopping_parameters : None ,
118
133
adapter_index : 0 ,
119
134
blocks : vec ! [ 0 ] ,
120
- slots : ( 0 ..16 ) . collect ( ) ,
135
+ slots : ( 0 ..self . shard_info . block_size ) . collect ( ) ,
121
136
cache_len : 0 ,
122
137
chunk_len : None ,
123
138
} ;
0 commit comments