@@ -109,16 +109,14 @@ def inference(
     top_p: float = 0.9,
     max_gen_len: int = 64,
 ):
-    results = generator.text_completion(
-        prompts,
-        max_gen_len=max_gen_len,
-        temperature=temperature,
-        top_p=top_p,
-    )
-    for prompt, result in zip(prompts, results):
-        print(prompt)
-        print(f"> {result['generation']}")
-        print("\n==================================\n")
+    with torch.no_grad():
+        results = generator.text_completion(
+            prompts,
+            max_gen_len=max_gen_len,
+            temperature=temperature,
+            top_p=top_p,
+        )
+    return zip(prompts, results)

 def __get_next_batch(dataloader):
     return next(iter(dataloader))
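Context for the hunk above: `torch.no_grad()` disables gradient tracking during generation, avoiding activation bookkeeping that inference never needs, and returning the zipped `(prompt, result)` pairs moves printing out of `inference` so callers can format results themselves. A minimal consumption sketch, not part of the diff; it assumes `generator` and `prompts` are the positional parameters truncated from the `def inference(` signature:

for prompt, result in inference(generator, prompts, top_p=0.9, max_gen_len=64):
    print(prompt)
    print(f"> {result['generation']}")  # text_completion results are dicts with a 'generation' key
    print("\n==================================\n")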
@@ -139,8 +137,9 @@ def benchmark(ckpt_dir,
     print("Running inference benchmark...\n")

     with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True, profile_memory=PROFILE_MEMORY) as prof:
-        with record_function("run_benchmark"):
-            _, load, inference, total = run_benchmark(data_loader, net)
+        # with record_function("run_benchmark"):
+        #     _, load, inference, total = run_benchmark(data_loader, net)
+        _, load, inference, total = run_benchmark(data_loader, net)

     profile_cuda_time = prof.key_averages().table(sort_by="cuda_time_total", row_limit=10)
     profile_cuda_mem = prof.key_averages().table(sort_by="self_cuda_memory_usage", row_limit=10)
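The hunk above comments out the `record_function("run_benchmark")` wrapper, presumably so the profiler table is not dominated by a single user-defined scope covering the whole benchmark. For reference, a self-contained sketch of the same `torch.profiler` pattern; the workload is a hypothetical stand-in for `run_benchmark(data_loader, net)`:

import torch
from torch.profiler import profile, ProfilerActivity

def run_workload():
    # Hypothetical stand-in for run_benchmark(data_loader, net).
    device = "cuda" if torch.cuda.is_available() else "cpu"
    x = torch.randn(1024, 1024, device=device)
    for _ in range(10):
        x = x @ x

# Profile CUDA activity only when a GPU is actually present.
activities = [ProfilerActivity.CPU]
if torch.cuda.is_available():
    activities.append(ProfilerActivity.CUDA)

with profile(activities=activities, record_shapes=True, profile_memory=True) as prof:
    run_workload()

# Summarize the hottest ops, mirroring the sort keys used in the diff.
time_key = "cuda_time_total" if torch.cuda.is_available() else "cpu_time_total"
mem_key = "self_cuda_memory_usage" if torch.cuda.is_available() else "self_cpu_memory_usage"
print(prof.key_averages().table(sort_by=time_key, row_limit=10))
print(prof.key_averages().table(sort_by=mem_key, row_limit=10))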