Skip to content

Commit 744839c

Browse files
committed
Merge branch 'aligner/nemotron5' of https://github.com/NVIDIA/NeMo into aligner/nemotron5
2 parents 0278a01 + 0a63807 commit 744839c

File tree

2 files changed

+28
-17
lines changed

2 files changed

+28
-17
lines changed

examples/nlp/language_modeling/megatron_mamba_eval.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import json
1818
import os
1919
import threading
20+
import time
2021
from functools import partial
2122

2223
import torch
@@ -352,23 +353,31 @@ def main(cfg) -> None:
352353
prompts = load_prompts(cfg)
353354

354355
# First method of running text generation, call model.generate method
355-
response = model.generate(inputs=prompts, length_params=length_params, sampling_params=sampling_params)
356+
for i in range(3):
357+
st = time.perf_counter()
358+
response = model.generate(inputs=prompts, length_params=length_params, sampling_params=sampling_params)
359+
tdiff = time.perf_counter() - st
360+
print(f"[Try{i}] model.generate took {tdiff} seconds...")
356361

357-
print("***************************")
358-
print(response)
359-
print("***************************")
362+
# print("***************************")
363+
# print(response)
364+
# print("***************************")
360365

361366
# Second method of running text generation, call trainer.predict [recommended]
362367
bs = 2
363368
ds = RequestDataSet(prompts)
364369
request_dl = DataLoader(dataset=ds, batch_size=bs)
365370
config = OmegaConf.to_container(cfg.inference)
366371
model.set_inference_config(config)
367-
response = trainer.predict(model, request_dl)
368-
369-
print("***************************")
370-
print(response)
371-
print("***************************")
372+
for i in range(3):
373+
st = time.perf_counter()
374+
response = trainer.predict(model, request_dl)
375+
tdiff = time.perf_counter() - st
376+
print(f"[Try{i}] trainer.predict took {tdiff} seconds...")
377+
378+
# print("***************************")
379+
# print(response)
380+
# print("***************************")
372381

373382
# Third method of running text generation, use inference server
374383
if cfg.server:

nemo/collections/nlp/modules/common/text_generation_server.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -171,15 +171,17 @@ def chat_completion(self, data):
171171
if OmegaConf.select(self.model.cfg, "data.chat_prompt_tokens") is not None:
172172
special_tokens = self.model.cfg.data.chat_prompt_tokens
173173
else:
174-
#raise RuntimeError(
174+
# raise RuntimeError(
175175
# "You don't have a model (model_config.yaml) which has chat_prompt_tokens, are you sure this is a Chat/Instruction model?"
176-
#)
176+
# )
177177
# (@adithyare) hacking in the special tokens to test non-chat models for debugging
178-
special_tokens = {"system_turn_start": "<SPECIAL_10>",
179-
"turn_start": "<SPECIAL_11>",
180-
"label_start": "<SPECIAL_12>",
181-
"end_of_name": "\n",
182-
"end_of_turn": "\n"}
178+
special_tokens = {
179+
"system_turn_start": "<SPECIAL_10>",
180+
"turn_start": "<SPECIAL_11>",
181+
"label_start": "<SPECIAL_12>",
182+
"end_of_name": "\n",
183+
"end_of_turn": "\n",
184+
}
183185
nemo_source = self.convert_messages(data['messages'])
184186
header, conversation, data_type, mask_role = _get_header_conversation_type_mask_role(
185187
nemo_source, special_tokens
@@ -432,7 +434,7 @@ def put(self):
432434
# (@adithyare) resolves a json byte conversion issue (taken from chat_completeion)
433435
for i in range(len(output['tokens'])):
434436
tokens = output['tokens'][i]
435-
output['tokens'][i] = [t.decode('utf-8', errors='replace') if isinstance(t, bytes) else t for t in tokens]
437+
output['tokens'][i] = [t.decode('utf-8', errors='replace') if isinstance(t, bytes) else t for t in tokens]
436438

437439
if not all_probs:
438440
del output['full_logprob']

0 commit comments

Comments (0)