|
import functools
import sys
import threading
import traceback
| 4 | + |
1 | 5 | from fastdeploy import LLM, SamplingParams |
2 | 6 | from fastdeploy.utils import set_random_seed |
3 | 7 |
|
4 | | -set_random_seed(123) |
5 | | - |
6 | | -prompts = [ |
7 | | - "Hello, my name is", |
8 | | -] |
9 | | - |
10 | | -# 采样参数 |
11 | | -sampling_params = SamplingParams(temperature=0.8, top_p=0.00001, max_tokens=16) |
12 | | - |
13 | | -# 加载模型 |
14 | | -llm = LLM( |
15 | | - model="/data1/fastdeploy/ERNIE_300B_4L", |
16 | | - tensor_parallel_size=8, |
17 | | - max_model_len=8192, |
18 | | - quantization="wint8", |
19 | | - block_size=16, |
20 | | -) |
21 | | - |
22 | | -# 批量进行推理(llm内部基于资源情况进行请求排队、动态插入处理) |
23 | | -outputs = llm.generate(prompts, sampling_params) |
24 | | - |
25 | | -assert outputs[0].outputs.token_ids == [ |
26 | | - 23768, |
27 | | - 97000, |
28 | | - 47814, |
29 | | - 59335, |
30 | | - 68170, |
31 | | - 183, |
32 | | - 49080, |
33 | | - 94717, |
34 | | - 82966, |
35 | | - 99140, |
36 | | - 31615, |
37 | | - 51497, |
38 | | - 94851, |
39 | | - 60764, |
40 | | - 10889, |
41 | | - 2, |
42 | | -], f"{outputs[0].outputs.token_ids}" |
| 8 | + |
def timeout(seconds):
    """Decorator that aborts a call if it runs longer than ``seconds``.

    The wrapped function is executed in a daemon thread; if it does not
    finish within the deadline, a ``TimeoutError`` is raised in the caller.
    Note that the worker thread itself cannot be killed and may keep
    running in the background after the timeout fires.

    Args:
        seconds: Maximum wall-clock time, in seconds, allowed for the call.

    Returns:
        A decorator wrapping the target function with the timeout guard.

    Raises:
        TimeoutError: If the call does not complete within ``seconds``.
    """

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # One-element lists let the worker thread hand its outcome back
            # to this frame without any extra synchronization.
            result = [None]
            exception = [None]

            def target():
                try:
                    result[0] = func(*args, **kwargs)
                except Exception as e:  # re-raised in the caller below
                    exception[0] = e

            # daemon=True so a stuck worker never blocks interpreter exit.
            thread = threading.Thread(target=target, daemon=True)
            thread.start()
            thread.join(seconds)

            if thread.is_alive():
                raise TimeoutError(f"Function timed out after {seconds} seconds")

            # Explicit None check: a captured exception object with an
            # overridden, falsy __bool__ must still be re-raised.
            if exception[0] is not None:
                raise exception[0]

            return result[0]

        return wrapper

    return decorator
| 38 | + |
| 39 | + |
@timeout(60)
def offline_infer_check():
    """Run one offline generation pass and verify the exact token output.

    Seeds the RNG, loads the ERNIE test model with wint8 quantization,
    generates a near-deterministic completion (tiny ``top_p`` makes sampling
    effectively greedy) for a single prompt, and asserts the produced token
    ids match the golden sequence.

    Raises:
        AssertionError: If the generated token ids differ from the golden list.
        TimeoutError: If the whole check exceeds 60 seconds (via ``@timeout``).
    """
    set_random_seed(123)

    prompts = ["Hello, my name is"]

    # Sampling parameters: the tiny top_p pins decoding to the top token.
    sampling_params = SamplingParams(temperature=0.8, top_p=0.00001, max_tokens=16)

    # Load the model.
    llm = LLM(
        model="/data1/fastdeploy/ERNIE_300B_4L",
        tensor_parallel_size=8,
        max_model_len=8192,
        quantization="wint8",
        block_size=16,
    )

    # Batch inference; the LLM internally queues requests and schedules
    # them dynamically based on available resources.
    outputs = llm.generate(prompts, sampling_params)

    # Golden token ids for this model, prompt, and sampling configuration.
    expected_token_ids = [
        23768, 97000, 47814, 59335, 68170, 183, 49080, 94717,
        82966, 99140, 31615, 51497, 94851, 60764, 10889, 2,
    ]
    actual_token_ids = outputs[0].outputs.token_ids
    assert actual_token_ids == expected_token_ids, f"{actual_token_ids}"
    print("PASSED")
| 82 | + |
| 83 | + |
if __name__ == "__main__":
    # Map outcomes to shell exit codes: 0 = pass, 124 = timeout (mirroring
    # GNU `timeout`), 1 = any other failure.
    try:
        offline_infer_check()
        sys.exit(0)
    except TimeoutError:
        sys.exit(124)
    except Exception:
        # Broad catch is intentional at this top-level boundary, but surface
        # the failure for CI logs before converting it to an exit code.
        traceback.print_exc()
        sys.exit(1)
0 commit comments