Support stresscli for codegen (#87)

yao531441 · pre-commit-ci[bot] · web-flow · commit 907dc19dd65a · 2024-08-29T09:07:20.000+08:00
* Support stresscli for codegen * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
diff --git a/evals/benchmark/benchmark.py b/evals/benchmark/benchmark.py
@@ -19,7 +19,7 @@
         "llm_serving": "/v1/chat/completions",
         "e2e": "/v1/chatqna",
     },
-    "codegen": {"llm": "/v1/chat/completions", "llm_serving": "/v1/chat/completions", "e2e": "/v1/codegen"},
+    "codegen": {"llm": "/generate_stream", "llm_serving": "/v1/chat/completions", "e2e": "/v1/codegen"},
     "codetrans": {"llm": "/generate", "llm_serving": "/v1/chat/completions", "e2e": "/v1/codetrans"},
     "faqgen": {"llm": "/v1/chat/completions", "llm_serving": "/v1/chat/completions", "e2e": "/v1/faqgen"},
     "audioqna": {
diff --git a/evals/benchmark/benchmark.yaml b/evals/benchmark/benchmark.yaml
@@ -60,7 +60,7 @@ test_cases:
   codegen:
     llm:
       run_test: true
-      service_name: "llm-svc"  # Replace with your service name
+      service_name: "llm-dependency-svc"  # Replace with your service name
       parameters:
         model_name: "Qwen/CodeQwen1.5-7B-Chat"
         max_new_tokens: 128
@@ -71,10 +71,10 @@ test_cases:
         streaming: true
     llmserve:
       run_test: true
-      service_name: "llm-serving-svc"  # Replace with your service name
+      service_name: "llm-svc"  # Replace with your service name
     e2e:
       run_test: true
-      service_name: "codegen-backend-server-svc"  # Replace with your service name
+      service_name: "codegen-backend-svc"  # Replace with your service name
 
   codetrans:
     llm:
diff --git a/evals/benchmark/stresscli/dataset/codegen.json b/evals/benchmark/stresscli/dataset/codegen.json
diff --git a/evals/benchmark/stresscli/locust/codegenbench.py b/evals/benchmark/stresscli/locust/codegenbench.py
@@ -9,11 +9,11 @@
 import tokenresponse as token
 
 cwd = os.path.dirname(__file__)
-filename = f"{cwd}/../dataset/chatqna.json"
-qlist = []
+filename = f"{cwd}/../dataset/codegen.json"
+qdict = {}
 try:
     with open(filename) as qfile:
-        qlist = json.load(qfile)
+        qdict = json.load(qfile)
 except:
     logging.error(f"Question File open failed: {filename}")
     exit()
@@ -24,10 +24,8 @@ def getUrl():
 
 
 def getReqData():
-    qid = random.randint(1, 189)
-    logging.debug(f"Selected question: {qlist[qid]['qText']}")
-
-    return {"messages": qlist[qid]["qText"], "max_tokens": 128}
+    prompt = "50"
+    return {"messages": qdict[prompt], "max_tokens": 128}
 
 
 def respStatics(environment, resp):
diff --git a/evals/benchmark/stresscli/locust/codegenfixed.py b/evals/benchmark/stresscli/locust/codegenfixed.py
@@ -5,15 +5,15 @@
 
 
 def getUrl():
-    return "/v1/chatqna"
+    return "/v1/codegen"
 
 
 def getReqData():
     return {"messages": "What is the revenue of Nike in last 10 years before 2023? Give me detail", "max_tokens": 128}
 
 
-def respStatics(environment, resp):
-    return token.respStatics(environment, resp)
+def respStatics(environment, reqData, respData):
+    return token.respStatics(environment, reqData, respData)
 
 
 def staticsOutput(environment, reqlist):