@@ -207,7 +207,7 @@ def process_outputs(self, prev_nodes: List, result_dict: Dict) -> Dict:
             all_outputs.update(result_dict[prev_node])
         return all_outputs
 
-    async def wrap_iterable(self, aiterable, is_first=True):
+    def wrap_iterable(self, iterable, is_first=True):
 
         with tracer.start_as_current_span("llm_generate_stream") if ENABLE_OPEA_TELEMETRY else contextlib.nullcontext():
             while True:
@@ -217,10 +217,10 @@ async def wrap_iterable(self, aiterable, is_first=True):
                     else contextlib.nullcontext()
                 ):  # else tracer.start_as_current_span(f"llm_generate_stream_next_token")
                     try:
-                        token = await anext(aiterable)
+                        token = next(iterable)
                         yield token
                         is_first = False
-                    except StopAsyncIteration:
+                    except StopIteration:
                         # Exiting the iterable loop cleanly
                         break
                     except Exception as e:
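The change above swaps the asynchronous iterator protocol (`anext` / `StopAsyncIteration`) for the plain synchronous one (`next` / `StopIteration`) while keeping the per-token tracing structure. Below is a minimal, self-contained sketch of the same wrapping pattern, using a made-up `fake_stream()` generator in place of the streamed HTTP response iterator:

```python
# Illustrative sketch only; `fake_stream` is a stand-in for the streamed
# HTTP response iterator used by the orchestrator.
def fake_stream():
    for token in ["Hello", " ", "world", "!"]:
        yield token.encode("utf-8")


def wrap_iterable(iterable, is_first=True):
    # Pull tokens one at a time so per-token work (e.g. a tracing span)
    # can be attached around each next() call.
    while True:
        try:
            token = next(iterable)
            yield token
            is_first = False
        except StopIteration:
            # The underlying iterator is exhausted; end the generator cleanly.
            break


for chunk in wrap_iterable(fake_stream()):
    print(chunk.decode("utf-8"), end="")
```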
@@ -259,51 +259,49 @@ async def execute(
                 if ENABLE_OPEA_TELEMETRY
                 else contextlib.nullcontext()
             ):
-                async with aiohttp.ClientSession() as session:
-                    response = await session.post(
-                        url=endpoint,
-                        data=json.dumps(inputs),
-                        headers={"Content-type": "application/json"},
-                        proxy=None,
-                        timeout=aiohttp.ClientTimeout(total=1000),
-                    )
+                response = requests.post(
+                    url=endpoint,
+                    data=json.dumps(inputs),
+                    headers={"Content-type": "application/json"},
+                    proxies={"http": None},
+                    stream=True,
+                    timeout=1000,
+                )
             downstream = runtime_graph.downstream(cur_node)
             if downstream:
                 assert len(downstream) == 1, "Not supported multiple stream downstreams yet!"
                 cur_node = downstream[0]
                 hitted_ends = [".", "?", "!", "。", "，", "！"]
                 downstream_endpoint = self.services[downstream[0]].endpoint_path
 
-            async def generate():
+            def generate():
                 token_start = req_start
                 if response:
                     # response.elapsed = time until first headers received
                     buffered_chunk_str = ""
                     is_first = True
-                    async for chunk in self.wrap_iterable(response.content.iter_chunked(None)):
+                    for chunk in self.wrap_iterable(response.iter_content(chunk_size=None)):
                         if chunk:
                             if downstream:
                                 chunk = chunk.decode("utf-8")
                                 buffered_chunk_str += self.extract_chunk_str(chunk)
                                 is_last = chunk.endswith("[DONE]\n\n")
                                 if (buffered_chunk_str and buffered_chunk_str[-1] in hitted_ends) or is_last:
-                                    async with aiohttp.ClientSession() as downstream_session:
-                                        res = await downstream_session.post(
-                                            url=downstream_endpoint,
-                                            data=json.dumps({"text": buffered_chunk_str}),
-                                            proxy=None,
-                                        )
-                                        res_json = await res.json()
-                                        if "text" in res_json:
-                                            res_txt = res_json["text"]
-                                        else:
-                                            raise Exception("Other response types not supported yet!")
-                                        buffered_chunk_str = ""  # clear
-                                        async for item in self.token_generator(
-                                            res_txt, token_start, is_first=is_first, is_last=is_last
-                                        ):
-                                            yield item
-                                        token_start = time.time()
+                                    res = requests.post(
+                                        url=downstream_endpoint,
+                                        data=json.dumps({"text": buffered_chunk_str}),
+                                        proxies={"http": None},
+                                    )
+                                    res_json = res.json()
+                                    if "text" in res_json:
+                                        res_txt = res_json["text"]
+                                    else:
+                                        raise Exception("Other response types not supported yet!")
+                                    buffered_chunk_str = ""  # clear
+                                    yield from self.token_generator(
+                                        res_txt, token_start, is_first=is_first, is_last=is_last
+                                    )
+                                    token_start = time.time()
                             else:
                                 token_start = self.metrics.token_update(token_start, is_first)
                                 yield chunk
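Taken together, this hunk drops the aiohttp client in favor of blocking `requests` calls: `stream=True` keeps the LLM response open, `iter_content(chunk_size=None)` yields chunks as they arrive, and buffered text is forwarded to the downstream service at sentence boundaries. Below is a self-contained sketch of that pattern; the endpoint URLs and payloads are placeholders, not part of this commit:

```python
# Standalone sketch of the synchronous streaming pattern the hunk switches to.
import json
import requests

LLM_ENDPOINT = "http://localhost:9000/v1/chat/completions"    # placeholder
DOWNSTREAM_ENDPOINT = "http://localhost:9001/v1/text2speech"  # placeholder

# stream=True defers downloading the body; iter_content(chunk_size=None)
# then yields data as it arrives instead of buffering the whole response.
response = requests.post(
    url=LLM_ENDPOINT,
    data=json.dumps({"query": "hello"}),
    headers={"Content-type": "application/json"},
    proxies={"http": None},
    stream=True,
    timeout=1000,
)

buffered = ""
for chunk in response.iter_content(chunk_size=None):
    if not chunk:
        continue
    buffered += chunk.decode("utf-8")
    # Flush the buffer to the downstream service at sentence boundaries.
    if buffered and buffered[-1] in [".", "?", "!"]:
        res = requests.post(
            url=DOWNSTREAM_ENDPOINT,
            data=json.dumps({"text": buffered}),
            proxies={"http": None},
        )
        print(res.json().get("text", ""))
        buffered = ""
```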