 
 
 async def ollama_stream_generator(
-    stream: AsyncIterator[ChatResponse], is_cline_client: bool
+    stream: AsyncIterator[ChatResponse], base_tool: str
 ) -> AsyncIterator[str]:
     """OpenAI-style SSE format"""
     try:
         async for chunk in stream:
             try:
-                yield f"{chunk.model_dump_json()}\n\n"
                 # TODO We should wire in the client info so we can respond with
                 # the correct format and start to handle multiple clients
                 # in a more robust way.
-                if not is_cline_client:
+                if base_tool != "cline":
                     yield f"{chunk.model_dump_json()}\n"
                 else:
                     # First get the raw dict from the chunk
@@ -63,6 +62,9 @@ async def ollama_stream_generator(
                     for field in optional_fields:
                         if field in chunk_dict:
                             response[field] = chunk_dict[field]
+
+                    print("in cline")
+                    print(json.dumps(response))
                     yield f"data: {json.dumps(response)}\n"
             except Exception as e:
                 logger.error(f"Error in stream generator: {str(e)}")
@@ -76,26 +78,18 @@ class OllamaShim(BaseCompletionHandler):
 
     def __init__(self, base_url):
         self.client = AsyncClient(host=base_url, timeout=300)
-        self.is_cline_client = False
+        self.base_tool = ""
 
     async def execute_completion(
         self,
         request: ChatCompletionRequest,
         api_key: Optional[str],
         stream: bool = False,
         is_fim_request: bool = False,
-        is_cline_client: bool = False,
+        base_tool: Optional[str] = "",
     ) -> Union[ChatResponse, GenerateResponse]:
         """Stream response directly from Ollama API."""
-
-        # TODO: I don't like this, but it's a quick fix for now until we start
-        # passing through the client info so we can respond with the correct
-        # format.
-        # Determine if the client is a Cline client
-        self.is_cline_client = any(
-            "Cline" in str(message.get("content", "")) for message in request.get("messages", [])
-        )
-
+        self.base_tool = base_tool
         if is_fim_request:
             prompt = request["messages"][0].get("content", "")
             response = await self.client.generate(
@@ -116,7 +110,7 @@ def _create_streaming_response(self, stream: AsyncIterator[ChatResponse]) -> Str
         is the format that FastAPI expects for streaming responses.
         """
         return StreamingResponse(
-            ollama_stream_generator(stream, self.is_cline_client),
+            ollama_stream_generator(stream, self.base_tool or ""),
             media_type="application/x-ndjson; charset=utf-8",
             headers={
                 "Cache-Control": "no-cache",
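For context on the two wire formats the generator now produces: non-cline clients receive newline-delimited JSON chunks, while `base_tool == "cline"` receives OpenAI-style `data: {...}` SSE lines. A minimal sketch of how a consumer might decode either format; the payload shapes shown here are hypothetical:

```python
import json
from typing import Optional


def parse_stream_line(line: str, base_tool: str) -> Optional[dict]:
    """Decode one line of the shim's streaming output.

    When base_tool == "cline" the handler yields OpenAI-style SSE lines
    ("data: {...}"); for any other client it yields plain NDJSON.
    """
    line = line.strip()
    if not line:
        return None
    if base_tool == "cline" and line.startswith("data: "):
        # Drop the SSE prefix before decoding the JSON payload.
        line = line[len("data: "):]
    return json.loads(line)


# Hypothetical chunks in each format, for illustration only.
print(parse_stream_line('data: {"choices": [{"delta": {"content": "hi"}}]}', "cline"))
print(parse_stream_line('{"message": {"content": "hi"}}', "other"))
```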