2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "uipath-langchain"
-version = "0.0.135"
+version = "0.0.136"
description = "UiPath Langchain"
readme = { file = "README.md", content-type = "text/markdown" }
requires-python = ">=3.10"
8 changes: 8 additions & 0 deletions src/uipath_langchain/_cli/_runtime/_conversation.py
@@ -173,6 +173,14 @@ def map_message(
                    content_part_sequence=idx,
                ),
            )
    elif isinstance(message.content, str) and message.content:
        msg_event.content_part = UiPathConversationContentPartEvent(
            content_part_id=f"content-{message.id}",
            chunk=UiPathConversationContentPartChunkEvent(
                data=message.content,
                content_part_sequence=0,
            ),
        )

    stop_reason = message.response_metadata.get("stop_reason")
    if not message.content and stop_reason in ("tool_use", "end_turn"):
140 changes: 137 additions & 3 deletions src/uipath_langchain/chat/models.py
@@ -1,15 +1,15 @@
import json
import logging
-from typing import Any, Dict, List, Literal, Optional, Union
+from typing import Any, AsyncIterator, Dict, Iterator, List, Literal, Optional, Union

from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain_core.language_models import LanguageModelInput
-from langchain_core.messages import AIMessage, BaseMessage
+from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage
from langchain_core.messages.ai import UsageMetadata
-from langchain_core.outputs import ChatGeneration, ChatResult
+from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from langchain_core.runnables import Runnable
from langchain_openai.chat_models import AzureChatOpenAI
from pydantic import BaseModel
@@ -49,6 +49,54 @@ async def _agenerate(
        response = await self._acall(self.url, payload, self.auth_headers)
        return self._create_chat_result(response)

    def _stream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[ChatGenerationChunk]:
        if "tools" in kwargs and not kwargs["tools"]:
            del kwargs["tools"]
        payload = self._get_request_payload(messages, stop=stop, **kwargs)
        response = self._call(self.url, payload, self.auth_headers)

        # For non-streaming response, yield single chunk
        chat_result = self._create_chat_result(response)
        chunk = ChatGenerationChunk(
            message=AIMessageChunk(
                content=chat_result.generations[0].message.content,
                additional_kwargs=chat_result.generations[0].message.additional_kwargs,
                response_metadata=chat_result.generations[0].message.response_metadata,
                usage_metadata=chat_result.generations[0].message.usage_metadata,  # type: ignore
            )
        )
        yield chunk
Comment on lines +62 to +74

Copilot AI (Sep 26, 2025):

The streaming implementation returns a single chunk instead of true streaming. Consider implementing actual streaming by making a streaming request to the API or clearly document this as a fallback implementation.

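If the UiPath LLM gateway exposes a streaming mode, the fallback above could emit incremental chunks instead. Below is a minimal sketch of the sync path under two assumptions not confirmed by this PR: that `self._call` accepts a hypothetical `stream=True` flag and then yields decoded chunk payloads, and that `_create_chat_result` can parse each of those payloads individually.

```python
    def _stream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[ChatGenerationChunk]:
        if "tools" in kwargs and not kwargs["tools"]:
            del kwargs["tools"]
        payload = self._get_request_payload(messages, stop=stop, **kwargs)
        # Assumption: stream=True makes _call yield one decoded payload per chunk.
        for chunk_data in self._call(self.url, payload, self.auth_headers, stream=True):
            chat_result = self._create_chat_result(chunk_data)
            message = chat_result.generations[0].message
            chunk = ChatGenerationChunk(
                message=AIMessageChunk(
                    content=message.content,
                    additional_kwargs=message.additional_kwargs,
                    response_metadata=message.response_metadata,
                    usage_metadata=message.usage_metadata,  # type: ignore
                )
            )
            if run_manager:
                # Report each partial chunk to LangChain callbacks as it arrives.
                run_manager.on_llm_new_token(chunk.text, chunk=chunk)
            yield chunk
```

The same shape would apply to `_astream` with `async for` over `self._acall(..., stream=True)`, as the suggested change further down also sketches.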

    async def _astream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> AsyncIterator[ChatGenerationChunk]:
        if "tools" in kwargs and not kwargs["tools"]:
            del kwargs["tools"]
        payload = self._get_request_payload(messages, stop=stop, **kwargs)
        response = await self._acall(self.url, payload, self.auth_headers)

        # For non-streaming response, yield single chunk
        chat_result = self._create_chat_result(response)
        chunk = ChatGenerationChunk(
            message=AIMessageChunk(
                content=chat_result.generations[0].message.content,
                additional_kwargs=chat_result.generations[0].message.additional_kwargs,
                response_metadata=chat_result.generations[0].message.response_metadata,
                usage_metadata=chat_result.generations[0].message.usage_metadata,  # type: ignore
            )
        )
        yield chunk

Comment on lines +86 to +99

Copilot AI (Sep 26, 2025):

The async streaming implementation also returns a single chunk instead of true streaming. This duplicates the same non-streaming behavior as the sync version.

Suggested change:
-        response = await self._acall(self.url, payload, self.auth_headers)
-        # For non-streaming response, yield single chunk
-        chat_result = self._create_chat_result(response)
-        chunk = ChatGenerationChunk(
-            message=AIMessageChunk(
-                content=chat_result.generations[0].message.content,
-                additional_kwargs=chat_result.generations[0].message.additional_kwargs,
-                response_metadata=chat_result.generations[0].message.response_metadata,
-                usage_metadata=chat_result.generations[0].message.usage_metadata,  # type: ignore
-            )
-        )
-        yield chunk
+        # Assume _acall returns an async iterator over streamed response chunks
+        async for chunk_data in self._acall(self.url, payload, self.auth_headers, stream=True):
+            # Convert each streamed chunk to ChatGenerationChunk
+            chat_result = self._create_chat_result(chunk_data)
+            yield ChatGenerationChunk(
+                message=AIMessageChunk(
+                    content=chat_result.generations[0].message.content,
+                    additional_kwargs=chat_result.generations[0].message.additional_kwargs,
+                    response_metadata=chat_result.generations[0].message.response_metadata,
+                    usage_metadata=chat_result.generations[0].message.usage_metadata,  # type: ignore
+                )
+            )

    def with_structured_output(
        self,
        schema: Optional[Any] = None,
@@ -217,6 +265,92 @@ async def _agenerate(
        response = await self._acall(self.url, payload, self.auth_headers)
        return self._create_chat_result(response)

    def _stream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[ChatGenerationChunk]:
        """Stream the LLM on a given prompt.

        Args:
            messages: the prompt composed of a list of messages.
            stop: a list of strings on which the model should stop generating.
            run_manager: A run manager with callbacks for the LLM.
            **kwargs: Additional keyword arguments.

        Returns:
            An iterator of ChatGenerationChunk objects.
        """
        if kwargs.get("tools"):
            kwargs["tools"] = [tool["function"] for tool in kwargs["tools"]]
        if "tool_choice" in kwargs and kwargs["tool_choice"]["type"] == "function":
            kwargs["tool_choice"] = {
                "type": "tool",
                "name": kwargs["tool_choice"]["function"]["name"],
            }
        payload = self._get_request_payload(messages, stop=stop, **kwargs)
        response = self._call(self.url, payload, self.auth_headers)

        # For non-streaming response, yield single chunk
        chat_result = self._create_chat_result(response)
        chunk = ChatGenerationChunk(
            message=AIMessageChunk(
                content=chat_result.generations[0].message.content,
                additional_kwargs=chat_result.generations[0].message.additional_kwargs,
                response_metadata=chat_result.generations[0].message.response_metadata,
                usage_metadata=chat_result.generations[0].message.usage_metadata,  # type: ignore
                tool_calls=getattr(
                    chat_result.generations[0].message, "tool_calls", None
                ),
            )
        )
        yield chunk
Comment on lines +294 to +309

Copilot AI (Sep 26, 2025):

This is the third instance of duplicated non-streaming logic in streaming methods. The code pattern is repeated across multiple methods with only minor variations.


    async def _astream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> AsyncIterator[ChatGenerationChunk]:
        """Async stream the LLM on a given prompt.

        Args:
            messages: the prompt composed of a list of messages.
            stop: a list of strings on which the model should stop generating.
            run_manager: A run manager with callbacks for the LLM.
            **kwargs: Additional keyword arguments.

        Returns:
            An async iterator of ChatGenerationChunk objects.
        """
        if kwargs.get("tools"):
            kwargs["tools"] = [tool["function"] for tool in kwargs["tools"]]
        if "tool_choice" in kwargs and kwargs["tool_choice"]["type"] == "function":
            kwargs["tool_choice"] = {
                "type": "tool",
                "name": kwargs["tool_choice"]["function"]["name"],
            }
        payload = self._get_request_payload(messages, stop=stop, **kwargs)
        response = await self._acall(self.url, payload, self.auth_headers)

        # For non-streaming response, yield single chunk
        chat_result = self._create_chat_result(response)
        chunk = ChatGenerationChunk(
            message=AIMessageChunk(
                content=chat_result.generations[0].message.content,
                additional_kwargs=chat_result.generations[0].message.additional_kwargs,
                response_metadata=chat_result.generations[0].message.response_metadata,
                usage_metadata=chat_result.generations[0].message.usage_metadata,  # type: ignore
                tool_calls=getattr(
                    chat_result.generations[0].message, "tool_calls", None
                ),
            )
        )
        yield chunk
Comment on lines +337 to +352

Copilot AI (Sep 26, 2025):

Fourth instance of the same duplicated non-streaming implementation. Consider extracting this chunk creation logic into a shared helper method to reduce code duplication.

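One way to act on this suggestion: a shared helper that wraps the first generation of a `ChatResult` as a `ChatGenerationChunk`, which all four `_stream`/`_astream` fallbacks could call. This is only a sketch; the helper name `_result_to_chunk` and the `include_tool_calls` flag are illustrative and not part of this PR.

```python
    def _result_to_chunk(
        self, chat_result: ChatResult, include_tool_calls: bool = False
    ) -> ChatGenerationChunk:
        """Wrap a non-streaming ChatResult as a single ChatGenerationChunk."""
        message = chat_result.generations[0].message
        fields: Dict[str, Any] = dict(
            content=message.content,
            additional_kwargs=message.additional_kwargs,
            response_metadata=message.response_metadata,
            usage_metadata=message.usage_metadata,  # type: ignore
        )
        if include_tool_calls:
            # Mirror the inline fallback, which forwards tool_calls when present.
            fields["tool_calls"] = getattr(message, "tool_calls", None)
        return ChatGenerationChunk(message=AIMessageChunk(**fields))
```

Each fallback body would then reduce to something like `yield self._result_to_chunk(self._create_chat_result(response), include_tool_calls=True)`.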

    def with_structured_output(
        self,
        schema: Optional[Any] = None,