Skip to content

Commit c48314e

Browse files
committed
fix(streaming) accumulating json parsing when streaming is True
1 parent b89e926 commit c48314e

File tree

3 files changed

+44
-18
lines changed

3 files changed

+44
-18
lines changed

agentle/generations/providers/google/adapters/generate_generate_content_response_to_generation_adapter.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import datetime
44
import logging
55
import uuid
6-
from collections.abc import AsyncIterator
6+
from collections.abc import AsyncGenerator, AsyncIterator
77
from logging import Logger
88
from typing import TYPE_CHECKING, Any, Literal, cast, overload
99

@@ -88,11 +88,11 @@ def adapt(self, _f: "GenerateContentResponse") -> Generation[T]: ...
8888
@overload
8989
def adapt(
9090
self, _f: AsyncIterator["GenerateContentResponse"]
91-
) -> AsyncIterator[Generation[T]]: ...
91+
) -> AsyncGenerator[Generation[T], None]: ...
9292

9393
def adapt(
9494
self, _f: "GenerateContentResponse | AsyncIterator[GenerateContentResponse]"
95-
) -> Generation[T] | AsyncIterator[Generation[T]]:
95+
) -> Generation[T] | AsyncGenerator[Generation[T], None]:
9696
"""
9797
Convert Google response(s) to Agentle Generation object(s).
9898
@@ -214,7 +214,7 @@ async def _adapt_single_async(
214214

215215
async def _adapt_streaming(
216216
self, response_stream: AsyncIterator["GenerateContentResponse"]
217-
) -> AsyncIterator[Generation[T]]:
217+
) -> AsyncGenerator[Generation[T], None]:
218218
"""Adapt a streaming response with proper text accumulation."""
219219
generation_id = self.preferred_id or uuid.uuid4()
220220
created_time = datetime.datetime.now()

agentle/utils/parse_streaming_json.py

Lines changed: 39 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@ def parse_streaming_json[T: BaseModel](potential_json: str | None, model: type[T
2121
if potential_json is None:
2222
return model()
2323

24-
# print(f"parsing: {potential_json}")
25-
2624
def find_json_boundaries(text: str) -> tuple[int, int]:
2725
"""Find the start and potential end of JSON in the text."""
2826

@@ -95,17 +93,32 @@ def fix_common_json_issues(json_str: str) -> str:
9593
# Remove any leading/trailing whitespace
9694
json_str = json_str.strip()
9795

98-
# Fix missing closing quotes on string values (at the end)
99-
# Look for patterns like: "key": "value without closing quote
100-
json_str = re.sub(r':\s*"([^"]*?)(?:\s*[,}]|$)', r': "\1"', json_str)
101-
102-
# Fix missing closing quotes for keys
103-
# Look for patterns like: "key without quotes:
104-
json_str = re.sub(r'"([^"]*?)(?=\s*:)', r'"\1"', json_str)
105-
10696
# Remove trailing commas before closing braces/brackets
10797
json_str = re.sub(r",\s*([}\]])", r"\1", json_str)
10898

99+
# For streaming JSON, we need to handle incomplete strings carefully
100+
# Check if we have an unclosed string at the end
101+
in_string = False
102+
escape_next = False
103+
last_quote_pos = -1
104+
105+
for i, char in enumerate(json_str):
106+
if escape_next:
107+
escape_next = False
108+
continue
109+
if char == '\\':
110+
escape_next = True
111+
continue
112+
if char == '"':
113+
in_string = not in_string
114+
if in_string:
115+
last_quote_pos = i
116+
117+
# If we're in a string at the end (incomplete), close it properly
118+
if in_string and last_quote_pos != -1:
119+
# Add closing quote for the incomplete string
120+
json_str += '"'
121+
109122
# Ensure the JSON has proper closing braces if it appears incomplete
110123
open_braces = json_str.count("{") - json_str.count("}")
111124
open_brackets = json_str.count("[") - json_str.count("]")
@@ -124,12 +137,25 @@ def extract_data_manually(json_str: str) -> dict[str, Any]:
124137
data = {}
125138

126139
# Extract string key-value pairs with quoted keys
127-
# Pattern: "key": "value" or 'key': 'value'
128-
string_pattern = r'["\']([^"\']+)["\']:\s*["\']([^"\']*)["\']?'
129-
string_matches = re.findall(string_pattern, json_str)
140+
# IMPROVED: Handle long strings that may contain newlines, special chars, etc.
141+
# Pattern: "key": "value..." - capture everything until the next unescaped quote or EOF
142+
string_pattern = r'["\']([\w]+)["\']:\s*["\']([^"\']*?)(?:["\']|$)'
143+
string_matches = re.findall(string_pattern, json_str, re.DOTALL)
144+
145+
# Also try to capture very long strings that span multiple lines
146+
# This catches incomplete strings during streaming
147+
long_string_pattern = r'["\']([\w_]+)["\']:\s*["\'](.+?)(?:["\'],?\s*["}]|$)'
148+
long_matches = re.findall(long_string_pattern, json_str, re.DOTALL)
130149

131150
for key, value in string_matches:
132151
data[key] = value
152+
153+
# Prefer long_matches for fields that might be truncated in string_matches
154+
for key, value in long_matches:
155+
# Only override if the long match has more content
156+
existing = data.get(key, "")
157+
if key not in data or (isinstance(existing, str) and len(value) > len(existing)):
158+
data[key] = value
133159

134160
# Extract string key-value pairs with unquoted keys
135161
# Pattern: key: "value" (no quotes around key)

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)