Skip to content

Commit 84032ee

Browse files
committed
refactor(session): extract _safe_json_loads to DRY corruption recovery
- Extract duplicated raw_decode fallback into _safe_json_loads() helper - Add total-corruption fallback: returns empty dict instead of raising - Add 3 test cases for completely garbled files - Addresses gemini-code-assist review feedback on PR agentscope-ai#3278
1 parent ddbbd67 commit 84032ee

2 files changed

Lines changed: 96 additions & 32 deletions

File tree

src/copaw/app/runner/session.py

Lines changed: 42 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,45 @@
2121
logger = logging.getLogger(__name__)
2222

2323

24+
def _safe_json_loads(content: str, filepath: str = "") -> dict:
25+
"""Parse JSON with corruption recovery.
26+
27+
Attempts standard ``json.loads`` first. If that fails due to
28+
trailing garbage (a common symptom of concurrent-write race
29+
conditions), falls back to ``raw_decode`` to extract the first
30+
valid JSON object. If the file is completely unparseable, returns
31+
an empty dict and logs a warning so callers never crash.
32+
33+
Args:
34+
content: Raw file content.
35+
filepath: Used only for log messages.
36+
37+
Returns:
38+
Parsed dict, or ``{}`` when the content is beyond recovery.
39+
"""
40+
try:
41+
return json.loads(content)
42+
except json.JSONDecodeError:
43+
pass
44+
45+
# Try to extract the first valid JSON object.
46+
try:
47+
result, _ = json.JSONDecoder().raw_decode(content)
48+
logger.warning(
49+
"Session file %s had corrupted JSON. "
50+
"Recovered first valid object via raw_decode.",
51+
filepath,
52+
)
53+
return result
54+
except json.JSONDecodeError:
55+
logger.warning(
56+
"Session file %s is completely corrupted and could not "
57+
"be recovered. Returning empty dict.",
58+
filepath,
59+
)
60+
return {}
61+
62+
2463
# Characters forbidden in Windows filenames
2564
_UNSAFE_FILENAME_RE = re.compile(r'[\\/:*?"<>|]')
2665

@@ -111,18 +150,7 @@ async def load_session_state(
111150
errors="surrogatepass",
112151
) as f:
113152
content = await f.read()
114-
try:
115-
states = json.loads(content)
116-
except json.JSONDecodeError:
117-
# Fallback: extract first valid JSON object (handles
118-
# race-condition corruption where two writes overlap)
119-
decoder = json.JSONDecoder()
120-
states, _ = decoder.raw_decode(content)
121-
logger.warning(
122-
"Session file %s had corrupted JSON (Extra data). "
123-
"Recovered first valid object.",
124-
session_save_path,
125-
)
153+
states = _safe_json_loads(content, session_save_path)
126154

127155
for name, state_module in state_modules_mapping.items():
128156
if name in states:
@@ -165,16 +193,7 @@ async def update_session_state(
165193
errors="surrogatepass",
166194
) as f:
167195
content = await f.read()
168-
try:
169-
states = json.loads(content)
170-
except json.JSONDecodeError:
171-
decoder = json.JSONDecoder()
172-
states, _ = decoder.raw_decode(content)
173-
logger.warning(
174-
"Session file %s had corrupted JSON during update. "
175-
"Recovered first valid object.",
176-
session_save_path,
177-
)
196+
states = _safe_json_loads(content, session_save_path)
178197

179198
else:
180199
if not create_if_not_exist:
@@ -243,16 +262,7 @@ async def get_session_state_dict(
243262
errors="surrogatepass",
244263
) as file:
245264
content = await file.read()
246-
try:
247-
states = json.loads(content)
248-
except json.JSONDecodeError:
249-
decoder = json.JSONDecoder()
250-
states, _ = decoder.raw_decode(content)
251-
logger.warning(
252-
"Session file %s had corrupted JSON during get. "
253-
"Recovered first valid object.",
254-
session_save_path,
255-
)
265+
states = _safe_json_loads(content, session_save_path)
256266

257267
logger.info(
258268
"Get session state dict from %s successfully.",

tests/unit/agents/test_session.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,3 +180,57 @@ async def test_get_nonexistent(sess):
180180
user_id="",
181181
)
182182
assert result == {}
183+
184+
185+
# ── completely corrupted file ──────────────────────────────────────
186+
187+
188+
@pytest.mark.asyncio
189+
async def test_load_completely_corrupted(sess, tmp_session_dir):
190+
"""File with no valid JSON at all should not crash (returns empty)."""
191+
path = os.path.join(tmp_session_dir, "test--session.json")
192+
with open(path, "w", encoding="utf-8") as f:
193+
f.write("{{{THIS IS NOT JSON AT ALL!!!")
194+
195+
mod = FakeModule()
196+
await sess.load_session_state(
197+
"test:session",
198+
user_id="",
199+
memory=mod,
200+
)
201+
# memory key not in recovered (empty) dict → data stays None
202+
assert mod.data is None
203+
204+
205+
@pytest.mark.asyncio
206+
async def test_get_completely_corrupted(sess, tmp_session_dir):
207+
"""get_session_state_dict returns empty dict for totally garbled file."""
208+
path = os.path.join(tmp_session_dir, "test--session.json")
209+
with open(path, "w", encoding="utf-8") as f:
210+
f.write("NOT JSON {{{{")
211+
212+
result = await sess.get_session_state_dict(
213+
"test:session",
214+
user_id="",
215+
)
216+
assert result == {}
217+
218+
219+
@pytest.mark.asyncio
220+
async def test_update_completely_corrupted(sess, tmp_session_dir):
221+
"""update_session_state recovers from total corruption
222+
by starting fresh."""
223+
path = os.path.join(tmp_session_dir, "test--session.json")
224+
with open(path, "w", encoding="utf-8") as f:
225+
f.write("GARBAGE DATA !!!")
226+
227+
await sess.update_session_state(
228+
"test:session",
229+
key="memory.content",
230+
value=["recovered"],
231+
user_id="",
232+
)
233+
234+
with open(path, encoding="utf-8") as f:
235+
result = json.load(f)
236+
assert result["memory"]["content"] == ["recovered"]

0 commit comments

Comments
 (0)