|
21 | 21 | logger = logging.getLogger(__name__) |
22 | 22 |
|
23 | 23 |
|
| 24 | +def _safe_json_loads(content: str, filepath: str = "") -> dict: |
| 25 | + """Parse JSON with corruption recovery. |
| 26 | +
|
| 27 | + Attempts standard ``json.loads`` first. If that fails due to |
| 28 | + trailing garbage (a common symptom of concurrent-write race |
| 29 | + conditions), falls back to ``raw_decode`` to extract the first |
| 30 | + valid JSON object. If the file is completely unparseable, returns |
| 31 | + an empty dict and logs a warning so callers never crash. |
| 32 | +
|
| 33 | + Args: |
| 34 | + content: Raw file content. |
| 35 | + filepath: Used only for log messages. |
| 36 | +
|
| 37 | + Returns: |
| 38 | + Parsed dict, or ``{}`` when the content is beyond recovery. |
| 39 | + """ |
| 40 | + try: |
| 41 | + return json.loads(content) |
| 42 | + except json.JSONDecodeError: |
| 43 | + pass |
| 44 | + |
| 45 | + # Try to extract the first valid JSON object. |
| 46 | + try: |
| 47 | + result, _ = json.JSONDecoder().raw_decode(content) |
| 48 | + logger.warning( |
| 49 | + "Session file %s had corrupted JSON. " |
| 50 | + "Recovered first valid object via raw_decode.", |
| 51 | + filepath, |
| 52 | + ) |
| 53 | + return result |
| 54 | + except json.JSONDecodeError: |
| 55 | + logger.warning( |
| 56 | + "Session file %s is completely corrupted and could not " |
| 57 | + "be recovered. Returning empty dict.", |
| 58 | + filepath, |
| 59 | + ) |
| 60 | + return {} |
| 61 | + |
| 62 | + |
24 | 63 | # Characters forbidden in Windows filenames |
25 | 64 | _UNSAFE_FILENAME_RE = re.compile(r'[\\/:*?"<>|]') |
26 | 65 |
|
@@ -111,18 +150,7 @@ async def load_session_state( |
111 | 150 | errors="surrogatepass", |
112 | 151 | ) as f: |
113 | 152 | content = await f.read() |
114 | | - try: |
115 | | - states = json.loads(content) |
116 | | - except json.JSONDecodeError: |
117 | | - # Fallback: extract first valid JSON object (handles |
118 | | - # race-condition corruption where two writes overlap) |
119 | | - decoder = json.JSONDecoder() |
120 | | - states, _ = decoder.raw_decode(content) |
121 | | - logger.warning( |
122 | | - "Session file %s had corrupted JSON (Extra data). " |
123 | | - "Recovered first valid object.", |
124 | | - session_save_path, |
125 | | - ) |
| 153 | + states = _safe_json_loads(content, session_save_path) |
126 | 154 |
|
127 | 155 | for name, state_module in state_modules_mapping.items(): |
128 | 156 | if name in states: |
@@ -165,16 +193,7 @@ async def update_session_state( |
165 | 193 | errors="surrogatepass", |
166 | 194 | ) as f: |
167 | 195 | content = await f.read() |
168 | | - try: |
169 | | - states = json.loads(content) |
170 | | - except json.JSONDecodeError: |
171 | | - decoder = json.JSONDecoder() |
172 | | - states, _ = decoder.raw_decode(content) |
173 | | - logger.warning( |
174 | | - "Session file %s had corrupted JSON during update. " |
175 | | - "Recovered first valid object.", |
176 | | - session_save_path, |
177 | | - ) |
| 196 | + states = _safe_json_loads(content, session_save_path) |
178 | 197 |
|
179 | 198 | else: |
180 | 199 | if not create_if_not_exist: |
@@ -243,16 +262,7 @@ async def get_session_state_dict( |
243 | 262 | errors="surrogatepass", |
244 | 263 | ) as file: |
245 | 264 | content = await file.read() |
246 | | - try: |
247 | | - states = json.loads(content) |
248 | | - except json.JSONDecodeError: |
249 | | - decoder = json.JSONDecoder() |
250 | | - states, _ = decoder.raw_decode(content) |
251 | | - logger.warning( |
252 | | - "Session file %s had corrupted JSON during get. " |
253 | | - "Recovered first valid object.", |
254 | | - session_save_path, |
255 | | - ) |
| 265 | + states = _safe_json_loads(content, session_save_path) |
256 | 266 |
|
257 | 267 | logger.info( |
258 | 268 | "Get session state dict from %s successfully.", |
|
0 commit comments