@@ -531,6 +531,14 @@ async def _process_multimodal_content(
531531 doc_id = doc_id ,
532532 )
533533
534+ # Ensure LightRAG is initialized before accessing its storages
535+ init_result = await self ._ensure_lightrag_initialized ()
536+ if not init_result or not init_result .get ("success" ):
537+ self .logger .error (
538+ "LightRAG initialization failed; skipping multimodal processing"
539+ )
540+ return
541+
534542 # Check multimodal processing status - handle LightRAG's early DocStatus.PROCESSED marking
535543 try :
536544 existing_doc_status = await self .lightrag .doc_status .get_by_id (doc_id )
@@ -573,9 +581,6 @@ async def _process_multimodal_content(
573581 pipeline_status ["history_messages" ].append (log_message )
574582
575583 try :
576- # Ensure LightRAG is initialized
577- await self ._ensure_lightrag_initialized ()
578-
579584 await self ._process_multimodal_content_batch_type_aware (
580585 multimodal_items = multimodal_items , file_path = file_path , doc_id = doc_id
581586 )
@@ -1540,7 +1545,11 @@ async def process_document_complete(
15401545
15411546 try :
15421547 # Ensure LightRAG is initialized
1543- await self ._ensure_lightrag_initialized ()
1548+ init_result = await self ._ensure_lightrag_initialized ()
1549+ if not init_result or not init_result .get ("success" ):
1550+ raise RuntimeError (
1551+ f"LightRAG initialization failed: { (init_result or {}).get ('error' , 'unknown error' )} "
1552+ )
15441553
15451554 # Use config defaults if not provided
15461555 if output_dir is None :
@@ -1675,25 +1684,61 @@ async def process_document_complete_lightrag_api(
16751684 doc_pre_id = f"doc-pre-{ file_name } "
16761685 pipeline_status = None
16771686 pipeline_status_lock = None
1687+ current_doc_status = {} # initialised here so the except block can always unpack it
1688+
async def mark_initialization_failed(error_msg: str) -> None:
    """Persist a FAILED doc status after a LightRAG initialization failure.

    Best-effort helper: it never raises. If ``self.lightrag`` or its
    ``doc_status`` storage is not available, the failure is only logged.
    Otherwise the record stored under ``doc_pre_id`` is marked as
    ``DocStatus.FAILED``; fields of an already existing record are kept
    and only ``status``, ``error_msg`` and ``updated_at`` are overwritten.

    Args:
        error_msg: Description of the initialization failure to store in
            the status record.
    """
    # getattr with a default tolerates both a missing `lightrag` attribute
    # and `lightrag` being None (getattr(None, "doc_status", None) is None).
    lightrag = getattr(self, "lightrag", None)
    doc_status = getattr(lightrag, "doc_status", None)
    if doc_status is None:
        self.logger.error(
            "LightRAG initialization failed before doc_status was available; "
            f"unable to persist failed status for {file_path}"
        )
        return

    try:
        existing_status = await doc_status.get_by_id(doc_pre_id)
        # The format string hard-codes a "+00:00" offset, so the time must be
        # rendered in UTC. time.strftime() without a time tuple would format
        # local time and produce a wrong timestamp on non-UTC hosts.
        updated_at = time.strftime("%Y-%m-%dT%H:%M:%S+00:00", time.gmtime())
        if existing_status:
            # Keep whatever was already recorded; only flip it to FAILED.
            failed_status = {
                **existing_status,
                "status": DocStatus.FAILED,
                "error_msg": error_msg,
                "updated_at": updated_at,
            }
        else:
            failed_status = {
                "status": DocStatus.FAILED,
                "content": "",
                "error_msg": error_msg,
                "content_summary": "",
                "multimodal_content": [],
                "scheme_name": scheme_name,
                "content_length": 0,
                "created_at": "",
                "updated_at": updated_at,
                "file_path": file_name,
            }
        await doc_status.upsert({doc_pre_id: failed_status})
        await doc_status.index_done_callback()
    except Exception as status_error:
        # Deliberately broad: failing to record the failure must not mask
        # the original initialization error handled by the caller.
        self.logger.error(
            f"Failed to persist initialization failure status for {file_path}: "
            f"{status_error}"
        )
16781728
16791729 if parser :
16801730 self .config .parser = parser
16811731
1682- current_doc_status = await self .lightrag .doc_status .get_by_id (doc_pre_id )
1683-
16841732 try :
1685- # Ensure LightRAG is initialized
1733+ # Ensure LightRAG is initialized before accessing its storages
16861734 result = await self ._ensure_lightrag_initialized ()
1687- if not result ["success" ]:
1688- await self .lightrag .doc_status .upsert (
1689- {
1690- doc_pre_id : {
1691- ** current_doc_status ,
1692- "status" : DocStatus .FAILED ,
1693- "error_msg" : result ["error" ],
1694- }
1695- }
1735+ if not result or not result .get ("success" ):
1736+ error_msg = (result or {}).get ("error" , "unknown error" )
1737+ self .logger .error (
1738+ f"LightRAG initialization failed: { error_msg } ; "
1739+ f"skipping document processing for { file_path } "
16961740 )
1741+ await mark_initialization_failed (str (error_msg ))
16971742 return False
16981743
16991744 # Use config defaults if not provided
@@ -1761,9 +1806,10 @@ async def process_document_complete_lightrag_api(
17611806 file_path , output_dir , parse_method , display_stats , ** kwargs
17621807 )
17631808 except MineruExecutionError as e :
1764- error_message = e .error_msg
17651809 if isinstance (e .error_msg , list ):
1766- error_message = "\n " .join (e .error_msg )
1810+ error_message = "\n " .join (str (m ) for m in e .error_msg )
1811+ else :
1812+ error_message = str (e .error_msg )
17671813 await self .lightrag .doc_status .upsert (
17681814 {
17691815 doc_pre_id : {
@@ -1859,15 +1905,23 @@ async def process_document_complete_lightrag_api(
18591905 return False
18601906
18611907 finally :
1862- async with pipeline_status_lock :
1863- pipeline_status .update ({"scan_disabled" : False })
1864- pipeline_status ["latest_message" ] = (
1865- f"RAGAnything processing completed for { file_name } "
1866- )
1867- pipeline_status ["history_messages" ].append (
1868- f"RAGAnything processing completed for { file_name } "
1869- )
1870- pipeline_status ["history_messages" ].append ("Now is allowed to scan" )
1908+ if pipeline_status_lock is not None and pipeline_status is not None :
1909+ try :
1910+ async with pipeline_status_lock :
1911+ pipeline_status .update ({"scan_disabled" : False })
1912+ pipeline_status ["latest_message" ] = (
1913+ f"RAGAnything processing completed for { file_name } "
1914+ )
1915+ pipeline_status ["history_messages" ].append (
1916+ f"RAGAnything processing completed for { file_name } "
1917+ )
1918+ pipeline_status ["history_messages" ].append (
1919+ "Now is allowed to scan"
1920+ )
1921+ except Exception as _finally_err :
1922+ self .logger .error (
1923+ f"Failed to update pipeline status in finally block: { _finally_err } "
1924+ )
18711925
18721926 async def insert_content_list (
18731927 self ,
@@ -1907,7 +1961,11 @@ async def insert_content_list(
19071961 doc_start_time = time .time ()
19081962
19091963 # Ensure LightRAG is initialized
1910- await self ._ensure_lightrag_initialized ()
1964+ init_result = await self ._ensure_lightrag_initialized ()
1965+ if not init_result or not init_result .get ("success" ):
1966+ raise RuntimeError (
1967+ f"LightRAG initialization failed: { (init_result or {}).get ('error' , 'unknown error' )} "
1968+ )
19111969
19121970 # Use config defaults if not provided
19131971 if display_stats is None :
0 commit comments