2626from api .db .db_models import Task
2727from api .db .services import duplicate_name
2828from api .db .services .document_service import DocumentService , doc_upload_and_parse
29- from common .metadata_utils import meta_filter , convert_conditions
29+ from common .metadata_utils import meta_filter , convert_conditions , turn2jsonschema
3030from api .db .services .file2document_service import File2DocumentService
3131from api .db .services .file_service import FileService
3232from api .db .services .knowledgebase_service import KnowledgebaseService
@@ -226,6 +226,7 @@ async def list_docs():
226226 kb_id = request .args .get ("kb_id" )
227227 if not kb_id :
228228 return get_json_result (data = False , message = 'Lack of "KB ID"' , code = RetCode .ARGUMENT_ERROR )
229+
229230 tenants = UserTenantService .query (user_id = current_user .id )
230231 for tenant in tenants :
231232 if KnowledgebaseService .query (tenant_id = tenant .tenant_id , id = kb_id ):
@@ -345,6 +346,8 @@ async def list_docs():
345346 doc_item ["thumbnail" ] = f"/v1/document/image/{ kb_id } -{ doc_item ['thumbnail' ]} "
346347 if doc_item .get ("source_type" ):
347348 doc_item ["source_type" ] = doc_item ["source_type" ].split ("/" )[0 ]
349+ if doc_item ["parser_config" ].get ("metadata" ):
350+ doc_item ["parser_config" ]["metadata" ] = turn2jsonschema (doc_item ["parser_config" ]["metadata" ])
348351
349352 return get_json_result (data = {"total" : tol , "docs" : docs })
350353 except Exception as e :
@@ -406,6 +409,7 @@ async def doc_infos():
406409async def metadata_summary ():
407410 req = await get_request_json ()
408411 kb_id = req .get ("kb_id" )
412+ doc_ids = req .get ("doc_ids" )
409413 if not kb_id :
410414 return get_json_result (data = False , message = 'Lack of "KB ID"' , code = RetCode .ARGUMENT_ERROR )
411415
@@ -417,69 +421,33 @@ async def metadata_summary():
417421 return get_json_result (data = False , message = "Only owner of dataset authorized for this operation." , code = RetCode .OPERATING_ERROR )
418422
419423 try :
420- summary = DocumentService .get_metadata_summary (kb_id )
424+ summary = DocumentService .get_metadata_summary (kb_id , doc_ids )
421425 return get_json_result (data = {"summary" : summary })
422426 except Exception as e :
423427 return server_error_response (e )
424428
425429
@manager.route("/metadata/update", methods=["POST"])  # noqa: F821
@login_required
@validate_request("doc_ids")
async def metadata_update():
    """Apply metadata updates and deletions to an explicit list of documents.

    Request JSON fields read by this handler:
        doc_ids: list of document ID strings to modify.
        updates: optional list of objects, each with a non-empty ``key`` and
            a ``value`` entry.
        deletes: optional list of objects, each with a non-empty ``key``.

    Returns:
        A JSON result whose data is ``{"updated": <service result>}`` on
        success, an ``ARGUMENT_ERROR`` result when the payload is malformed,
        or a server-error response if the service call raises.
    """
    # NOTE(review): this handler performs no dataset-ownership or
    # per-document access check for current_user (metadata_summary in this
    # module does check ownership of kb_id). Confirm that
    # batch_update_metadata enforces access itself, or add a check here.
    req = await get_request_json()
    document_ids = req.get("doc_ids")
    updates = req.get("updates", []) or []
    deletes = req.get("deletes", []) or []

    # A bare string or dict would otherwise be handed to the service as if
    # it were a collection of IDs.
    if not isinstance(document_ids, list) or not all(isinstance(doc_id, str) and doc_id for doc_id in document_ids):
        return get_json_result(data=False, message="doc_ids must be a list of document IDs.", code=RetCode.ARGUMENT_ERROR)
    if not isinstance(updates, list) or not isinstance(deletes, list):
        return get_json_result(data=False, message="updates and deletes must be lists.", code=RetCode.ARGUMENT_ERROR)

    for upd in updates:
        if not isinstance(upd, dict) or not upd.get("key") or "value" not in upd:
            return get_json_result(data=False, message="Each update requires key and value.", code=RetCode.ARGUMENT_ERROR)
    for d in deletes:
        if not isinstance(d, dict) or not d.get("key"):
            return get_json_result(data=False, message="Each delete requires key.", code=RetCode.ARGUMENT_ERROR)

    # Nothing to target: avoid calling the service with kb_id=None and an
    # empty ID list, whose handling is not visible from this handler.
    if not document_ids:
        return get_json_result(data={"updated": 0})

    try:
        # kb_id is passed as None because the request no longer carries one.
        updated = DocumentService.batch_update_metadata(None, document_ids, updates, deletes)
        return get_json_result(data={"updated": updated})
    except Exception as e:
        # Same failure path as metadata_summary in this module.
        return server_error_response(e)
483451
484452
485453@manager .route ("/update_metadata_setting" , methods = ["POST" ]) # noqa: F821
0 commit comments