Skip to content

Commit 2fe24d2

Browse files
authored
Merge pull request #3271 from QuivrHQ/feat/url-add-knowledge
feat: url add knowledge
2 parents 87a7116 + fcf2b34 commit 2fe24d2

File tree

3 files changed

+44
-8
lines changed

3 files changed

+44
-8
lines changed

backend/api/quivr_api/modules/knowledge/service/knowledge_service.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,8 +231,15 @@ async def create_knowledge(
231231
"knowledge_id": knowledge_db.id,
232232
},
233233
)
234+
return knowledge_db
235+
else:
236+
knowledge_db = await self.repository.update_knowledge(
237+
knowledge_db,
238+
KnowledgeUpdate(status=KnowledgeStatus.UPLOADED),
239+
autocommit=autocommit,
240+
)
241+
return knowledge_db
234242

235-
return knowledge_db
236243
except Exception as e:
237244
logger.exception(
238245
f"Error uploading knowledge {knowledgedb.id} to storage : {e}"

backend/api/quivr_api/modules/knowledge/tests/test_knowledge_service.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,30 @@ async def test_create_knowledge_file(session: AsyncSession, user: User):
517517
storage.knowledge_exists(km)
518518

519519

520+
@pytest.mark.asyncio(loop_scope="session")
521+
async def test_create_knowledge_web(session: AsyncSession, user: User):
522+
assert user.id
523+
storage = FakeStorage()
524+
repository = KnowledgeRepository(session)
525+
service = KnowledgeService(repository, storage)
526+
527+
km_to_add = AddKnowledge(
528+
url="http://quivr.app",
529+
source=KnowledgeSource.WEB,
530+
is_folder=False,
531+
parent_id=None,
532+
)
533+
534+
km = await service.create_knowledge(
535+
user_id=user.id, knowledge_to_add=km_to_add, upload_file=None
536+
)
537+
538+
assert km.id
539+
assert km.url == km_to_add.url
540+
assert km.status == KnowledgeStatus.UPLOADED
541+
assert not km.is_folder
542+
543+
520544
@pytest.mark.asyncio(loop_scope="session")
521545
async def test_create_knowledge_folder(session: AsyncSession, user: User):
522546
assert user.id
@@ -555,7 +579,7 @@ async def test_create_knowledge_folder(session: AsyncSession, user: User):
555579

556580

557581
@pytest.mark.asyncio(loop_scope="session")
558-
async def test_create_knowledge_file_in_folder(
582+
async def test_create_knowledge_file_in_folder_in_brain(
559583
monkeypatch, session: AsyncSession, user: User, folder_km_brain: KnowledgeDB
560584
):
561585
tasks = {}
@@ -948,10 +972,6 @@ async def test_list_knowledge_root(session: AsyncSession, user: User):
948972
assert len(root_kms) == 2
949973
assert {k.id for k in root_kms} == {root_folder.id, root_file.id}
950974

951-
# check order
952-
assert root_kms[0].file_name == "folder"
953-
assert root_kms[1].file_name == "file_1"
954-
955975

956976
@pytest.mark.asyncio(loop_scope="session")
957977
async def test_list_knowledge(session: AsyncSession, user: User):

backend/worker/quivr_worker/process/README.md

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,17 @@ If an exception occurs during the parsing loop, the following steps are taken:
4949
- This operation should be rolled back if an error occurs afterward. Otherwise, the knowledge could remain in `Processing` or `ERROR` status with associated vectors.
5050
- Reprocessing the knowledge would result in reinserting the vectors into the database, leading to duplicate vectors for the same knowledge.
5151

52-
2. Set the knowledge status to `ERROR`.
53-
3. Continue processing.
52+
**Transaction Safety for Each Operation:**
53+
54+
- **Creating knowledge and linking to brains**: These operations can be retried safely, because knowledge is only created if it does not already exist in the database.
55+
- **Downloading sync files**: This operation is idempotent but is safe to retry. If a change has occurred, we would download the latest version of the file.
56+
- **Linking knowledge to brains**: Only links the brain if it is not already associated with the knowledge. Safe for retry.
57+
- **Creating vectors**:
58+
- This operation should be rolled back if an error occurs afterward. Otherwise, the knowledge could remain in `Processing` or `ERROR` status with associated vectors.
59+
- Reprocessing the knowledge would result in reinserting the vectors into the database, leading to duplicate vectors for the same knowledge.
60+
61+
1. Set the knowledge status to `ERROR`.
62+
2. Continue processing.
5463

5564
| Note: This means that some knowledges will remain in an errored state. Currently, they are not automatically rescheduled for processing.
5665

0 commit comments

Comments
 (0)