Skip to content

Commit 89a5521

Browse files
authored
Merge pull request #165 from ikmak/main
Enhance the `process_folder_complete` function
2 parents eeb63ed + 32c408e commit 89a5521

File tree

2 files changed

+23
-2
lines changed

2 files changed

+23
-2
lines changed

raganything/batch.py

Lines changed: 20 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -104,13 +104,32 @@ async def process_folder_complete(
104104

105105
async def process_single_file(file_path: Path):
106106
async with semaphore:
107+
is_in_subdir = (
108+
lambda file_path, dir_path: len(
109+
file_path.relative_to(dir_path).parents
110+
)
111+
> 1
112+
)(file_path, folder_path_obj)
113+
107114
try:
108115
await self.process_document_complete(
109116
str(file_path),
110-
output_dir=output_dir,
117+
output_dir=(
118+
output_dir
119+
if not is_in_subdir
120+
else str(
121+
output_path
122+
/ file_path.parent.relative_to(folder_path_obj)
123+
)
124+
),
111125
parse_method=parse_method,
112126
split_by_character=split_by_character,
113127
split_by_character_only=split_by_character_only,
128+
file_name=(
129+
None
130+
if not is_in_subdir
131+
else str(file_path.relative_to(folder_path_obj))
132+
),
114133
)
115134
return True, str(file_path), None
116135
except Exception as e:

raganything/processor.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1422,6 +1422,7 @@ async def process_document_complete(
14221422
split_by_character: str | None = None,
14231423
split_by_character_only: bool = False,
14241424
doc_id: str | None = None,
1425+
file_name: str | None = None,
14251426
**kwargs,
14261427
):
14271428
"""
@@ -1473,7 +1474,8 @@ async def process_document_complete(
14731474

14741475
# Step 3: Insert pure text content with all parameters
14751476
if text_content.strip():
1476-
file_name = os.path.basename(file_path)
1477+
if file_name is None:
1478+
file_name = os.path.basename(file_path)
14771479
await insert_text_content(
14781480
self.lightrag,
14791481
input=text_content,

0 commit comments

Comments
 (0)