From 5fbff29ed19f5ea4cac45360586dc6dc4512b595 Mon Sep 17 00:00:00 2001 From: 0912078 <0912078@naver.com> Date: Fri, 3 Apr 2026 12:16:54 +0900 Subject: [PATCH 1/3] fix: strip tags from modal processor fallback responses (closes #159) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reasoning models (DeepSeek-R1, Qwen2.5-think, etc.) wrap their chain-of-thought in blocks before emitting the final answer. When _robust_json_parse fails to extract a valid JSON object from the response, the four modal-processor parse methods (_parse_response, _parse_table_response, _parse_equation_response, _parse_generic_response) were returning the **raw** LLM response as the fallback caption and summary. This caused internal model reasoning to be stored in the knowledge graph instead of the actual content description. Fix: add a static helper `BaseModalProcessor._strip_thinking_tags` that removes / blocks (case-insensitive, multiline) and apply it in every fallback branch so only the final-answer text is stored or returned. The helper is tested in tests/test_strip_thinking_tags.py with 13 unit tests covering: tag variants, multiline blocks, multiple blocks, case-insensitivity, and the full fallback path for all four processor classes. --- raganything/modalprocessors.py | 52 ++++++++++--- tests/test_strip_thinking_tags.py | 125 ++++++++++++++++++++++++++++++ 2 files changed, 165 insertions(+), 12 deletions(-) create mode 100644 tests/test_strip_thinking_tags.py diff --git a/raganything/modalprocessors.py b/raganything/modalprocessors.py index f73505af2..022de08ba 100644 --- a/raganything/modalprocessors.py +++ b/raganything/modalprocessors.py @@ -544,6 +544,30 @@ async def _create_entity_and_chunk( chunk_results, ) + @staticmethod + def _strip_thinking_tags(text: str) -> str: + """Remove / tags produced by reasoning models. + + Models such as DeepSeek-R1 and Qwen2.5-think wrap their internal + chain-of-thought in ```` or ```` + blocks before emitting the final answer. When JSON parsing fails and + the raw LLM response is used as a fallback, storing the entire response + (including the reasoning preamble) pollutes the knowledge graph with + internal model thoughts rather than actual content descriptions. + + This helper strips those blocks so that only the final answer text is + stored or surfaced to callers. + """ + import re + + cleaned = re.sub( + r".*?", "", text, flags=re.DOTALL | re.IGNORECASE + ) + cleaned = re.sub( + r".*?", "", cleaned, flags=re.DOTALL | re.IGNORECASE + ) + return cleaned.strip() + def _robust_json_parse(self, response: str) -> dict: """Robust JSON parsing with multiple fallback strategies""" @@ -1019,14 +1043,15 @@ def _parse_response( except (json.JSONDecodeError, AttributeError, ValueError) as e: logger.error(f"Error parsing image analysis response: {e}") logger.debug(f"Raw response: {response}") + cleaned = self._strip_thinking_tags(response) fallback_entity = { "entity_name": entity_name if entity_name - else f"image_{compute_mdhash_id(response)}", + else f"image_{compute_mdhash_id(cleaned)}", "entity_type": "image", - "summary": response[:100] + "..." if len(response) > 100 else response, + "summary": cleaned[:100] + "..." if len(cleaned) > 100 else cleaned, } - return response, fallback_entity + return cleaned, fallback_entity class TableModalProcessor(BaseModalProcessor): @@ -1213,14 +1238,15 @@ def _parse_table_response( except (json.JSONDecodeError, AttributeError, ValueError) as e: logger.error(f"Error parsing table analysis response: {e}") logger.debug(f"Raw response: {response}") + cleaned = self._strip_thinking_tags(response) fallback_entity = { "entity_name": entity_name if entity_name - else f"table_{compute_mdhash_id(response)}", + else f"table_{compute_mdhash_id(cleaned)}", "entity_type": "table", - "summary": response[:100] + "..." if len(response) > 100 else response, + "summary": cleaned[:100] + "..." if len(cleaned) > 100 else cleaned, } - return response, fallback_entity + return cleaned, fallback_entity class EquationModalProcessor(BaseModalProcessor): @@ -1397,14 +1423,15 @@ def _parse_equation_response( except (json.JSONDecodeError, AttributeError, ValueError) as e: logger.error(f"Error parsing equation analysis response: {e}") logger.debug(f"Raw response: {response}") + cleaned = self._strip_thinking_tags(response) fallback_entity = { "entity_name": entity_name if entity_name - else f"equation_{compute_mdhash_id(response)}", + else f"equation_{compute_mdhash_id(cleaned)}", "entity_type": "equation", - "summary": response[:100] + "..." if len(response) > 100 else response, + "summary": cleaned[:100] + "..." if len(cleaned) > 100 else cleaned, } - return response, fallback_entity + return cleaned, fallback_entity class GenericModalProcessor(BaseModalProcessor): @@ -1559,11 +1586,12 @@ def _parse_generic_response( except (json.JSONDecodeError, AttributeError, ValueError) as e: logger.error(f"Error parsing {content_type} analysis response: {e}") logger.debug(f"Raw response: {response}") + cleaned = self._strip_thinking_tags(response) fallback_entity = { "entity_name": entity_name if entity_name - else f"{content_type}_{compute_mdhash_id(response)}", + else f"{content_type}_{compute_mdhash_id(cleaned)}", "entity_type": content_type, - "summary": response[:100] + "..." if len(response) > 100 else response, + "summary": cleaned[:100] + "..." if len(cleaned) > 100 else cleaned, } - return response, fallback_entity + return cleaned, fallback_entity diff --git a/tests/test_strip_thinking_tags.py b/tests/test_strip_thinking_tags.py new file mode 100644 index 000000000..2a8d3dfd0 --- /dev/null +++ b/tests/test_strip_thinking_tags.py @@ -0,0 +1,125 @@ +"""Tests for _strip_thinking_tags in BaseModalProcessor (issue #159). + +Reasoning models like DeepSeek-R1 and Qwen2.5-think wrap their chain-of-thought +in blocks. When JSON parsing fails, the raw response was +previously stored verbatim — polluting the knowledge graph with internal model +reasoning instead of actual content descriptions. These tests verify that the +fallback path strips thinking tags before storing or returning the response. +""" + +from raganything.modalprocessors import BaseModalProcessor + + +class TestStripThinkingTags: + """Unit tests for BaseModalProcessor._strip_thinking_tags.""" + + def test_strips_think_tags(self): + raw = "some reasoning herefinal answer" + assert BaseModalProcessor._strip_thinking_tags(raw) == "final answer" + + def test_strips_thinking_tags(self): + raw = "internal thoughtreal content" + assert BaseModalProcessor._strip_thinking_tags(raw) == "real content" + + def test_strips_multiline_think_block(self): + raw = "\nline one\nline two\n\nactual description" + assert BaseModalProcessor._strip_thinking_tags(raw) == "actual description" + + def test_case_insensitive(self): + raw = "uppercase blockanswer" + assert BaseModalProcessor._strip_thinking_tags(raw) == "answer" + + def test_no_tags_unchanged(self): + raw = "plain response without any tags" + assert BaseModalProcessor._strip_thinking_tags(raw) == raw + + def test_empty_string(self): + assert BaseModalProcessor._strip_thinking_tags("") == "" + + def test_only_think_tags_returns_empty(self): + raw = "nothing useful" + assert BaseModalProcessor._strip_thinking_tags(raw) == "" + + def test_multiple_think_blocks(self): + raw = "firstmiddlesecondend" + assert BaseModalProcessor._strip_thinking_tags(raw) == "middleend" + + def test_nested_angle_brackets_in_content_preserved(self): + """Content after think block that contains angle brackets must survive.""" + raw = "reasoningresult with HTML" + assert BaseModalProcessor._strip_thinking_tags(raw) == "result with HTML" + + +class ConcreteProcessor(BaseModalProcessor): + """Minimal concrete subclass used to instantiate BaseModalProcessor.""" + + def __init__(self): + # Skip the full __init__ that requires a LightRAG instance + pass + + async def process_multimodal_content(self, *args, **kwargs): + pass + + +class TestParseResponseFallback: + """Tests that parse fallback paths return cleaned (think-stripped) content.""" + + THINK_RESPONSE = ( + "\nLet me analyze this step by step...\n\n" + "This is the actual description of the image." + ) + + def setup_method(self): + self.proc = ConcreteProcessor() + + # Patch _robust_json_parse to always raise ValueError so fallback fires + def always_fail(response): + raise ValueError("forced failure") + + self.proc._robust_json_parse = always_fail + + def test_image_fallback_strips_think(self): + from raganything.modalprocessors import ImageModalProcessor + + proc = ConcreteProcessor() + proc.__class__ = ImageModalProcessor + proc._robust_json_parse = lambda r: (_ for _ in ()).throw(ValueError("forced")) + + caption, entity = proc._parse_response(self.THINK_RESPONSE) + assert "" not in caption + assert "" not in entity["summary"] + assert "actual description" in caption + + def test_table_fallback_strips_think(self): + from raganything.modalprocessors import TableModalProcessor + + proc = ConcreteProcessor() + proc.__class__ = TableModalProcessor + proc._robust_json_parse = lambda r: (_ for _ in ()).throw(ValueError("forced")) + + caption, entity = proc._parse_table_response(self.THINK_RESPONSE) + assert "" not in caption + assert "" not in entity["summary"] + assert "actual description" in caption + + def test_equation_fallback_strips_think(self): + from raganything.modalprocessors import EquationModalProcessor + + proc = ConcreteProcessor() + proc.__class__ = EquationModalProcessor + proc._robust_json_parse = lambda r: (_ for _ in ()).throw(ValueError("forced")) + + caption, entity = proc._parse_equation_response(self.THINK_RESPONSE) + assert "" not in caption + assert "" not in entity["summary"] + + def test_generic_fallback_strips_think(self): + from raganything.modalprocessors import GenericModalProcessor + + proc = ConcreteProcessor() + proc.__class__ = GenericModalProcessor + proc._robust_json_parse = lambda r: (_ for _ in ()).throw(ValueError("forced")) + + caption, entity = proc._parse_generic_response(self.THINK_RESPONSE) + assert "" not in caption + assert "" not in entity["summary"] From 1daa8176d6599b098a59725321aee7f5f2980858 Mon Sep 17 00:00:00 2001 From: 0912078 <0912078@naver.com> Date: Fri, 3 Apr 2026 12:20:19 +0900 Subject: [PATCH 2/3] fix: demote misleading LibreOffice 'not found' warning to debug (closes #230) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On systems where only 'soffice' is on PATH (common on macOS), the existing fallback loop logged a WARNING for the 'libreoffice' candidate before successfully converting via 'soffice'. This caused users to see: WARNING: LibreOffice command 'libreoffice' not found INFO: Successfully converted file.pptx to PDF using soffice …and conclude that something was broken, even though the conversion succeeded. Fix: log FileNotFoundError at DEBUG level for any non-final candidate so that routine 'libreoffice' → 'soffice' fallback stays silent in normal logs. The WARNING is preserved only when the last candidate in the list is not found (meaning no usable LibreOffice binary exists at all and the conversion is about to fail). --- raganything/parser.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/raganything/parser.py b/raganything/parser.py index 90fa32993..0b2274695 100644 --- a/raganything/parser.py +++ b/raganything/parser.py @@ -146,7 +146,9 @@ def convert_office_to_pdf( commands_to_try = ["libreoffice", "soffice"] conversion_successful = False + last_cmd = commands_to_try[-1] for cmd in commands_to_try: + is_last = cmd == last_cmd try: convert_cmd = [ cmd, @@ -188,7 +190,17 @@ def convert_office_to_pdf( f"LibreOffice command '{cmd}' failed: {result.stderr}" ) except FileNotFoundError: - cls.logger.warning(f"LibreOffice command '{cmd}' not found") + # Only warn when all candidates are exhausted; otherwise + # log at debug level so that the normal fallback from + # 'libreoffice' → 'soffice' does not surface a spurious + # WARNING to users whose system only has 'soffice'. + if is_last: + cls.logger.warning(f"LibreOffice command '{cmd}' not found") + else: + cls.logger.debug( + f"LibreOffice command '{cmd}' not found, " + f"trying next candidate" + ) except subprocess.TimeoutExpired: cls.logger.warning(f"LibreOffice command '{cmd}' timed out") except Exception as e: From 514d8448bd8e914c9359803f739766957628d16e Mon Sep 17 00:00:00 2001 From: 0912078 <0912078@naver.com> Date: Fri, 3 Apr 2026 12:24:55 +0900 Subject: [PATCH 3/3] feat: add Ollama integration example (closes #118) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ollama uses a different embedding API (/api/embed via the native ollama Python client) compared to the OpenAI-compatible /v1/embeddings endpoint assumed by the existing openai_embed helper. Pointing that helper at an Ollama host causes embedding failures for most models. Add examples/ollama_integration_example.py that: - Uses openai_complete_if_cache against Ollama's /v1 chat endpoint (works out of the box — Ollama exposes OpenAI-compatible chat) - Calls ollama.AsyncClient.embed() for embeddings so every model in the Ollama registry is supported without extra configuration - Follows the same structure as lmstudio_integration_example.py: connection check, embedding sanity-check, chat sanity-check, RAG init, sample insert + query - Supports OLLAMA_HOST / OLLAMA_LLM_MODEL / OLLAMA_EMBEDDING_MODEL / OLLAMA_EMBEDDING_DIM env vars with sensible defaults (llama3.2 + nomic-embed-text / 768-dim) --- examples/ollama_integration_example.py | 284 +++++++++++++++++++++++++ 1 file changed, 284 insertions(+) create mode 100644 examples/ollama_integration_example.py diff --git a/examples/ollama_integration_example.py b/examples/ollama_integration_example.py new file mode 100644 index 000000000..a31177a3f --- /dev/null +++ b/examples/ollama_integration_example.py @@ -0,0 +1,284 @@ +""" +Ollama Integration Example with RAG-Anything + +This example demonstrates how to integrate Ollama with RAG-Anything for fully +local text document processing and querying. + +Ollama uses a different embedding API (/api/embed) compared to the OpenAI- +compatible /v1/embeddings endpoint, so you cannot simply point the standard +openai_embed helper at an Ollama host. This example wires up both the LLM +and the embedding function using the ``ollama`` Python library directly. + +Requirements: +- Ollama running locally: https://ollama.com/ +- ollama Python package: pip install ollama +- RAG-Anything installed: pip install raganything + +Quick start: + ollama pull llama3.2 # or any chat model you prefer + ollama pull nomic-embed-text # embedding model (768-dim) + python examples/ollama_integration_example.py + +Environment Setup (optional — defaults shown below): +Create a .env file with: +OLLAMA_HOST=http://localhost:11434 +OLLAMA_LLM_MODEL=llama3.2 +OLLAMA_EMBEDDING_MODEL=nomic-embed-text +OLLAMA_EMBEDDING_DIM=768 +""" + +import asyncio +import os +import uuid +from typing import Dict, List, Optional + +from dotenv import load_dotenv + +load_dotenv() + +# RAG-Anything imports +from raganything import RAGAnything, RAGAnythingConfig +from lightrag.utils import EmbeddingFunc +from lightrag.llm.openai import openai_complete_if_cache + +OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434") +OLLAMA_LLM_MODEL = os.getenv("OLLAMA_LLM_MODEL", "llama3.2") +OLLAMA_EMBEDDING_MODEL = os.getenv("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text") +OLLAMA_EMBEDDING_DIM = int(os.getenv("OLLAMA_EMBEDDING_DIM", "768")) + +# Ollama exposes an OpenAI-compatible chat endpoint at /v1 — reuse the +# existing helper for the LLM side. +OLLAMA_BASE_URL = f"{OLLAMA_HOST}/v1" +OLLAMA_API_KEY = "ollama" # Ollama ignores the key but the client requires one + + +async def ollama_llm_model_func( + prompt: str, + system_prompt: Optional[str] = None, + history_messages: List[Dict] = None, + **kwargs, +) -> str: + """Top-level LLM function using Ollama's OpenAI-compatible endpoint.""" + return await openai_complete_if_cache( + model=OLLAMA_LLM_MODEL, + prompt=prompt, + system_prompt=system_prompt, + history_messages=history_messages or [], + base_url=OLLAMA_BASE_URL, + api_key=OLLAMA_API_KEY, + **kwargs, + ) + + +async def ollama_embedding_async(texts: List[str]) -> List[List[float]]: + """Top-level embedding function using the native Ollama embed API. + + Unlike the OpenAI-compatible /v1/embeddings endpoint (which Ollama does + not implement for all models), this calls /api/embed via the ``ollama`` + Python client so it works with any model pulled from the Ollama registry. + """ + import ollama + + client = ollama.AsyncClient(host=OLLAMA_HOST) + response = await client.embed(model=OLLAMA_EMBEDDING_MODEL, input=texts) + return response.embeddings + + +class OllamaRAGIntegration: + """Integration class for Ollama with RAG-Anything.""" + + def __init__(self): + self.host = OLLAMA_HOST + self.llm_model = OLLAMA_LLM_MODEL + self.embedding_model = OLLAMA_EMBEDDING_MODEL + self.embedding_dim = OLLAMA_EMBEDDING_DIM + + self.config = RAGAnythingConfig( + working_dir=f"./rag_storage_ollama/{uuid.uuid4()}", + parser="mineru", + parse_method="auto", + enable_image_processing=False, + enable_table_processing=True, + enable_equation_processing=True, + ) + print(f"📁 Using working_dir: {self.config.working_dir}") + + self.rag = None + + async def test_connection(self) -> bool: + """Verify that Ollama is reachable and the required models are available.""" + try: + import ollama + + print(f"🔌 Connecting to Ollama at: {self.host}") + client = ollama.AsyncClient(host=self.host) + models_response = await client.list() + available = [m.model for m in models_response.models] + + print(f"✅ Connected! {len(available)} model(s) available") + + for required in (self.llm_model, self.embedding_model): + # Ollama tags may include ':latest' suffix — check prefix match + found = any(m.startswith(required.split(":")[0]) for m in available) + marker = "✅" if found else "⚠️ " + print(f" {marker} {required}") + if not found: + print( + f" Run: ollama pull {required}" + ) + + return True + except ImportError: + print("❌ ollama package not installed — run: pip install ollama") + return False + except Exception as e: + print(f"❌ Connection failed: {e}") + print(" Is Ollama running? Try: ollama serve") + return False + + async def test_embedding(self) -> bool: + """Quick sanity-check for the embedding function.""" + try: + print(f"🔢 Testing embedding model: {self.embedding_model}") + vectors = await ollama_embedding_async(["hello world"]) + if vectors and len(vectors[0]) > 0: + print( + f"✅ Embedding OK — dim={len(vectors[0])} " + f"(configured: {self.embedding_dim})" + ) + if len(vectors[0]) != self.embedding_dim: + print( + f" ⚠️ Dimension mismatch! Set " + f"OLLAMA_EMBEDDING_DIM={len(vectors[0])} in your .env" + ) + return True + print("❌ Embedding returned empty vector") + return False + except Exception as e: + print(f"❌ Embedding test failed: {e}") + return False + + async def test_chat(self) -> bool: + """Quick sanity-check for the LLM function.""" + try: + print(f"💬 Testing LLM model: {self.llm_model}") + result = await ollama_llm_model_func("Say 'OK' in one word.") + print(f"✅ Chat OK — response: {result.strip()[:80]}") + return True + except Exception as e: + print(f"❌ Chat test failed: {e}") + return False + + def _make_embedding_func(self) -> EmbeddingFunc: + return EmbeddingFunc( + embedding_dim=self.embedding_dim, + max_token_size=8192, + func=ollama_embedding_async, + ) + + async def initialize_rag(self) -> bool: + """Initialize RAG-Anything with Ollama backends.""" + print("\nInitializing RAG-Anything with Ollama …") + try: + self.rag = RAGAnything( + config=self.config, + llm_model_func=ollama_llm_model_func, + embedding_func=self._make_embedding_func(), + ) + print("✅ RAG-Anything initialized") + return True + except Exception as e: + print(f"❌ Initialization failed: {e}") + return False + + async def process_document(self, file_path: str): + """Process a document with Ollama as the backend.""" + if not self.rag: + print("❌ Call initialize_rag() first") + return + print(f"📄 Processing: {file_path}") + await self.rag.process_document_complete( + file_path=file_path, + output_dir="./output_ollama", + parse_method="auto", + display_stats=True, + ) + print("✅ Processing complete") + + async def simple_query_example(self): + """Insert sample text and run a demonstration query.""" + if not self.rag: + print("❌ Call initialize_rag() first") + return + + content_list = [ + { + "type": "text", + "text": ( + "Ollama Integration with RAG-Anything\n\n" + "This integration lets you run a fully local RAG pipeline:\n" + "- Ollama serves the LLM via an OpenAI-compatible /v1 endpoint\n" + "- Ollama serves embeddings via its native /api/embed endpoint\n" + "- RAG-Anything handles document parsing and knowledge-graph construction\n\n" + "Popular embedding models: nomic-embed-text (768-dim), " + "mxbai-embed-large (1024-dim), all-minilm (384-dim)\n" + "Popular chat models: llama3.2, mistral, gemma3, phi4" + ), + "page_idx": 0, + } + ] + + print("\nInserting sample content …") + await self.rag.insert_content_list( + content_list=content_list, + file_path="ollama_integration_demo.txt", + doc_id=f"demo-{uuid.uuid4()}", + display_stats=True, + ) + print("✅ Content inserted") + + print("\n🔍 Running sample query …") + result = await self.rag.aquery( + "What embedding models are recommended for Ollama?", + mode="hybrid", + ) + print(f"Answer: {result[:400]}") + + +async def main(): + print("=" * 70) + print("Ollama + RAG-Anything Integration Example") + print("=" * 70) + + integration = OllamaRAGIntegration() + + if not await integration.test_connection(): + return False + + print() + if not await integration.test_embedding(): + return False + + print() + if not await integration.test_chat(): + return False + + print("\n" + "─" * 50) + if not await integration.initialize_rag(): + return False + + # Uncomment to process a real document: + # await integration.process_document("path/to/your/document.pdf") + + await integration.simple_query_example() + + print("\n" + "=" * 70) + print("Example completed successfully!") + print("=" * 70) + return True + + +if __name__ == "__main__": + print("🚀 Starting Ollama integration example …") + success = asyncio.run(main()) + exit(0 if success else 1)