Bessouat40
diff --git a/src/raglight/embeddings/gemini_embeddings.py b/src/raglight/embeddings/gemini_embeddings.py
Lines changed: 8 additions & 39 deletions
diff --git a/src/raglight/embeddings/ollama_embeddings.py b/src/raglight/embeddings/ollama_embeddings.py
Lines changed: 9 additions & 26 deletions
diff --git a/src/raglight/embeddings/openai_embeddings.py b/src/raglight/embeddings/openai_embeddings.py
Lines changed: 6 additions & 30 deletions
diff --git a/src/raglight/llm/bedrock_model.py b/src/raglight/llm/bedrock_model.py
Lines changed: 3 additions & 2 deletions
diff --git a/src/raglight/llm/gemini_model.py b/src/raglight/llm/gemini_model.py
Lines changed: 37 additions & 106 deletions
diff --git a/src/raglight/llm/llm.py b/src/raglight/llm/llm.py
Lines changed: 4 additions & 1 deletion
@@ -1,59 +1,28 @@
 from __future__ import annotations
-from typing import Optional, List, Any
+from typing import Optional, List
 from typing_extensions import override
 
-from google.genai import Client
-from google.genai import types
+from langchain_google_genai import GoogleGenerativeAIEmbeddings
 
 from ..config.settings import Settings
 from .embeddings_model import EmbeddingsModel
 
 
 class GeminiEmbeddingsModel(EmbeddingsModel):
-    """
-    Concrete implementation of the EmbeddingsModel for Gemini models using the official Google GenAI library.
-    """
-
     def __init__(self, model_name: str, api_base: Optional[str] = None) -> None:
-        """
-        Initializes a GeminiEmbeddingsModel instance.
-
-        Args:
-            model_name (str): The name of the Gemini model to load (e.g., "models/embedding-001").
-            api_base (Optional[str]): Not strictly used by the official lib as it relies on global config,
-                                      but kept for interface consistency.
-        """
         super().__init__(model_name, api_base)
 
     @override
-    def load(self) -> Any:
-        """
-        Configures the Google GenAI library.
-        Returns the module reference as the 'client'.
-        """
-        return Client(
-            api_key=Settings.GEMINI_API_KEY,
-            http_options=types.HttpOptions(base_url=self.api_base),
+    def load(self) -> GoogleGenerativeAIEmbeddings:
+        return GoogleGenerativeAIEmbeddings(
+            model=self.model_name,
+            google_api_key=Settings.GEMINI_API_KEY,
         )
 
     @override
     def embed_documents(self, texts: List[str]) -> List[List[float]]:
-        """
-        Embed list of documents using Google GenAI.
-        Specifies 'retrieval_document' task type for optimized document storage embeddings.
-        """
-        result = self.model.embed_content(
-            model=self.model_name, content=texts, task_type="retrieval_document"
-        )
-        return result["embedding"]
+        return self.model.embed_documents(texts)
 
     @override
     def embed_query(self, text: str) -> List[float]:
-        """
-        Embed a single query text.
-        Specifies 'retrieval_query' task type for optimized search query embeddings.
-        """
-        result = self.model.embed_content(
-            model=self.model_name, content=text, task_type="retrieval_query"
-        )
-        return result["embedding"]
+        return self.model.embed_query(text)
@@ -2,16 +2,13 @@
 from typing import Optional, List, Dict, Any
 from typing_extensions import override
 
+from langchain_ollama import OllamaEmbeddings
+
 from ..config.settings import Settings
 from .embeddings_model import EmbeddingsModel
-from ollama import Client
 
 
 class OllamaEmbeddingsModel(EmbeddingsModel):
-    """
-    Concrete implementation of the EmbeddingsModel for Ollama models using the official python library.
-    """
-
     def __init__(
         self,
         model_name: str,
@@ -20,37 +17,23 @@ def __init__(
     ) -> None:
         resolved_api_base = api_base or Settings.DEFAULT_OLLAMA_CLIENT
         super().__init__(model_name, api_base=resolved_api_base)
-
         self.options = options or {}
-
-        # Keep critical config to prevent internal Ollama "panic" on large docs
         if "num_batch" not in self.options:
             self.options["num_batch"] = 8192
         if "num_ctx" not in self.options:
             self.options["num_ctx"] = 8192
 
     @override
-    def load(self) -> Client:
-        return Client(host=self.api_base)
+    def load(self) -> OllamaEmbeddings:
+        return OllamaEmbeddings(
+            model=self.model_name,
+            base_url=self.api_base,
+        )
 
     @override
     def embed_documents(self, texts: List[str]) -> List[List[float]]:
-        """
-        Embed list of documents using the optimized batch 'embed' method.
-        """
-        # OPTIMIZATION: Use 'embed' (not 'embeddings') to process the whole list at once.
-        # This sends a single request and leverages GPU batch processing.
-        response = self.model.embed(
-            model=self.model_name, input=texts, options=self.options
-        )
-        return response["embeddings"]
+        return self.model.embed_documents(texts)
 
     @override
     def embed_query(self, text: str) -> List[float]:
-        """
-        Embed a single query text.
-        """
-        response = self.model.embeddings(
-            model=self.model_name, prompt=text, options=self.options
-        )
-        return response["embedding"]
+        return self.model.embed_query(text)
@@ -2,53 +2,29 @@
 from typing import Optional, List
 from typing_extensions import override
 
-from openai import OpenAI
+from langchain_openai import OpenAIEmbeddings
 
 from ..config.settings import Settings
 from .embeddings_model import EmbeddingsModel
 
 
 class OpenAIEmbeddingsModel(EmbeddingsModel):
-    """
-    Concrete implementation of the EmbeddingsModel for OpenAI models using the official python library.
-    """
-
     def __init__(self, model_name: str, api_base: Optional[str] = None) -> None:
-        """
-        Initializes an OpenAIEmbeddingsModel instance.
-
-        Args:
-            model_name (str): The name of the OpenAI model to load.
-            api_base (Optional[str]): The base URL for the API (optional).
-        """
         resolved_api_base = api_base or Settings.DEFAULT_OPENAI_CLIENT
         super().__init__(model_name, api_base=resolved_api_base)
 
     @override
-    def load(self) -> OpenAI:
-        """
-        Loads the OpenAI client.
-
-        Returns:
-            OpenAI: The initialized OpenAI client.
-        """
-        return OpenAI(
+    def load(self) -> OpenAIEmbeddings:
+        return OpenAIEmbeddings(
+            model=self.model_name,
             api_key=Settings.OPENAI_API_KEY,
             base_url=self.api_base,
         )
 
     @override
     def embed_documents(self, texts: List[str]) -> List[List[float]]:
-        """
-        Embed list of documents using the official OpenAI client.
-        """
-        response = self.model.embeddings.create(input=texts, model=self.model_name)
-        return [data.embedding for data in response.data]
+        return self.model.embed_documents(texts)
 
     @override
     def embed_query(self, text: str) -> List[float]:
-        """
-        Embed a single query text.
-        """
-        response = self.model.embeddings.create(input=[text], model=self.model_name)
-        return response.data[0].embedding
+        return self.model.embed_query(text)
@@ -93,7 +93,7 @@ def generate(self, input: Dict[str, Any]) -> str:
         return response.content
 
     @override
-    def generate_streaming(self, input: Dict[str, Any]) -> Iterable[str]:
+    def generate_streaming(self, input: Dict[str, Any], callbacks=None) -> Iterable[str]:
         history = input.get("history", [])
         messages = []
 
@@ -108,6 +108,7 @@ def generate_streaming(self, input: Dict[str, Any]) -> Iterable[str]:
 
         messages.append(HumanMessage(content=input.get("question", "")))
 
-        for chunk in self.model.stream(messages):
+        stream_config = {"callbacks": callbacks} if callbacks else {}
+        for chunk in self.model.stream(messages, config=stream_config):
             if chunk.content:
                 yield chunk.content
@@ -5,24 +5,12 @@
 
 from ..config.settings import Settings
 from .llm import LLM
-from google.genai import Client
-from google.genai import types
 
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
 
-class GeminiModel(LLM):
-    """
-    A subclass of LLM that uses Google's Gemini as the backend for text generation.
-
-    This class provides an interface to interact with Google's Generative AI,
-    enabling text generation with various models supported by the Gemini API.
-
-    Attributes:
-        model_name (str): The name of the model to use with Gemini.
-        role (str): The role of the user in the conversation, typically "user".
-        system_prompt (str): The system prompt to use for text generation.
-        model (Client): The Gemini client configured to interact with the API.
-    """
 
+class GeminiModel(LLM):
     def __init__(
         self,
         model_name: str,
@@ -31,106 +19,49 @@ def __init__(
         api_base: Optional[str] = None,
         role: str = "user",
     ) -> None:
-        """
-        Initializes an instance of GeminiModel.
-
-        Args:
-            model_name (str): The name of the model to use with Gemini.
-            system_prompt (Optional[str]): The system prompt to use. If not provided, it will be loaded from system_prompt_file or use the default value.
-            system_prompt_file (Optional[str]): The path to the file to load the system prompt from. If provided, it takes precedence over system_prompt.
-            role (str): The role of the user in the conversation, defaults to "user".
-        """
         self.api_base = api_base or Settings.DEFAULT_GOOGLE_CLIENT
         super().__init__(model_name, system_prompt, system_prompt_file, self.api_base)
         logging.info(f"Using Gemini with {model_name} model 🤖")
         self.role: str = role
 
     @override
-    def load(self) -> Client:
-        """
-        Loads the Gemini client using the modern google.generativeai SDK.
-
-        Returns:
-            Client: The client object to interact with Gemini API.
-        """
-        return Client(
-            api_key=Settings.GEMINI_API_KEY,
-            http_options=types.HttpOptions(base_url=self.api_base),
-        )
-
-    @override
-    def generate(self, input: Dict[str, Any]) -> str:
-        """
-        Generates text using the Gemini model.
-        It constructs a structured 'contents' payload using the 'types' module
-        as requested for proper input management.
-
-        Args:
-            input (Dict[str, Any]): The input data for text generation.
-
-        Returns:
-            str: The text generated by the model.
-        """
-        history = input.get("history", [])
-        contents = []
-
-        for msg in history:
-            role = "model" if msg["role"] == "assistant" else "user"
-            contents.append(
-                types.Content(role=role, parts=[types.Part(text=msg["content"])])
-            )
-
-        contents.append(
-            types.Content(
-                role="user", parts=[types.Part(text=input.get("question", ""))]
-            )
+    def load(self) -> ChatGoogleGenerativeAI:
+        return ChatGoogleGenerativeAI(
+            model=self.model_name,
+            google_api_key=Settings.GEMINI_API_KEY,
         )
 
-        config = None
+    def _build_messages(self, input: Dict[str, Any]):
+        messages = []
         if self.system_prompt:
-            config = types.GenerateContentConfig(system_instruction=self.system_prompt)
-
-        try:
-            response = self.model.models.generate_content(
-                model=self.model_name,
-                contents=contents,
-                config=config,
-            )
-            if not response.candidates:
-                logging.warning("Response was blocked. Checking prompt feedback.")
-                if response.prompt_feedback:
-                    logging.warning(f"Prompt Feedback: {response.prompt_feedback}")
-                return "Response blocked due to safety settings."
-            return response.text
-        except Exception as e:
-            logging.error(f"An error occurred during Gemini content generation: {e}")
-            return f"Error: {e}"
+            messages.append(SystemMessage(content=self.system_prompt))
+        for msg in input.get("history", []):
+            if msg["role"] == "assistant":
+                messages.append(AIMessage(content=msg["content"]))
+            else:
+                messages.append(HumanMessage(content=msg["content"]))
+
+        question = input.get("question", "")
+        if "images" in input:
+            content = [{"type": "text", "text": question}]
+            for image in input["images"]:
+                try:
+                    content.append({"type": "image_url", "image_url": f"data:image/jpeg;base64,{image['base64']}"})
+                except Exception as e:
+                    logging.error(f"Could not read image: {e}")
+            messages.append(HumanMessage(content=content))
+        else:
+            messages.append(HumanMessage(content=question))
+        return messages
 
     @override
-    def generate_streaming(self, input: Dict[str, Any]) -> Iterable[str]:
-        history = input.get("history", [])
-        contents = []
-
-        for msg in history:
-            role = "model" if msg["role"] == "assistant" else "user"
-            contents.append(
-                types.Content(role=role, parts=[types.Part(text=msg["content"])])
-            )
-
-        contents.append(
-            types.Content(
-                role="user", parts=[types.Part(text=input.get("question", ""))]
-            )
-        )
-
-        config = None
-        if self.system_prompt:
-            config = types.GenerateContentConfig(system_instruction=self.system_prompt)
+    def generate(self, input: Dict[str, Any]) -> str:
+        response = self.model.invoke(self._build_messages(input))
+        return response.content
 
-        for chunk in self.model.models.generate_content_stream(
-            model=self.model_name,
-            contents=contents,
-            config=config,
-        ):
-            if chunk.text:
-                yield chunk.text
+    @override
+    def generate_streaming(self, input: Dict[str, Any], callbacks=None) -> Iterable[str]:
+        config = {"callbacks": callbacks} if callbacks else {}
+        for chunk in self.model.stream(self._build_messages(input), config=config):
+            if chunk.content:
+                yield chunk.content
@@ -76,12 +76,15 @@ def generate(self, input: Dict[str, Any]) -> str:
         pass
 
     @abstractmethod
-    def generate_streaming(self, input: Dict[str, Any]) -> Iterable[str]:
+    def generate_streaming(
+        self, input: Dict[str, Any], callbacks: Optional[list] = None
+    ) -> Iterable[str]:
         """
         Abstract method to generate text in streaming mode.
 
         Args:
             input (Dict[str, Any]): A dictionary containing the input data for text generation.
+            callbacks (Optional[list]): Optional list of LangChain callbacks (e.g. Langfuse).
 
         Yields:
             str: Successive chunks of the generated output.