stacklok · aponcedeleonch · Dec 9, 2024 · Dec 9, 2024
diff --git a/docs/cli.md b/docs/cli.md
@@ -66,6 +66,12 @@ codegate serve [OPTIONS]
   - Base URL for Ollama provider (/api path is added automatically)
   - Overrides configuration file and environment variables
 
+- `--model-base-path TEXT`: Base path for loading models needed for the system
+  - Optional (default: `./models`)
+
+- `--embedding-model TEXT`: Name of the model used for embeddings
+  - Optional (default: `all-minilm-L6-v2-q5_k_m.gguf`)
+
 ### show-prompts
 
 Display the loaded system prompts:

diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh
@@ -15,4 +15,4 @@ exec nginx -g 'daemon off;' &
 
 # Step 3: Start the main application (serve)
 echo "Starting the application..."
-exec python -m src.codegate.cli serve --port 8989 --host 0.0.0.0 --vllm-url https://inference.codegate.ai
+exec python -m src.codegate.cli serve --port 8989 --host 0.0.0.0 --vllm-url https://inference.codegate.ai --model-base-path /app/models
diff --git a/src/codegate/cli.py b/src/codegate/cli.py
@@ -115,6 +115,18 @@ def show_prompts(prompts: Optional[Path]) -> None:
     default=None,
     help="Ollama provider URL (default: http://localhost:11434/api)",
 )
+@click.option(
+    "--model-base-path",
+    type=str,
+    default="./models",
+    help="Path to the model base directory",
+)
+@click.option(
+    "--embedding-model",
+    type=str,
+    default="all-minilm-L6-v2-q5_k_m.gguf",
+    help="Name of the model to use for embeddings",
+)
 def serve(
     port: Optional[int],
     host: Optional[str],
@@ -126,6 +138,8 @@ def serve(
     openai_url: Optional[str],
     anthropic_url: Optional[str],
     ollama_url: Optional[str],
+    model_base_path: Optional[str],
+    embedding_model: Optional[str],
 ) -> None:
     """Start the codegate server."""
     logger = None
@@ -150,6 +164,8 @@ def serve(
             cli_log_level=log_level,
             cli_log_format=log_format,
             cli_provider_urls=cli_provider_urls,
+            model_base_path=model_base_path,
+            embedding_model=embedding_model,
         )
 
         setup_logging(cfg.log_level, cfg.log_format)
@@ -163,6 +179,8 @@ def serve(
                 "log_format": cfg.log_format.value,
                 "prompts_loaded": len(cfg.prompts.prompts),
                 "provider_urls": cfg.provider_urls,
+                "model_base_path": cfg.model_base_path,
+                "embedding_model": cfg.embedding_model,
             },
         )
 

diff --git a/src/codegate/config.py b/src/codegate/config.py
@@ -176,6 +176,8 @@ def load(
         cli_log_level: Optional[str] = None,
         cli_log_format: Optional[str] = None,
         cli_provider_urls: Optional[Dict[str, str]] = None,
+        model_base_path: Optional[str] = None,
+        embedding_model: Optional[str] = None,
     ) -> "Config":
         """Load configuration with priority resolution.
 
@@ -193,6 +195,8 @@ def load(
             cli_log_level: Optional CLI log level override
             cli_log_format: Optional CLI log format override
             cli_provider_urls: Optional dict of provider URLs from CLI
+            model_base_path: Optional path to model base directory
+            embedding_model: Optional name of the model to use for embeddings
 
         Returns:
             Config: Resolved configuration
@@ -223,6 +227,10 @@ def load(
             config.log_format = env_config.log_format
         if "CODEGATE_PROMPTS_FILE" in os.environ:
             config.prompts = env_config.prompts
+        if "CODEGATE_MODEL_BASE_PATH" in os.environ:
+            config.model_base_path = env_config.model_base_path
+        if "CODEGATE_EMBEDDING_MODEL" in os.environ:
+            config.embedding_model = env_config.embedding_model
 
         # Override provider URLs from environment
         for provider, url in env_config.provider_urls.items():
@@ -241,6 +249,10 @@ def load(
             config.prompts = PromptConfig.from_file(prompts_path)
         if cli_provider_urls is not None:
             config.provider_urls.update(cli_provider_urls)
+        if model_base_path is not None:
+            config.model_base_path = model_base_path
+        if embedding_model is not None:
+            config.embedding_model = embedding_model
 
         # Set the __config class attribute
         Config.__config = config

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -67,17 +67,23 @@ def test_serve_default_options(
         assert result.exit_code == 0
         mock_setup_logging.assert_called_once_with(LogLevel.INFO, LogFormat.JSON)
         mock_logging.assert_called_with("codegate")
-        logger_instance.info.assert_any_call(
-            "Starting server",
-            extra={
-                "host": "localhost",
-                "port": 8989,
-                "log_level": "INFO",
-                "log_format": "JSON",
-                "prompts_loaded": 6,  # Default prompts are loaded
-                "provider_urls": DEFAULT_PROVIDER_URLS,
-            },
-        )
+
+        # Validate only a subset of the extra arguments; the call also logs the model settings
+        expected_extra = {
+            "host": "localhost",
+            "port": 8989,
+            "log_level": "INFO",
+            "log_format": "JSON",
+            "prompts_loaded": 6,
+            "provider_urls": DEFAULT_PROVIDER_URLS,
+        }
+
+        # Retrieve the actual call arguments
+        calls = [call[1]['extra'] for call in logger_instance.info.call_args_list]
+
+        # Check if one of the calls matches the expected subset
+        assert any(all(expected_extra[k] == actual_extra.get(k)
+                       for k in expected_extra) for actual_extra in calls)
         mock_run.assert_called_once()
 
 
@@ -106,17 +112,22 @@ def test_serve_custom_options(
         assert result.exit_code == 0
         mock_setup_logging.assert_called_once_with(LogLevel.DEBUG, LogFormat.TEXT)
         mock_logging.assert_called_with("codegate")
-        logger_instance.info.assert_any_call(
-            "Starting server",
-            extra={
-                "host": "localhost",
-                "port": 8989,
-                "log_level": "DEBUG",
-                "log_format": "TEXT",
-                "prompts_loaded": 6,  # Default prompts are loaded
-                "provider_urls": DEFAULT_PROVIDER_URLS,
-            },
-        )
+
+        # Retrieve the actual call arguments
+        calls = [call[1]['extra'] for call in logger_instance.info.call_args_list]
+
+        expected_extra = {
+            "host": "localhost",
+            "port": 8989,
+            "log_level": "DEBUG",
+            "log_format": "TEXT",
+            "prompts_loaded": 6,  # Default prompts are loaded
+            "provider_urls": DEFAULT_PROVIDER_URLS,
+        }
+
+        # Check if one of the calls matches the expected subset
+        assert any(all(expected_extra[k] == actual_extra.get(k)
+                       for k in expected_extra) for actual_extra in calls)
         mock_run.assert_called_once()
 
 
@@ -146,17 +157,22 @@ def test_serve_with_config_file(
         assert result.exit_code == 0
         mock_setup_logging.assert_called_once_with(LogLevel.DEBUG, LogFormat.JSON)
         mock_logging.assert_called_with("codegate")
-        logger_instance.info.assert_any_call(
-            "Starting server",
-            extra={
-                "host": "localhost",
-                "port": 8989,
-                "log_level": "DEBUG",
-                "log_format": "JSON",
-                "prompts_loaded": 6,  # Default prompts are loaded
-                "provider_urls": DEFAULT_PROVIDER_URLS,
-            },
-        )
+
+        # Retrieve the actual call arguments
+        calls = [call[1]['extra'] for call in logger_instance.info.call_args_list]
+
+        expected_extra = {
+            "host": "localhost",
+            "port": 8989,
+            "log_level": "DEBUG",
+            "log_format": "JSON",
+            "prompts_loaded": 6,  # Default prompts are loaded
+            "provider_urls": DEFAULT_PROVIDER_URLS,
+        }
+
+        # Check if one of the calls matches the expected subset
+        assert any(all(expected_extra[k] == actual_extra.get(k)
+                       for k in expected_extra) for actual_extra in calls)
         mock_run.assert_called_once()
 
 
@@ -198,17 +214,22 @@ def test_serve_priority_resolution(
         assert result.exit_code == 0
         mock_setup_logging.assert_called_once_with(LogLevel.ERROR, LogFormat.TEXT)
         mock_logging.assert_called_with("codegate")
-        logger_instance.info.assert_any_call(
-            "Starting server",
-            extra={
-                "host": "example.com",
-                "port": 8080,
-                "log_level": "ERROR",
-                "log_format": "TEXT",
-                "prompts_loaded": 6,  # Default prompts are loaded
-                "provider_urls": DEFAULT_PROVIDER_URLS,
-            },
-        )
+
+        # Retrieve the actual call arguments
+        calls = [call[1]['extra'] for call in logger_instance.info.call_args_list]
+
+        expected_extra = {
+            "host": "example.com",
+            "port": 8080,
+            "log_level": "ERROR",
+            "log_format": "TEXT",
+            "prompts_loaded": 6,  # Default prompts are loaded
+            "provider_urls": DEFAULT_PROVIDER_URLS,
+        }
+
+        # Check if one of the calls matches the expected subset
+        assert any(all(expected_extra[k] == actual_extra.get(k)
+                       for k in expected_extra) for actual_extra in calls)
         mock_run.assert_called_once()