Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions packages/leann-core/src/leann/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
resolve_anthropic_base_url,
resolve_minimax_api_key,
resolve_minimax_base_url,
resolve_novita_api_key,
resolve_novita_base_url,
resolve_ollama_host,
resolve_openai_api_key,
resolve_openai_base_url,
Expand Down Expand Up @@ -1010,6 +1012,76 @@ def ask(self, prompt: str, **kwargs) -> str:
return f"Error: Could not get a response from MiniMax. Details: {e}"


class NovitaChat(LLMInterface):
    """LLM interface for Novita AI models via the OpenAI-compatible API.

    Supported models include:
    - moonshotai/kimi-k2.5 (default): 262K context, MoE with function calling,
      structured output, reasoning, and vision support.
    - zai-org/glm-5: 202K context, MoE with function calling, structured output,
      reasoning support.
    - minimax/minimax-m2.5: 204K context, MoE with function calling, structured
      output, reasoning support.

    See CLAUDE.md for the full model catalog with pricing and features.
    """

    def __init__(
        self,
        model: str = "moonshotai/kimi-k2.5",
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
    ):
        """Create a Novita chat client.

        Args:
            model: Novita model identifier.
            api_key: Explicit API key; falls back to environment resolution.
            base_url: Explicit base URL; falls back to environment resolution.

        Raises:
            ValueError: If no API key can be resolved.
            ImportError: If the 'openai' package is not installed.
        """
        self.model = model
        self.base_url = resolve_novita_base_url(base_url)
        self.api_key = resolve_novita_api_key(api_key)

        if not self.api_key:
            raise ValueError(
                "Novita API key is required. Set NOVITA_API_KEY environment variable or pass api_key parameter."
            )

        logger.info(
            "Initializing Novita Chat with model='%s' and base_url='%s'",
            model,
            self.base_url,
        )

        try:
            import openai
        except ImportError as e:
            # Chain the original cause so the underlying import failure
            # (e.g. a broken install) stays visible in the traceback.
            raise ImportError(
                "The 'openai' library is required for Novita models. Please install it with 'pip install openai'."
            ) from e

        self.client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url)

    def ask(self, prompt: str, **kwargs) -> str:
        """Send a single-turn prompt and return the model's reply text.

        Args:
            prompt: The user message.
            **kwargs: Optional 'temperature', 'max_tokens', and 'top_p'.

        Returns:
            The stripped completion text, an empty string when the API
            returns no text content, or an "Error: ..." string on failure.
        """
        params: dict[str, Any] = {
            "model": self.model,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": kwargs.get("temperature", 0.7),
            "max_tokens": kwargs.get("max_tokens", 1000),
        }

        # top_p is only forwarded when explicitly requested by the caller.
        if "top_p" in kwargs:
            params["top_p"] = kwargs["top_p"]

        logger.info("Sending request to Novita with model %s", self.model)

        try:
            response = cast(Any, self.client.chat.completions).create(**params)
            usage = response.usage
            logger.info(
                "Total tokens = %s, prompt tokens = %s, completion tokens = %s",
                usage.total_tokens,
                usage.prompt_tokens,
                usage.completion_tokens,
            )
            choice = response.choices[0]
            if choice.finish_reason == "length":
                logger.warning("The query is exceeding the maximum allowed number of tokens")
            # message.content may be None (e.g. when the model emits only
            # tool calls); guard so .strip() does not raise AttributeError
            # and get swallowed by the broad except below.
            content = choice.message.content
            return content.strip() if content else ""
        except Exception as e:
            logger.error("Error communicating with Novita: %s", e)
            return f"Error: Could not get a response from Novita. Details: {e}"


class SimulatedChat(LLMInterface):
"""A simple simulated chat for testing and development."""

Expand Down Expand Up @@ -1074,6 +1146,12 @@ def get_llm(llm_config: Optional[dict[str, Any]] = None) -> LLMInterface:
api_key=llm_config.get("api_key"),
base_url=llm_config.get("base_url"),
)
elif llm_type == "novita":
return NovitaChat(
model=model or "moonshotai/kimi-k2.5",
api_key=llm_config.get("api_key"),
base_url=llm_config.get("base_url"),
)
elif llm_type == "simulated":
return SimulatedChat()
else:
Expand Down
4 changes: 2 additions & 2 deletions packages/leann-core/src/leann/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ def create_parser(self) -> argparse.ArgumentParser:
"--llm",
type=str,
default="ollama",
choices=["simulated", "ollama", "hf", "openai", "anthropic", "minimax"],
choices=["simulated", "ollama", "hf", "openai", "anthropic", "minimax", "novita"],
help="LLM provider (default: ollama)",
)
ask_parser.add_argument(
Expand Down Expand Up @@ -562,7 +562,7 @@ def create_parser(self) -> argparse.ArgumentParser:
"--llm",
type=str,
default="ollama",
choices=["simulated", "ollama", "hf", "openai", "anthropic", "minimax"],
choices=["simulated", "ollama", "hf", "openai", "anthropic", "minimax", "novita"],
help="LLM provider (default: ollama)",
)
react_parser.add_argument(
Expand Down
40 changes: 40 additions & 0 deletions packages/leann-core/src/leann/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,18 @@
from __future__ import annotations

import json
import logging
import os
from typing import Any

logger = logging.getLogger(__name__)

# Default fallbacks to preserve current behaviour while keeping them in one place.
_DEFAULT_OLLAMA_HOST = "http://localhost:11434"
_DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1"
_DEFAULT_ANTHROPIC_BASE_URL = "https://api.anthropic.com"
_DEFAULT_MINIMAX_BASE_URL = "https://api.minimax.io/v1"
_DEFAULT_NOVITA_BASE_URL = "https://api.novita.ai/openai"


def _clean_url(value: str) -> str:
Expand Down Expand Up @@ -114,6 +118,42 @@ def resolve_minimax_api_key(explicit: str | None = None) -> str | None:
return os.getenv("MINIMAX_API_KEY")


def resolve_novita_base_url(explicit: str | None = None) -> str:
    """Resolve the base URL for Novita AI services.

    Precedence: explicit argument, LEANN_NOVITA_BASE_URL, NOVITA_BASE_URL,
    OPENAI_BASE_URL (OpenAI-compatible fallback), then the built-in default.
    """

    # First truthy source wins; fall back to the package default otherwise.
    chosen = next(
        (
            value
            for value in (
                explicit,
                os.getenv("LEANN_NOVITA_BASE_URL"),
                os.getenv("NOVITA_BASE_URL"),
                os.getenv("OPENAI_BASE_URL"),  # Fallback to OpenAI base URL
            )
            if value
        ),
        _DEFAULT_NOVITA_BASE_URL,
    )
    return _clean_url(chosen)


def resolve_novita_api_key(explicit: str | None = None) -> str | None:
"""Resolve the API key for Novita AI services."""

if explicit:
return explicit

novita_key = os.getenv("NOVITA_API_KEY")
if novita_key:
return novita_key

openai_key = os.getenv("OPENAI_API_KEY")
if openai_key:
logger.warning(
"NOVITA_API_KEY not set, falling back to OPENAI_API_KEY. "
"This may cause authentication issues if the OpenAI key is not valid for Novita AI."
)
return openai_key


def encode_provider_options(options: dict[str, Any] | None) -> str | None:
"""Serialize provider options for child processes."""

Expand Down
Loading
Loading