Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions astrbot/core/config/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -1461,6 +1461,23 @@ class ChatProviderTemplate(TypedDict):
"timeout": "20",
"proxy": "",
},
"MiMo TTS(API)": {
"id": "mimo_tts",
"type": "mimo_tts_api",
"provider": "mimo",
"provider_type": "text_to_speech",
"enable": False,
"api_key": "",
"api_base": "https://api.xiaomimimo.com/v1",
"model": "mimo-v2-tts",
"mimo-tts-voice": "mimo_default",
"mimo-tts-format": "wav",
"mimo-tts-style-prompt": "",
"mimo-tts-dialect": "",
"mimo-tts-seed-text": "Hello, MiMo, have you had lunch?",
"timeout": "20",
"proxy": "",
},
"Genie TTS": {
"id": "genie_tts",
"provider": "genie_tts",
Expand Down Expand Up @@ -2314,6 +2331,31 @@ class ChatProviderTemplate(TypedDict):
"type": "string",
"hint": "OpenAI TTS 的声音。OpenAI 默认支持:'alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'",
},
"mimo-tts-voice": {
"description": "音色",
"type": "string",
"hint": "MiMo TTS 的音色名称。默认值为 'mimo_default'。",
},
"mimo-tts-format": {
"description": "输出格式",
"type": "string",
"hint": "MiMo TTS 生成音频的格式,例如 'wav'。",
},
"mimo-tts-style-prompt": {
"description": "风格提示词",
"type": "string",
"hint": "用于控制生成语音的说话风格、语气或情绪,例如温柔、活泼、沉稳等。可留空。",
},
"mimo-tts-dialect": {
"description": "方言",
"type": "string",
"hint": "指定生成语音时使用的方言或口音,例如四川话、粤语口音等。可留空。",
},
"mimo-tts-seed-text": {
"description": "种子文本",
"type": "string",
"hint": "用于引导音色和说话方式的参考文本,会影响生成语音的表达风格。",
},
"fishaudio-tts-character": {
"description": "character",
"type": "string",
Expand Down
4 changes: 4 additions & 0 deletions astrbot/core/provider/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,10 @@ def dynamic_import_provider(self, type: str) -> None:
from .sources.openai_tts_api_source import (
ProviderOpenAITTSAPI as ProviderOpenAITTSAPI,
)
case "mimo_tts_api":
from .sources.mimo_tts_api_source import (
ProviderMiMoTTSAPI as ProviderMiMoTTSAPI,
)
case "genie_tts":
from .sources.genie_tts import (
GenieTTSProvider as GenieTTSProvider,
Expand Down
149 changes: 149 additions & 0 deletions astrbot/core/provider/sources/mimo_tts_api_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
import base64
import uuid
from pathlib import Path

import httpx

from ..entities import ProviderType
from ..provider import TTSProvider
from ..register import register_provider_adapter
from astrbot import logger
from astrbot.core.utils.astrbot_path import get_astrbot_temp_path


def normalize_timeout(timeout: int | str | None) -> int | None:
if timeout in (None, ""):
return None
if isinstance(timeout, str):
return int(timeout)
return timeout


def build_headers(api_key: str) -> dict[str, str]:
headers = {"Content-Type": "application/json"}
if api_key:
headers["api-key"] = api_key
headers["Authorization"] = f"Bearer {api_key}"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

`build_headers` 函数中,同时设置 `api-key` 和 `Authorization` 头部可能存在冗余。通常情况下,API 只会使用其中一个进行认证。建议查阅 MiMo TTS API 的官方文档,确认只需要其中一个头部,以避免潜在的冲突或不必要的请求开销。

Suggested change
if api_key:
headers["api-key"] = api_key
headers["Authorization"] = f"Bearer {api_key}"
if api_key:
# 根据 MiMo API 文档,可能只需要其中一个头部
# 例如,如果只需要 Authorization 头部:
headers["Authorization"] = f"Bearer {api_key}"
# 如果只需要 api-key 头部:
# headers["api-key"] = api_key

return headers


def get_temp_dir() -> Path:
    """Return the directory given by ``get_astrbot_temp_path()`` as a Path, creating it if missing."""
    directory = Path(get_astrbot_temp_path())
    # Missing parent directories are created too; an existing directory is not an error.
    directory.mkdir(exist_ok=True, parents=True)
    return directory


def create_http_client(timeout: int | None, proxy: str) -> httpx.AsyncClient:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

`create_http_client` 函数中,`client_kwargs` 的类型提示 `dict[str, object]` 过于宽泛。考虑到 `timeout` 和 `follow_redirects` 的具体类型,可以将其细化为 `dict[str, int | bool | str]`,以提高类型安全性。

Suggested change
def create_http_client(timeout: int | None, proxy: str) -> httpx.AsyncClient:
def create_http_client(timeout: int | None, proxy: str) -> httpx.AsyncClient:
client_kwargs: dict[str, int | bool | str] = {

client_kwargs: dict[str, object] = {
"timeout": timeout,
"follow_redirects": True,
}
if proxy:
logger.info("[MiMo API] Using proxy: %s", proxy)
client_kwargs["proxy"] = proxy
return httpx.AsyncClient(**client_kwargs)


def build_api_url(api_base: str) -> str:
    """Return the chat-completions endpoint URL for ``api_base``.

    Trailing slashes are removed first. If the result already ends with
    ``/chat/completions`` it is returned as is; otherwise that path is appended.
    """
    endpoint = "/chat/completions"
    base = api_base.rstrip("/")
    return base if base.endswith(endpoint) else base + endpoint


@register_provider_adapter(
"mimo_tts_api",
"MiMo TTS API",
provider_type=ProviderType.TEXT_TO_SPEECH,
)
class ProviderMiMoTTSAPI(TTSProvider):
def __init__(
    self,
    provider_config: dict,
    provider_settings: dict,
) -> None:
    """Read the MiMo TTS options from ``provider_config`` and create the HTTP client.

    Args:
        provider_config: Provider configuration. Keys read here: ``api_key``,
            ``api_base``, ``proxy``, ``timeout``, ``model``, ``mimo-tts-voice``,
            ``mimo-tts-format``, ``mimo-tts-style-prompt``, ``mimo-tts-dialect``
            and ``mimo-tts-seed-text``.
        provider_settings: Passed unchanged to the base-class initializer.
    """
    super().__init__(provider_config, provider_settings)

    def required_option(key: str, default: str) -> str:
        # dict.get() only applies its default when the key is absent. A key
        # that is present but None or blank must fall back as well; otherwise
        # an empty api_base, for example, yields the URL "/chat/completions".
        value = provider_config.get(key)
        if value is None:
            return default
        return str(value).strip() or default

    self.chosen_api_key = provider_config.get("api_key", "")
    self.api_base = required_option("api_base", "https://api.xiaomimimo.com/v1")
    self.proxy = provider_config.get("proxy", "")
    self.timeout = normalize_timeout(provider_config.get("timeout", 20))
    self.voice = required_option("mimo-tts-voice", "mimo_default")
    self.audio_format = required_option("mimo-tts-format", "wav")
    # Optional free-text fields: None becomes "" because the prompt builder
    # calls .strip() on these attributes.
    self.style_prompt = provider_config.get("mimo-tts-style-prompt") or ""
    self.dialect = provider_config.get("mimo-tts-dialect") or ""
    seed_text = provider_config.get(
        "mimo-tts-seed-text",
        "Hello, MiMo, have you had lunch?",
    )
    # An explicitly empty seed text is kept as is; only None is normalized.
    self.seed_text = "" if seed_text is None else seed_text
    self.set_model(required_option("model", "mimo-v2-tts"))
    self.client = create_http_client(self.timeout, self.proxy)

def _build_user_prompt(self) -> str:
prompt_parts: list[str] = []

if self.style_prompt.strip():
prompt_parts.append(self.style_prompt.strip())
if self.dialect.strip():
prompt_parts.append(f"Please use {self.dialect.strip()} when speaking.")

if not prompt_parts:
return self.seed_text

if self.seed_text.strip():
prompt_parts.append(self.seed_text.strip())
Comment on lines +55 to +59
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

当前 `_build_user_prompt` 方法的逻辑可能导致 `seed_text` 被重复添加。如果 `style_prompt` 或 `dialect` 存在,`prompt_parts` 将不为空,然后 `seed_text` 会再次被追加。这可能不是预期行为。

建议修改逻辑,确保 seed_text 只在 prompt_parts 为空时作为默认值返回,或者只在 prompt_parts 不为空时追加一次。

Suggested change
if not prompt_parts:
return self.seed_text
if self.seed_text.strip():
prompt_parts.append(self.seed_text.strip())
if not prompt_parts:
return self.seed_text
# 如果 prompt_parts 不为空,且 seed_text 存在,则追加 seed_text
if self.seed_text.strip() and prompt_parts:
prompt_parts.append(self.seed_text.strip())
return " ".join(prompt_parts)


return " ".join(prompt_parts)

def _build_payload(self, text: str) -> dict:
return {
"model": self.model_name,
"messages": [
{
"role": "user",
"content": self._build_user_prompt(),
},
{
"role": "assistant",
"content": text,
},
],
"audio": {
"format": self.audio_format,
"voice": self.voice,
},
}

async def get_audio(self, text: str) -> str:
    """Request audio for ``text`` from the MiMo API and save it to a temp file.

    Args:
        text: Forwarded to ``_build_payload`` to build the request body.

    Returns:
        The path, as a string, of the audio file written under
        ``get_temp_dir()``.

    Raises:
        Exception: If the HTTP status indicates an error, the body is not
            valid JSON, the response carries no audio data, or the audio
            data is not valid base64.
    """
    response = await self.client.post(
        build_api_url(self.api_base),
        headers=build_headers(self.chosen_api_key),
        json=self._build_payload(text),
    )

    try:
        response.raise_for_status()
    except Exception as exc:
        error_text = response.text[:1024]
        raise Exception(
            f"MiMo TTS API request failed: HTTP {response.status_code}, response: {error_text}"
        ) from exc

    try:
        data = response.json()
    except ValueError as exc:
        raise Exception(
            f"MiMo TTS API returned a non-JSON response: {response.text[:1024]}"
        ) from exc

    # Walk choices[0].message.audio.data defensively: an empty "choices" list
    # or a null "message"/"audio" must end in the "no audio payload" error
    # below rather than an IndexError/AttributeError.
    audio_data = None
    if isinstance(data, dict):
        choices = data.get("choices")
        first_choice = choices[0] if isinstance(choices, list) and choices else None
        message = first_choice.get("message") if isinstance(first_choice, dict) else None
        audio = message.get("audio") if isinstance(message, dict) else None
        if isinstance(audio, dict):
            audio_data = audio.get("data")
    if not audio_data:
        raise Exception(f"MiMo TTS API returned no audio payload: {data}")

    try:
        audio_bytes = base64.b64decode(audio_data)
    except (ValueError, TypeError) as exc:
        # binascii.Error (bad padding/characters) is a ValueError subclass.
        raise Exception("MiMo TTS API returned invalid base64 audio data") from exc

    output_path = (
        get_temp_dir() / f"mimo_tts_api_{uuid.uuid4()}.{self.audio_format}"
    )
    output_path.write_bytes(audio_bytes)
    return str(output_path)

async def terminate(self):
    """Close the HTTP client if one is set."""
    if not self.client:
        return
    await self.client.aclose()
1 change: 1 addition & 0 deletions dashboard/src/composables/useProviderSources.ts
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@ export function useProviderSources(options: UseProviderSourcesOptions) {
openai_whisper_selfhost: 'speech_to_text',
sensevoice_stt_selfhost: 'speech_to_text',
openai_tts_api: 'text_to_speech',
mimo_tts_api: 'text_to_speech',
edge_tts: 'text_to_speech',
gsvi_tts_api: 'text_to_speech',
fishaudio_tts_api: 'text_to_speech',
Expand Down
22 changes: 21 additions & 1 deletion dashboard/src/i18n/locales/en-US/features/config-metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -1391,6 +1391,26 @@
"description": "voice",
"hint": "OpenAI TTS voice. OpenAI defaults: 'alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'."
},
"mimo-tts-voice": {
"description": "Voice",
"hint": "MiMo TTS voice name. Default is 'mimo_default'."
},
"mimo-tts-format": {
"description": "Output format",
"hint": "Audio format generated by MiMo TTS, for example 'wav'."
},
"mimo-tts-style-prompt": {
"description": "Style prompt",
"hint": "Guides speaking style, tone, or emotion such as gentle, lively, or calm. Optional."
},
"mimo-tts-dialect": {
"description": "Dialect",
"hint": "Target dialect or accent for generated speech, such as Sichuan dialect. Optional."
},
"mimo-tts-seed-text": {
"description": "Seed text",
"hint": "Reference text used to guide voice characteristics and speaking style."
},
"fishaudio-tts-character": {
"description": "character",
"hint": "Fishaudio TTS character. Default is Klee. More roles: https://fish.audio/zh-CN/discovery"
Expand Down Expand Up @@ -1518,4 +1538,4 @@
"helpMiddle": "or",
"helpSuffix": "."
}
}
}
22 changes: 21 additions & 1 deletion dashboard/src/i18n/locales/ru-RU/features/config-metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -1396,6 +1396,26 @@
"description": "API Base URL",
"hint": "Голоса OpenAI TTS: alloy, echo и др."
},
"mimo-tts-voice": {
"description": "Голос",
"hint": "Имя голоса MiMo TTS. По умолчанию 'mimo_default'."
},
"mimo-tts-format": {
"description": "Формат вывода",
"hint": "Формат аудио, создаваемого MiMo TTS, например 'wav'."
},
"mimo-tts-style-prompt": {
"description": "Подсказка стиля",
"hint": "Задает стиль речи, тон или эмоцию, например мягкий, живой или спокойный. Необязательно."
},
"mimo-tts-dialect": {
"description": "Диалект",
"hint": "Диалект или акцент для синтезируемой речи, например сычуаньский диалект. Необязательно."
},
"mimo-tts-seed-text": {
"description": "Начальный текст",
"hint": "Эталонный текст, который помогает задать особенности голоса и манеру речи."
},
"fishaudio-tts-character": {
"description": "Персонаж",
"hint": "Персонаж Fishaudio. По умолчанию Klee."
Expand Down Expand Up @@ -1523,4 +1543,4 @@
"helpMiddle": "или",
"helpSuffix": "."
}
}
}
22 changes: 21 additions & 1 deletion dashboard/src/i18n/locales/zh-CN/features/config-metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -1393,6 +1393,26 @@
"description": "voice",
"hint": "OpenAI TTS 的声音。OpenAI 默认支持:'alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'"
},
"mimo-tts-voice": {
"description": "音色",
"hint": "MiMo TTS 的音色名称。默认值为 'mimo_default'。"
},
"mimo-tts-format": {
"description": "输出格式",
"hint": "MiMo TTS 生成音频的格式,例如 'wav'。"
},
"mimo-tts-style-prompt": {
"description": "风格提示词",
"hint": "用于控制生成语音的说话风格、语气或情绪,例如温柔、活泼、沉稳等。可留空。"
},
"mimo-tts-dialect": {
"description": "方言",
"hint": "指定生成语音时使用的方言或口音,例如四川话、粤语口音等。可留空。"
},
"mimo-tts-seed-text": {
"description": "种子文本",
"hint": "用于引导音色和说话方式的参考文本,会影响生成语音的表达风格。"
},
"fishaudio-tts-character": {
"description": "character",
"hint": "fishaudio TTS 的角色。默认为可莉。更多角色请访问:https://fish.audio/zh-CN/discovery"
Expand Down Expand Up @@ -1520,4 +1540,4 @@
"helpMiddle": "或",
"helpSuffix": "。"
}
}
}
1 change: 1 addition & 0 deletions dashboard/src/utils/providerUtils.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ export function getProviderIcon(type) {
'lm_studio': 'https://cdn.jsdelivr.net/npm/@lobehub/icons-static-svg@latest/icons/lmstudio.svg',
'fishaudio': 'https://cdn.jsdelivr.net/npm/@lobehub/icons-static-svg@latest/icons/fishaudio.svg',
'minimax': 'https://cdn.jsdelivr.net/npm/@lobehub/icons-static-svg@latest/icons/minimax.svg',
'mimo': 'https://platform.xiaomimimo.com/favicon.874c9507.png',
'302ai': 'https://cdn.jsdelivr.net/npm/@lobehub/icons-static-svg@1.53.0/icons/ai302-color.svg',
'microsoft': 'https://cdn.jsdelivr.net/npm/@lobehub/icons-static-svg@latest/icons/microsoft.svg',
'vllm': 'https://cdn.jsdelivr.net/npm/@lobehub/icons-static-svg@latest/icons/vllm.svg',
Expand Down