-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Add Python API examples for Supertonic TTS #3264
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,102 @@ | ||||||
| #!/usr/bin/env python3 | ||||||
| # | ||||||
| # Copyright (c) 2026 Xiaomi Corporation | ||||||
|
|
||||||
| """ | ||||||
| This file demonstrates how to use sherpa-onnx Python API | ||||||
| for SupertonicTTS. | ||||||
|
|
||||||
|
|
||||||
| Usage: | ||||||
|
|
||||||
| wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2 | ||||||
| tar xvf sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2 | ||||||
| rm sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2 | ||||||
|
|
||||||
| python3 ./supertonic-tts.py | ||||||
|
|
||||||
| You can find more models at | ||||||
| https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models | ||||||
|
|
||||||
| Please see | ||||||
| https://k2-fsa.github.io/sherpa/onnx/tts/supertonic.html | ||||||
| for details. | ||||||
|
|
||||||
| """ | ||||||
|
|
||||||
| import time | ||||||
|
|
||||||
| import sherpa_onnx | ||||||
| import soundfile as sf | ||||||
|
|
||||||
|
|
||||||
| def create_tts(): | ||||||
| tts_config = sherpa_onnx.OfflineTtsConfig( | ||||||
| model=sherpa_onnx.OfflineTtsModelConfig( | ||||||
| supertonic=sherpa_onnx.OfflineTtsSupertonicModelConfig( | ||||||
| duration_predictor="./sherpa-onnx-supertonic-tts-int8-2026-03-06/duration_predictor.int8.onnx", | ||||||
| text_encoder="./sherpa-onnx-supertonic-tts-int8-2026-03-06/text_encoder.int8.onnx", | ||||||
| vector_estimator="./sherpa-onnx-supertonic-tts-int8-2026-03-06/vector_estimator.int8.onnx", | ||||||
| vocoder="./sherpa-onnx-supertonic-tts-int8-2026-03-06/vocoder.int8.onnx", | ||||||
| tts_json="./sherpa-onnx-supertonic-tts-int8-2026-03-06/tts.json", | ||||||
| unicode_indexer="./sherpa-onnx-supertonic-tts-int8-2026-03-06/unicode_indexer.bin", | ||||||
| voice_style="./sherpa-onnx-supertonic-tts-int8-2026-03-06/voice.bin", | ||||||
| ), | ||||||
| debug=False, | ||||||
| num_threads=2, | ||||||
| provider="cpu", | ||||||
| ) | ||||||
| ) | ||||||
| if not tts_config.validate(): | ||||||
| raise ValueError( | ||||||
| "Please read the previous error messages and re-check your config" | ||||||
| ) | ||||||
|
|
||||||
| return sherpa_onnx.OfflineTts(tts_config) | ||||||
|
|
||||||
|
|
||||||
| def main(): | ||||||
| tts = create_tts() | ||||||
|
|
||||||
| text = "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be, a statesman, a businessman, an official, or a scholar." | ||||||
|
|
||||||
| gen_config = sherpa_onnx.GenerationConfig() | ||||||
|
|
||||||
| # This model has 10 speakers. Valid sid: 0-9 | ||||||
| gen_config.sid = 6 | ||||||
| gen_config.num_steps = 5 | ||||||
| gen_config.speed = 1.25 # larger -> faster | ||||||
|
|
||||||
| # We use en for English. | ||||||
| # You can also use es, pt, fr, ko. | ||||||
| # This single model supports 5 languages. | ||||||
| gen_config.extra["lang"] = "en" | ||||||
|
|
||||||
| start = time.time() | ||||||
| audio = tts.generate(text, gen_config) | ||||||
| end = time.time() | ||||||
|
|
||||||
| if len(audio.samples) == 0: | ||||||
| print("Error in generating audios. Please read previous error messages.") | ||||||
|
||||||
| print("Error in generating audios. Please read previous error messages.") | |
| print("Error in generating audio. Please read previous error messages.") |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -603,6 +603,10 @@ void OfflineTtsSupertonicImpl::InitVoiceStyle(const std::vector<char> &buf) { | |||||
| } | ||||||
| num_speakers_ = num_speakers; | ||||||
| full_style_ = std::move(style); | ||||||
|
|
||||||
| if (config_.model.debug) { | ||||||
| SHERPA_ONNX_LOGE("Number of speakers: %d", num_speakers_); | ||||||
|
||||||
| SHERPA_ONNX_LOGE("Number of speakers: %d", num_speakers_); | |
| SHERPA_ONNX_LOGD("Number of speakers: %d", num_speakers_); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using SHERPA_ONNX_LOGE for a debug message is misleading, as LOGE implies an error-level message. For debug information, it's better to use a logging level appropriate for debugging, such as SHERPA_ONNX_LOG(DEBUG). This improves clarity and allows for more granular control over log verbosity.
if (config_.model.debug) {
SHERPA_ONNX_LOG(DEBUG) << "Number of speakers: " << num_speakers_;
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
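The model directory path is hardcoded in multiple places (it is repeated in all seven file paths passed to `OfflineTtsSupertonicModelConfig` in `create_tts()`). To improve maintainability and make it easier to update the model version in the future, it's better to define the model directory in a variable and use f-strings to construct the file paths. For example, set `model_dir = "./sherpa-onnx-supertonic-tts-int8-2026-03-06"` once and pass `duration_predictor=f"{model_dir}/duration_predictor.int8.onnx"`, and likewise for the remaining files.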