Skip to content

Commit 0787463

Browse files
committed
Add Python API examples for Supertonic TTS
1 parent b1f921f commit 0787463

5 files changed

Lines changed: 123 additions & 1 deletion

File tree

.github/scripts/test-python.sh

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,20 @@ log() {
88
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
99
}
1010

11+
log "test Supertonic TTS"
12+
13+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
14+
tar xvf sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
15+
rm sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
16+
17+
python3 ./supertonic-tts.py
18+
19+
rm -rf sherpa-onnx-supertonic-tts-int8-2026-03-06
20+
21+
mkdir -p tts
22+
cp supertonic-en.wav tts/
23+
ls -lh tts
24+
1125
log "test Moonshine v2"
1226

1327
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-quantized-2026-02-27.tar.bz2
@@ -399,7 +413,7 @@ done
399413

400414
log "Offline TTS test"
401415
# test waves are saved in ./tts
402-
mkdir ./tts
416+
mkdir -p ./tts
403417

404418
log "test kitten tts"
405419

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,3 +176,4 @@ sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17
176176
sherpa-onnx-fire-red-asr2-ctc-zh_en-int8-2026-02-25
177177
non-streaming-fire-red-asr-ctc-decode-files
178178
sherpa-onnx-moonshine-*-quantized-2026-02-27
179+
sherpa-onnx-supertonic-tts-int8-2026-03-06
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
#!/usr/bin/env python3
2+
#
3+
# Copyright (c) 2026 Xiaomi Corporation
4+
5+
"""
6+
This file demonstrates how to use the sherpa-onnx Python API
7+
for SupertonicTTS.
8+
9+
10+
Usage:
11+
12+
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
13+
tar xvf sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
14+
rm sherpa-onnx-supertonic-tts-int8-2026-03-06.tar.bz2
15+
16+
python3 ./supertonic-tts.py
17+
18+
You can find more models at
19+
https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
20+
21+
Please see
22+
https://k2-fsa.github.io/sherpa/onnx/tts/supertonic.html
23+
for details.
24+
25+
"""
26+
27+
import time
28+
29+
import sherpa_onnx
30+
import soundfile as sf
31+
32+
33+
def create_tts(model_dir="./sherpa-onnx-supertonic-tts-int8-2026-03-06"):
    """Create an offline TTS engine for a Supertonic int8 model.

    Args:
      model_dir:
        Directory that contains the extracted model files
        (duration_predictor.int8.onnx, text_encoder.int8.onnx,
        vector_estimator.int8.onnx, vocoder.int8.onnx, tts.json,
        unicode_indexer.bin and voice.bin). The default matches the
        directory name of the model tarball extracted in the current
        working directory.

    Returns:
      A sherpa_onnx.OfflineTts constructed from the configuration.

    Raises:
      ValueError: If the configuration does not pass validate().
    """
    supertonic_config = sherpa_onnx.OfflineTtsSupertonicModelConfig(
        duration_predictor=f"{model_dir}/duration_predictor.int8.onnx",
        text_encoder=f"{model_dir}/text_encoder.int8.onnx",
        vector_estimator=f"{model_dir}/vector_estimator.int8.onnx",
        vocoder=f"{model_dir}/vocoder.int8.onnx",
        tts_json=f"{model_dir}/tts.json",
        unicode_indexer=f"{model_dir}/unicode_indexer.bin",
        voice_style=f"{model_dir}/voice.bin",
    )

    tts_config = sherpa_onnx.OfflineTtsConfig(
        model=sherpa_onnx.OfflineTtsModelConfig(
            supertonic=supertonic_config,
            debug=False,
            num_threads=2,
            provider="cpu",
        )
    )

    # Fail early with a Python exception instead of handing an invalid
    # config to the OfflineTts constructor.
    if not tts_config.validate():
        raise ValueError(
            "Please read the previous error messages and re-check your config"
        )

    return sherpa_onnx.OfflineTts(tts_config)
56+
57+
58+
def main():
    """Synthesize one English sentence, save it as a wave file, and print the RTF.

    The audio is written to ./supertonic-en.wav as 16-bit PCM. If generation
    yields no samples, an error message is printed and nothing is written.
    """
    tts = create_tts()

    text = "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be, a statesman, a businessman, an official, or a scholar."

    gen_config = sherpa_onnx.GenerationConfig()

    # This model has 10 speakers. Valid sid: 0-9
    gen_config.sid = 6
    gen_config.num_steps = 5
    gen_config.speed = 1.25  # larger -> faster

    # We use en for English.
    # You can also use es, pt, fr, ko.
    # This single model supports 5 languages.
    gen_config.extra["lang"] = "en"

    # perf_counter() is monotonic, so the measured interval cannot be
    # distorted by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    audio = tts.generate(text, gen_config)
    elapsed_seconds = time.perf_counter() - start

    # Checked before computing the RTF, which divides by the audio duration.
    if len(audio.samples) == 0:
        print("Error in generating audios. Please read previous error messages.")
        return

    audio_duration = len(audio.samples) / audio.sample_rate
    real_time_factor = elapsed_seconds / audio_duration

    output_filename = "./supertonic-en.wav"
    sf.write(
        output_filename,
        audio.samples,
        samplerate=audio.sample_rate,
        subtype="PCM_16",
    )
    print(f"Saved to {output_filename}")
    print(f"The text is '{text}'")
    print(f"Elapsed seconds: {elapsed_seconds:.3f}")
    print(f"Audio duration in seconds: {audio_duration:.3f}")
    print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")
99+
100+
101+
if __name__ == "__main__":
102+
main()

sherpa-onnx/csrc/offline-tts-supertonic-impl.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -603,6 +603,10 @@ void OfflineTtsSupertonicImpl::InitVoiceStyle(const std::vector<char> &buf) {
603603
}
604604
num_speakers_ = num_speakers;
605605
full_style_ = std::move(style);
606+
607+
if (config_.model.debug) {
608+
SHERPA_ONNX_LOGE("Number of speakers: %d", num_speakers_);
609+
}
606610
}
607611

608612
OfflineTtsSupertonicImpl::StyleSliceView

sherpa-onnx/python/sherpa_onnx/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
OfflineTtsMatchaModelConfig,
5252
OfflineTtsModelConfig,
5353
OfflineTtsPocketModelConfig,
54+
OfflineTtsSupertonicModelConfig,
5455
OfflineTtsVitsModelConfig,
5556
OfflineTtsZipvoiceModelConfig,
5657
OfflineWenetCtcModelConfig,

0 commit comments

Comments
 (0)