Skip to content

Commit fd76745

Browse files
committed
fix: enhance text filtering for Edge TTS
- Added a method to check for meaningful text, filtering out empty strings and punctuation-only strings, which may cause a NoAudioReceived error in Edge TTS.
- Updated the chunking logic to return only meaningful text parts, reducing NoAudioReceived errors.
- Introduced unit tests to validate the new filtering logic and ensure proper handling of various text cases.
1 parent f387291 commit fd76745

File tree

2 files changed

+197
-3
lines changed

2 files changed

+197
-3
lines changed

audiobook_generator/tts_providers/edge_tts_provider.py

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,43 @@ def parse_text(self):
7474
return [self.full_text]
7575

7676
parts = self.full_text.split(self.break_string)
77-
parts = [p for p in parts if p.strip()] # skip empty parts
78-
logger.debug(f"split into <{len(parts)}> parts: {parts}")
79-
return parts
77+
78+
# Filter out empty parts and parts that don't contain meaningful text which may cause NoAudioReceived error in Edge TTS, then strip each meaningful part
79+
meaningful_parts = []
80+
for part in parts:
81+
if self._is_meaningful_text(part):
82+
meaningful_parts.append(part.strip())
83+
84+
logger.debug(f"split into <{len(meaningful_parts)}> meaningful parts: {meaningful_parts}")
85+
return meaningful_parts
86+
87+
def _is_meaningful_text(self, text: str) -> bool:
    """
    Decide whether a text chunk is worth sending to Edge TTS.

    A chunk counts as meaningful as soon as it contains at least one
    alphanumeric character (per ``str.isalnum``), so single letters such as
    'A' or mixed chunks such as 'A,' are kept. Chunks that are empty,
    whitespace-only, or made up purely of non-alphanumeric characters are
    rejected, because they may cause a NoAudioReceived error in Edge TTS.

    Args:
        text: The text chunk to check

    Returns:
        True if the chunk contains at least one alphanumeric character,
        False otherwise
    """
    content = text.strip()

    # One alphanumeric character anywhere is enough to keep the chunk.
    for character in content:
        if character.isalnum():
            return True

    # Reaching this point means the chunk is empty or has no alphanumeric
    # characters at all. A long run of such characters is unexpected, so it
    # is reported before being dropped.
    if len(content) >= 50:
        logger.warning(
            f"Found a long text chunk without alphanumeric content: <{content}>, this might be a bug for specific text, please open an issue on https://github.com/p0n1/epub_to_audiobook/issues"
        )
    return False
80114

81115
async def chunkify(self):
82116
logger.debug(f"Chunkifying the text")
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
import unittest
2+
from unittest.mock import MagicMock
3+
4+
from audiobook_generator.config.general_config import GeneralConfig
5+
from audiobook_generator.tts_providers.edge_tts_provider import CommWithPauses
6+
7+
8+
def get_edge_config():
    """Build a GeneralConfig from mocked command-line arguments that select the Edge TTS provider."""
    cli_options = {
        'input_file': '../../../examples/The_Life_and_Adventures_of_Robinson_Crusoe.epub',
        'output_folder': 'output',
        'preview': False,
        'output_text': False,
        'log': 'INFO',
        'newline_mode': 'double',
        'chapter_start': 1,
        'chapter_end': -1,
        'remove_endnotes': False,
        'tts': 'edge',
        'language': 'en-US',
        'voice_name': 'en-US-GuyNeural',
        'output_format': 'audio-24khz-48kbitrate-mono-mp3',
        'model_name': '',
        'break_duration': '1250',
    }
    # MagicMock turns each keyword argument into an attribute on the mock,
    # standing in for the parsed argparse namespace.
    mocked_args = MagicMock(**cli_options)
    return GeneralConfig(mocked_args)
28+
29+
30+
class TestEdgeTtsProvider(unittest.TestCase):
    """Tests for the text filtering performed by ``CommWithPauses``.

    ``_is_meaningful_text`` is expected to accept any chunk that contains at
    least one alphanumeric character and to reject everything else,
    regardless of the chunk's length. Every input is checked inside its own
    ``subTest`` so that one failing sample does not hide the others.
    """

    def setUp(self):
        """Set up test fixtures"""
        self.comm_with_pauses = CommWithPauses(
            text="Test text",
            voice_name="en-US-GuyNeural",
            break_string=" @BRK#".strip(),
            break_duration=1250,
            output_format_ext="mp3"
        )

    def _assert_meaningful(self, samples, expected):
        """Assert that ``_is_meaningful_text`` yields ``expected`` for every sample.

        Args:
            samples: Iterable of text chunks to check
            expected: True if the chunks must be kept, False if they must be filtered out
        """
        check = self.assertTrue if expected else self.assertFalse
        for sample in samples:
            with self.subTest(text=sample):
                check(self.comm_with_pauses._is_meaningful_text(sample))

    def test_is_meaningful_text_empty_strings(self):
        """Test that empty and whitespace-only strings are filtered out"""
        self._assert_meaningful(["", " ", "\t", "\n"], False)

    def test_is_meaningful_text_single_letters(self):
        """Test that single letters, digits and CJK characters are kept"""
        self._assert_meaningful(["A", "B", "C", "I", "a", "1", "0", "的"], True)

    def test_is_meaningful_text_single_punctuation(self):
        """Test that single punctuation marks are filtered out"""
        self._assert_meaningful(["'", ".", ",", "!", "?", ":", ";", "。"], False)

    def test_is_meaningful_text_short_mixed_content(self):
        """Test short text with mixed content (letters + punctuation)"""
        self._assert_meaningful(["A.", "B,", "C!", "I?", "a:", "1;"], True)

    def test_is_meaningful_text_short_punctuation_only(self):
        """Test that short punctuation-only sequences are filtered out"""
        self._assert_meaningful(["--", "...", "!!!", "????", "-----"], False)

    def test_is_meaningful_text_longer_punctuation(self):
        """Test that longer punctuation-only sequences are filtered out as well"""
        self._assert_meaningful(["......", "------", "!!!!!!", "???????"], False)

    def test_is_meaningful_text_words(self):
        """Test that regular words are always kept"""
        self._assert_meaningful(
            ["Hello", "world", "test123", "Hello!", "Hello, world!"], True
        )

    def test_is_meaningful_text_whitespace_handling(self):
        """Test that surrounding whitespace is ignored when classifying a chunk"""
        self._assert_meaningful([" A ", "\tHello\t", "\n1\n"], True)
        self._assert_meaningful([" ' ", "\t.\t"], False)

    def test_is_meaningful_text_more_cases(self):
        """Test more mixed cases"""
        # Five characters, punctuation only (should be filtered)
        self._assert_meaningful(["'...'", ",,,,,"], False)

        # Five characters including one alphanumeric (should be kept)
        self._assert_meaningful(["A....", "1,,,,"], True)

        # Six characters, punctuation only (should be filtered)
        self._assert_meaningful(["''''''", "......"], False)

        # Leading punctuation followed by CJK text (should be kept)
        self._assert_meaningful([".............你好世界"], True)

    def test_parse_text_filters_meaningless_chunks(self):
        """Test that the parsed chunks exclude meaningless parts"""
        # Create a CommWithPauses instance with text that contains meaningless chunks
        comm = CommWithPauses(
            text="Hello @BRK# ' @BRK# world @BRK# ... @BRK# A @BRK# ------ @BRK# end",
            voice_name="en-US-GuyNeural",
            break_string=" @BRK#".strip(),
            break_duration=1250,
            output_format_ext="mp3"
        )

        # NOTE(review): `parsed` is presumably populated by parse_text during
        # construction; confirm against CommWithPauses.__init__.
        parsed = comm.parsed

        # Should keep: "Hello", "world", "A", "end" (each stripped of whitespace)
        # Should filter: "'", "..." and "------" (no alphanumeric characters)
        expected_chunks = ["Hello", "world", "A", "end"]
        self.assertEqual(parsed, expected_chunks)

    def test_parse_text_no_break_string(self):
        """Test that text without break strings is returned as a single chunk"""
        comm = CommWithPauses(
            text="This is a test without breaks",
            voice_name="en-US-GuyNeural",
            break_string=" @BRK#".strip(),
            break_duration=1250,
            output_format_ext="mp3"
        )

        parsed = comm.parsed
        self.assertEqual(parsed, ["This is a test without breaks"])
157+
158+
159+
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)