You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
- Added a method to check for meaningful text, filtering out empty strings and punctuation-only strings, which may cause a NoAudioReceived error in Edge TTS.
- Updated the chunking logic to return only meaningful text parts, reducing the occurrence of NoAudioReceived errors.
- Introduced unit tests to validate the new filtering logic and ensure proper handling of various text cases.
Copy file name to clipboard. Expand all lines: audiobook_generator/tts_providers/edge_tts_provider.py
+37-3Lines changed: 37 additions & 3 deletions
Original file line number
Diff line number
Diff line change
@@ -74,9 +74,43 @@ def parse_text(self):
74
74
return [self.full_text]
75
75
76
76
parts=self.full_text.split(self.break_string)
77
-
parts= [pforpinpartsifp.strip()] # skip empty parts
78
-
logger.debug(f"split into <{len(parts)}> parts: {parts}")
79
-
returnparts
77
+
78
+
# Filter out empty parts and parts that don't contain meaningful text which may cause NoAudioReceived error in Edge TTS, then strip each meaningful part
79
+
meaningful_parts= []
80
+
forpartinparts:
81
+
ifself._is_meaningful_text(part):
82
+
meaningful_parts.append(part.strip())
83
+
84
+
logger.debug(f"split into <{len(meaningful_parts)}> meaningful parts: {meaningful_parts}")
85
+
returnmeaningful_parts
86
+
87
+
def_is_meaningful_text(self, text: str) ->bool:
88
+
"""
89
+
Check if a text chunk contains meaningful content for Edge TTS generation.
90
+
91
+
Args:
92
+
text: The text chunk to check
93
+
94
+
Returns:
95
+
True if the text is meaningful for Edge TTS, False otherwise
96
+
"""
97
+
98
+
stripped_text=text.strip()
99
+
ifnotstripped_text:
100
+
returnFalse
101
+
102
+
# Check if the text contains any alphanumeric characters
103
+
# This filters out problematic pure punctuations without alphanumeric content which may cause NoAudioReceived error in Edge TTS
104
+
# but keeps single letters like 'A', 'B', 'C', or 'A,' 'B,' 'C,'
105
+
ifnotany(
106
+
char.isalnum() forcharinstripped_text
107
+
): # means every character in the text is not alphanumeric
108
+
iflen(stripped_text) >=50:
109
+
logger.warning(
110
+
f"Found a long text chunk without alphanumeric content: <{stripped_text}>, this might be a bug for specific text, please open an issue on https://github.com/p0n1/epub_to_audiobook/issues"
0 commit comments