fix: enhance text filtering for Edge TTS

p0n1 · p0n1 · commit fd76745ae36b · 2025-07-25T16:12:55.000+08:00
- Added a method to check for meaningful text, filtering out empty strings and punctuation-only strings, which may cause a NoAudioReceived error in Edge TTS.
- Updated the chunking logic to return only meaningful text parts, reducing the likelihood of NoAudioReceived errors.
- Introduced unit tests to validate the new filtering logic and ensure proper handling of various text cases.
diff --git a/audiobook_generator/tts_providers/edge_tts_provider.py b/audiobook_generator/tts_providers/edge_tts_provider.py
@@ -74,9 +74,43 @@ def parse_text(self):
             return [self.full_text]
 
         parts = self.full_text.split(self.break_string)
-        parts = [p for p in parts if p.strip()] # skip empty parts
-        logger.debug(f"split into <{len(parts)}> parts: {parts}")
-        return parts
+
+        # Drop empty parts and parts without meaningful text (these may cause a NoAudioReceived error in Edge TTS), and strip each part that is kept
+        meaningful_parts = []
+        for part in parts:
+            if self._is_meaningful_text(part):
+                meaningful_parts.append(part.strip())
+        
+        logger.debug(f"split into <{len(meaningful_parts)}> meaningful parts: {meaningful_parts}")
+        return meaningful_parts
+
+    def _is_meaningful_text(self, text: str) -> bool:
+        """
+        Check if a text chunk contains meaningful content for Edge TTS generation.
+
+        Args:
+            text: The text chunk to check
+
+        Returns:
+            True if the text is meaningful for Edge TTS, False otherwise
+        """
+
+        stripped_text = text.strip()
+        if not stripped_text:
+            return False
+
+        # Check whether the text contains at least one alphanumeric character.
+        # This filters out punctuation-only chunks, which may cause a NoAudioReceived error in Edge TTS,
+        # but keeps single letters like 'A', 'B', 'C', as well as 'A,', 'B,', 'C,'.
+        if not any(
+            char.isalnum() for char in stripped_text
+        ):  # i.e. no character in the text is alphanumeric
+            if len(stripped_text) >= 50:
+                logger.warning(
+                    f"Found a long text chunk without alphanumeric content: <{stripped_text}>, this might be a bug for specific text, please open an issue on https://github.com/p0n1/epub_to_audiobook/issues"
+                )
+            return False
+        return True
 
     async def chunkify(self):
         logger.debug(f"Chunkifying the text")
diff --git a/tests/audiobook_generator/tts_providers/edge_tts_provider_test.py b/tests/audiobook_generator/tts_providers/edge_tts_provider_test.py
@@ -0,0 +1,160 @@
+import unittest
+from unittest.mock import MagicMock
+
+from audiobook_generator.config.general_config import GeneralConfig
+from audiobook_generator.tts_providers.edge_tts_provider import CommWithPauses
+
+
+def get_edge_config():
+    """Helper function to create a basic EdgeTTS config for testing"""
+    args = MagicMock(
+        input_file='../../../examples/The_Life_and_Adventures_of_Robinson_Crusoe.epub',
+        output_folder='output',
+        preview=False,
+        output_text=False,
+        log='INFO',
+        newline_mode='double',
+        chapter_start=1,
+        chapter_end=-1,
+        remove_endnotes=False,
+        tts='edge',
+        language='en-US',
+        voice_name='en-US-GuyNeural',
+        output_format='audio-24khz-48kbitrate-mono-mp3',
+        model_name='',
+        break_duration='1250'
+    )
+    return GeneralConfig(args)
+
+
+class TestEdgeTtsProvider(unittest.TestCase):
+
+    def setUp(self):
+        """Set up test fixtures"""
+        self.comm_with_pauses = CommWithPauses(
+            text="Test text",
+            voice_name="en-US-GuyNeural",
+            break_string=" @BRK#".strip(),
+            break_duration=1250,
+            output_format_ext="mp3"
+        )
+
+    def test_is_meaningful_text_empty_strings(self):
+        """Test that empty strings are filtered out"""
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text(""))
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text("   "))
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text("\t"))
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text("\n"))
+
+    def test_is_meaningful_text_single_letters(self):
+        """Test that single letters are kept"""
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("A"))
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("B"))
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("C"))
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("I"))
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("a"))
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("1"))
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("0"))
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("的"))
+
+    def test_is_meaningful_text_single_punctuation(self):
+        """Test that single punctuation marks are filtered out"""
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text("'"))
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text("."))
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text(","))
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text("!"))
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text("?"))
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text(":"))
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text(";"))
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text("。"))
+
+    def test_is_meaningful_text_short_mixed_content(self):
+        """Test short text with mixed content (letters + punctuation)"""
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("A."))
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("B,"))
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("C!"))
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("I?"))
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("a:"))
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("1;"))
+
+    def test_is_meaningful_text_short_punctuation_only(self):
+        """Test that short punctuation-only sequences (5 chars or fewer) are filtered out"""
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text("--"))
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text("..."))
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text("!!!"))
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text("????"))
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text("-----"))
+
+    def test_is_meaningful_text_longer_punctuation(self):
+        """Test that longer punctuation-only sequences (6+ chars) are filtered out as well, regardless of length"""
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text("......"))
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text("------"))
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text("!!!!!!"))
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text("???????"))
+
+    def test_is_meaningful_text_words(self):
+        """Test that regular words are always kept"""
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("Hello"))
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("world"))
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("test123"))
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("Hello!"))
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("Hello, world!"))
+
+    def test_is_meaningful_text_whitespace_handling(self):
+        """Test that whitespace is properly stripped"""
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("  A  "))
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("\tHello\t"))
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("\n1\n"))
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text("  '  "))
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text("\t.\t"))
+
+    def test_is_meaningful_text_more_cases(self):
+        """Test more mixed cases"""
+        # Exactly 5 characters - punctuation only (should be filtered)
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text("'...'"))
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text(",,,,,"))
+        
+        # Exactly 5 characters - with alphanumeric (should be kept)
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("A...."))
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text("1,,,,"))
+        
+        # Exactly 6 characters - punctuation only (should be filtered)
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text("''''''"))
+        self.assertFalse(self.comm_with_pauses._is_meaningful_text("......"))
+
+        self.assertTrue(self.comm_with_pauses._is_meaningful_text(".............你好世界"))
+
+    def test_parse_text_filters_meaningless_chunks(self):
+        """Test that parse_text properly filters out meaningless chunks"""
+        # Create a CommWithPauses instance with text that contains meaningless chunks
+        comm = CommWithPauses(
+            text="Hello @BRK# ' @BRK# world @BRK# ... @BRK# A @BRK# ------ @BRK# end",
+            voice_name="en-US-GuyNeural",
+            break_string=" @BRK#".strip(),
+            break_duration=1250,
+            output_format_ext="mp3"
+        )
+        
+        parsed = comm.parsed
+        
+        # Should keep: "Hello", "world", "A", "end"
+        # Should filter: "'" (single punctuation), "..." (3 chars punctuation only), "------" (6 chars punctuation only)
+        expected_chunks = ["Hello", "world", "A", "end"]
+        self.assertEqual(parsed, expected_chunks)
+
+    def test_parse_text_no_break_string(self):
+        """Test that parse_text handles text without break strings"""
+        comm = CommWithPauses(
+            text="This is a test without breaks",
+            voice_name="en-US-GuyNeural",
+            break_string=" @BRK#".strip(),
+            break_duration=1250,
+            output_format_ext="mp3"
+        )
+        
+        parsed = comm.parsed
+        self.assertEqual(parsed, ["This is a test without breaks"])
+
+
+if __name__ == '__main__':
+    unittest.main()