Merge pull request pipecat-ai#3424 from pipecat-ai/mb/tts-append-trailing-space

markbackman · web-flow · commit a298ce3b417f · 2026-01-13T10:42:40.000-05:00
Add append_trailing_space to TTSService to prevent vocalizing trailing punctuation
diff --git a/changelog/3424.added.md b/changelog/3424.added.md
@@ -0,0 +1 @@
+- Added `append_trailing_space` parameter to `TTSService` to automatically append a trailing space to text before sending to TTS, helping prevent some services from vocalizing trailing punctuation.
diff --git a/changelog/3424.changed.md b/changelog/3424.changed.md
@@ -0,0 +1 @@
+- `DeepgramTTSService` and `RimeTTSService` now set `append_trailing_space` to `True` to prevent punctuation (e.g., “dot”) from being pronounced.
diff --git a/src/pipecat/services/deepgram/tts.py b/src/pipecat/services/deepgram/tts.py
@@ -85,6 +85,7 @@ def __init__(
             sample_rate=sample_rate,
             pause_frame_processing=True,
             push_stop_frames=True,
+            append_trailing_space=True,
             **kwargs,
         )
 
@@ -291,24 +292,22 @@ async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
         Yields:
             Frame: Audio frames containing the synthesized speech, plus start/stop frames.
         """
-        # Append trailing space to prevent TTS from vocalizing trailing periods as "dot"
-        text_with_trailing_space = text + " "
-        logger.debug(f"{self}: Generating TTS [{text_with_trailing_space}]")
+        logger.debug(f"{self}: Generating TTS [{text}]")
 
         try:
             # Reconnect if the websocket is closed
             if not self._websocket or self._websocket.state is State.CLOSED:
                 await self._connect()
 
             await self.start_ttfb_metrics()
-            await self.start_tts_usage_metrics(text_with_trailing_space)
+            await self.start_tts_usage_metrics(text)
 
             yield TTSStartedFrame()
 
             # Send text message to Deepgram
             # Note: We don't send Flush here - that should only be sent when the
             # LLM finishes a complete response via flush_audio()
-            speak_msg = {"type": "Speak", "text": text_with_trailing_space}
+            speak_msg = {"type": "Speak", "text": text}
             await self._get_websocket().send(json.dumps(speak_msg))
 
             # The audio frames will be handled in _receive_messages
diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py
@@ -130,6 +130,7 @@ def __init__(
             push_text_frames=False,
             push_stop_frames=True,
             pause_frame_processing=True,
+            append_trailing_space=True,
             sample_rate=sample_rate,
             **kwargs,
         )
diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py
@@ -101,6 +101,9 @@ def __init__(
         silence_time_s: float = 2.0,
         # if True, we will pause processing frames while we are receiving audio
         pause_frame_processing: bool = False,
+        # if True, append a trailing space to text before sending to TTS
+        # (helps prevent some TTS services from vocalizing trailing punctuation)
+        append_trailing_space: bool = False,
         # TTS output sample rate
         sample_rate: Optional[int] = None,
         # Text aggregator to aggregate incoming tokens and decide when to push to the TTS.
@@ -132,6 +135,8 @@ def __init__(
             push_silence_after_stop: Whether to push silence audio after TTSStoppedFrame.
             silence_time_s: Duration of silence to push when push_silence_after_stop is True.
             pause_frame_processing: Whether to pause frame processing during audio generation.
+            append_trailing_space: Whether to append a trailing space to text before sending to TTS.
+                This helps prevent some TTS services from vocalizing trailing punctuation (e.g., "dot").
             sample_rate: Output sample rate for generated audio.
             text_aggregator: Custom text aggregator for processing incoming text.
 
@@ -161,6 +166,7 @@ def __init__(
         self._push_silence_after_stop: bool = push_silence_after_stop
         self._silence_time_s: float = silence_time_s
         self._pause_frame_processing: bool = pause_frame_processing
+        self._append_trailing_space: bool = append_trailing_space
         self._init_sample_rate = sample_rate
         self._sample_rate = 0
         self._voice_id: str = ""
@@ -273,6 +279,19 @@ def language_to_service_language(self, language: Language) -> Optional[str]:
         """
         return Language(language)
 
+    def _prepare_text_for_tts(self, text: str) -> str:
+        """Apply the final text preparation step before the text is passed to `run_tts`.
+
+        Args:
+            text: The text to prepare.
+
+        Returns:
+            The text, plus one trailing space when enabled and not already present.
+        """
+        if self._append_trailing_space and not text.endswith(" "):
+            return text + " "
+        return text
+
     async def update_setting(self, key: str, value: Any):
         """Update a service-specific setting.
 
@@ -603,7 +622,10 @@ async def _push_tts_frames(
         for aggregation_type, transform in self._text_transforms:
             if aggregation_type == type or aggregation_type == "*":
                 transformed_text = await transform(transformed_text, type)
-        await self.process_generator(self.run_tts(transformed_text))
+
+        # Apply any final text preparation (e.g., trailing space)
+        prepared_text = self._prepare_text_for_tts(transformed_text)
+        await self.process_generator(self.run_tts(prepared_text))
 
         await self.stop_processing_metrics()
 

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -0,0 +1 @@` |
|  | 1 | +- Added `append_trailing_space` parameter to `TTSService` to automatically append a trailing space to text before sending to TTS, helping prevent some services from vocalizing trailing punctuation. |
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -0,0 +1 @@` |
|  | 1 | +- `DeepgramTTSService` and `RimeTTSService` now set `append_trailing_space` to `True` to prevent punctuation (e.g., “dot”) from being pronounced. |
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -130,6 +130,7 @@ def __init__(` |
| 130 | 130 | `push_text_frames=False,` |
| 131 | 131 | `push_stop_frames=True,` |
| 132 | 132 | `pause_frame_processing=True,` |
|  | 133 | `+ append_trailing_space=True,` |
| 133 | 134 | `sample_rate=sample_rate,` |
| 134 | 135 | `**kwargs,` |
| 135 | 136 | `)` |