NvidiaSTTService: initialize client on StartFrame

aconchillo · aconchillo · commit 671dc8cd9bbe · 2026-01-20T08:58:14.000-08:00
Initialize client on StartFrame so errors are reported within the pipeline.
diff --git a/src/pipecat/services/nvidia/stt.py b/src/pipecat/services/nvidia/stt.py
@@ -134,6 +134,7 @@ def __init__(
 
         params = params or NvidiaSTTService.InputParams()
 
+        self._server = server
         self._api_key = api_key
         self._use_ssl = use_ssl
         self._profanity_filter = False
@@ -162,18 +163,54 @@ def __init__(
 
         self.set_model_name(model_function_map.get("model_name"))
 
+        self._asr_service = None
+        self._queue = None
+        self._config = None
+        self._thread_task = None
+        self._response_task = None
+
+    def _initialize_client(self):
         metadata = [
             ["function-id", self._function_id],
-            ["authorization", f"Bearer {api_key}"],
+            ["authorization", f"Bearer {self._api_key}"],
         ]
-        auth = riva.client.Auth(None, self._use_ssl, server, metadata)
+        auth = riva.client.Auth(None, self._use_ssl, self._server, metadata)
 
         self._asr_service = riva.client.ASRService(auth)
 
-        self._queue = None
-        self._config = None
-        self._thread_task = None
-        self._response_task = None
+    def _create_recognition_config(self):
+        """Create the NVIDIA Riva ASR recognition configuration."""
+        config = riva.client.StreamingRecognitionConfig(
+            config=riva.client.RecognitionConfig(
+                encoding=riva.client.AudioEncoding.LINEAR_PCM,
+                language_code=self._language_code,
+                model="",
+                max_alternatives=1,
+                profanity_filter=self._profanity_filter,
+                enable_automatic_punctuation=self._automatic_punctuation,
+                verbatim_transcripts=not self._no_verbatim_transcripts,
+                sample_rate_hertz=self.sample_rate,
+                audio_channel_count=1,
+            ),
+            interim_results=True,
+        )
+
+        riva.client.add_word_boosting_to_config(
+            config, self._boosted_lm_words, self._boosted_lm_score
+        )
+
+        riva.client.add_endpoint_parameters_to_config(
+            config,
+            self._start_history,
+            self._start_threshold,
+            self._stop_history,
+            self._stop_history_eou,
+            self._stop_threshold,
+            self._stop_threshold_eou,
+        )
+        riva.client.add_custom_configuration_to_config(config, self._custom_configuration)
+
+        return config
 
     def can_generate_metrics(self) -> bool:
         """Check if this service can generate processing metrics.
@@ -206,41 +243,9 @@ async def start(self, frame: StartFrame):
             frame: StartFrame indicating pipeline start.
         """
         await super().start(frame)
+        self._initialize_client()
+        self._config = self._create_recognition_config()
 
-        if self._config:
-            return
-
-        config = riva.client.StreamingRecognitionConfig(
-            config=riva.client.RecognitionConfig(
-                encoding=riva.client.AudioEncoding.LINEAR_PCM,
-                language_code=self._language_code,
-                model="",
-                max_alternatives=1,
-                profanity_filter=self._profanity_filter,
-                enable_automatic_punctuation=self._automatic_punctuation,
-                verbatim_transcripts=not self._no_verbatim_transcripts,
-                sample_rate_hertz=self.sample_rate,
-                audio_channel_count=1,
-            ),
-            interim_results=True,
-        )
-
-        riva.client.add_word_boosting_to_config(
-            config, self._boosted_lm_words, self._boosted_lm_score
-        )
-
-        riva.client.add_endpoint_parameters_to_config(
-            config,
-            self._start_history,
-            self._start_threshold,
-            self._stop_history,
-            self._stop_history_eou,
-            self._stop_threshold,
-            self._stop_threshold_eou,
-        )
-        riva.client.add_custom_configuration_to_config(config, self._custom_configuration)
-
-        self._config = config
         self._queue = asyncio.Queue()
 
         if not self._thread_task:
@@ -250,6 +255,8 @@ async def start(self, frame: StartFrame):
             self._response_queue = asyncio.Queue()
             self._response_task = self.create_task(self._response_task_handler())
 
+        logger.debug(f"Initialized NvidiaSTTService with model: {self.model_name}")
+
     async def stop(self, frame: EndFrame):
         """Stop the NVIDIA Riva STT service and clean up resources.