@@ -1237,14 +1237,8 @@ async def _process_message_batch(
12371237 session .context_data ["sending_started_at" ] = datetime .now ().isoformat ()
12381238 await self .provider .update_session (session )
12391239
1240- # Show typing indicator
1241- if self .config .typing_indicator :
1242- logger .debug (
1243- f"[BATCH_PROCESSING] Sending typing indicator to { phone_number } "
1244- )
1245- await self .provider .send_typing_indicator (
1246- phone_number , self .config .typing_duration
1247- )
1240+ # Note: Typing indicator is now sent in _send_response after TTS decision
1241+ # to avoid sending it before determining if audio should be sent
12481242
12491243 # Get all pending messages
12501244 pending_messages = session .clear_pending_messages ()
@@ -2082,11 +2076,14 @@ async def _send_response(
20822076 )
20832077
20842078 # Check if we should send audio via TTS
2085- if (
2079+ should_attempt_tts = (
20862080 self .tts_provider
20872081 and self .config .speech_config
20882082 and self .config .speech_play_chance > 0
2089- ):
2083+ and self ._validate_tts_configuration ()
2084+ )
2085+
2086+ if should_attempt_tts :
20902087 import random
20912088
20922089 # Determine if we should play speech based on chance
@@ -2102,11 +2099,19 @@ async def _send_response(
21022099 logger .debug (
21032100 f"[TTS] Sending recording indicator to { to } during synthesis"
21042101 )
2102+ # Use a more appropriate duration for recording indicator
2103+ # Based on text length: minimum 2s, maximum 10s, or estimated synthesis time
2104+ estimated_duration = max (
2105+ 2 , min (10 , len (response_text ) // 50 + 2 )
2106+ )
21052107 await self .provider .send_recording_indicator (
2106- to , self . config . typing_duration
2108+ to , estimated_duration
21072109 )
21082110
21092111 # Synthesize speech
2112+ # We know these are not None due to validation above
2113+ assert self .tts_provider is not None
2114+ assert self .config .speech_config is not None
21102115 speech_result = await self .tts_provider .synthesize_async (
21112116 response_text , config = self .config .speech_config
21122117 )
@@ -2133,22 +2138,37 @@ async def _send_response(
21332138 return
21342139
21352140 except Exception as e :
2136- logger .warning (
2137- f"[TTS] Failed to send audio response to { to } , falling back to text: { e } " ,
2138- extra = {
2139- "to_number" : to ,
2140- "error_type" : type (e ).__name__ ,
2141- "error" : str (e ),
2142- },
2143- )
2141+ # Check if this is a specific Evolution API media upload error
2142+ error_message = str (e ).lower ()
2143+ if "media upload failed" in error_message or "400" in error_message :
2144+ logger .warning (
2145+ f"[TTS] Evolution API media upload failed for { to } , falling back to text: { e } " ,
2146+ extra = {
2147+ "to_number" : to ,
2148+ "error_type" : type (e ).__name__ ,
2149+ "error" : str (e ),
2150+ "fallback_reason" : "evolution_api_media_upload_failed" ,
2151+ },
2152+ )
2153+ else :
2154+ logger .warning (
2155+ f"[TTS] Failed to send audio response to { to } , falling back to text: { e } " ,
2156+ extra = {
2157+ "to_number" : to ,
2158+ "error_type" : type (e ).__name__ ,
2159+ "error" : str (e ),
2160+ "fallback_reason" : "tts_synthesis_or_send_failed" ,
2161+ },
2162+ )
21442163 # Fall through to send text message instead
21452164
21462165 # Split messages by line breaks and length
21472166 messages = self ._split_message_by_line_breaks (response_text )
21482167 logger .info (f"[SEND_RESPONSE] Split response into { len (messages )} parts" )
21492168
21502169 # Show typing indicator ONCE before sending all messages
2151- if self .config .typing_indicator :
2170+ # Only send typing indicator if we're not attempting TTS or if TTS failed
2171+ if self .config .typing_indicator and not should_attempt_tts :
21522172 try :
21532173 logger .debug (
21542174 f"[SEND_RESPONSE] Sending typing indicator to { to } before sending { len (messages )} message(s)"
@@ -2159,6 +2179,18 @@ async def _send_response(
21592179 except Exception as e :
21602180 # Don't let typing indicator failures break message sending
21612181 logger .warning (f"[SEND_RESPONSE] Failed to send typing indicator: { e } " )
2182+ elif self .config .typing_indicator and should_attempt_tts :
2183+ # TTS was attempted but failed, send typing indicator for text fallback
2184+ try :
2185+ logger .debug (
2186+ f"[SEND_RESPONSE] TTS failed, sending typing indicator to { to } for text fallback"
2187+ )
2188+ await self .provider .send_typing_indicator (
2189+ to , self .config .typing_duration
2190+ )
2191+ except Exception as e :
2192+ # Don't let typing indicator failures break message sending
2193+ logger .warning (f"[SEND_RESPONSE] Failed to send typing indicator: { e } " )
21622194
21632195 # Track sending state to handle partial failures
21642196 successfully_sent_count = 0
@@ -2257,6 +2289,36 @@ async def _send_response(
22572289 f"[SEND_RESPONSE] Successfully sent all { len (messages )} message parts to { to } "
22582290 )
22592291
def _validate_tts_configuration(self) -> bool:
    """Report whether the TTS settings are complete enough to try synthesis.

    The checks run in a fixed order: ``self.config.speech_config`` must be
    set, it must carry a truthy ``voice_id``, and ``self.tts_provider`` must
    be set. The first failing check is logged and ends the validation.

    Returns:
        ``True`` when every check passes. ``False`` when any check fails or
        when inspecting the configuration raises an exception (the
        exception is logged as a warning and not propagated).
    """
    try:
        speech_config = self.config.speech_config
        if not speech_config:
            logger.debug("[TTS_VALIDATION] No speech_config provided")
            is_valid = False
        elif not speech_config.voice_id:
            # A speech_config without a voice_id is treated as unusable.
            logger.warning(
                "[TTS_VALIDATION] speech_config.voice_id is required but not provided"
            )
            is_valid = False
        elif not self.tts_provider:
            logger.warning("[TTS_VALIDATION] TTS provider is not configured")
            is_valid = False
        else:
            logger.debug(
                f"[TTS_VALIDATION] TTS configuration is valid: voice_id={speech_config.voice_id} "
            )
            is_valid = True
    except Exception as exc:
        # Validation is best-effort: any failure while inspecting the
        # configuration is reported as "not valid" rather than raised.
        logger.warning(
            f"[TTS_VALIDATION] Failed to validate TTS configuration: {exc} "
        )
        is_valid = False

    return is_valid
2321+
22602322 def _split_message_by_line_breaks (self , text : str ) -> Sequence [str ]:
22612323 """Split message by line breaks first, then by length if needed with enhanced validation."""
22622324 if not text or not text .strip ():
0 commit comments