Skip to content

Commit 316b6b8

Browse files
committed
fix(whatsapp): Refine typing indicator and TTS error handling
- Send the typing indicator in _send_response, after the TTS decision, so it is not shown before we know whether audio will be sent
- Validate the TTS configuration before attempting synthesis
- Improve error handling for TTS failures by logging the specific reason for falling back to text
- Scale the recording indicator duration with the response text length when TTS is attempted
- Send the typing indicator when the response goes out as text, including after a failed TTS attempt
1 parent ad3d504 commit 316b6b8

File tree

1 file changed

+82
-20
lines changed

1 file changed

+82
-20
lines changed

agentle/agents/whatsapp/whatsapp_bot.py

Lines changed: 82 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1237,14 +1237,8 @@ async def _process_message_batch(
12371237
session.context_data["sending_started_at"] = datetime.now().isoformat()
12381238
await self.provider.update_session(session)
12391239

1240-
# Show typing indicator
1241-
if self.config.typing_indicator:
1242-
logger.debug(
1243-
f"[BATCH_PROCESSING] Sending typing indicator to {phone_number}"
1244-
)
1245-
await self.provider.send_typing_indicator(
1246-
phone_number, self.config.typing_duration
1247-
)
1240+
# Note: Typing indicator is now sent in _send_response after TTS decision
1241+
# to avoid sending it before determining if audio should be sent
12481242

12491243
# Get all pending messages
12501244
pending_messages = session.clear_pending_messages()
@@ -2082,11 +2076,14 @@ async def _send_response(
20822076
)
20832077

20842078
# Check if we should send audio via TTS
2085-
if (
2079+
should_attempt_tts = (
20862080
self.tts_provider
20872081
and self.config.speech_config
20882082
and self.config.speech_play_chance > 0
2089-
):
2083+
and self._validate_tts_configuration()
2084+
)
2085+
2086+
if should_attempt_tts:
20902087
import random
20912088

20922089
# Determine if we should play speech based on chance
@@ -2102,11 +2099,19 @@ async def _send_response(
21022099
logger.debug(
21032100
f"[TTS] Sending recording indicator to {to} during synthesis"
21042101
)
2102+
# Use a more appropriate duration for recording indicator
2103+
# Based on text length: minimum 2s, maximum 10s, or estimated synthesis time
2104+
estimated_duration = max(
2105+
2, min(10, len(response_text) // 50 + 2)
2106+
)
21052107
await self.provider.send_recording_indicator(
2106-
to, self.config.typing_duration
2108+
to, estimated_duration
21072109
)
21082110

21092111
# Synthesize speech
2112+
# We know these are not None due to validation above
2113+
assert self.tts_provider is not None
2114+
assert self.config.speech_config is not None
21102115
speech_result = await self.tts_provider.synthesize_async(
21112116
response_text, config=self.config.speech_config
21122117
)
@@ -2133,22 +2138,37 @@ async def _send_response(
21332138
return
21342139

21352140
except Exception as e:
2136-
logger.warning(
2137-
f"[TTS] Failed to send audio response to {to}, falling back to text: {e}",
2138-
extra={
2139-
"to_number": to,
2140-
"error_type": type(e).__name__,
2141-
"error": str(e),
2142-
},
2143-
)
2141+
# Check if this is a specific Evolution API media upload error
2142+
error_message = str(e).lower()
2143+
if "media upload failed" in error_message or "400" in error_message:
2144+
logger.warning(
2145+
f"[TTS] Evolution API media upload failed for {to}, falling back to text: {e}",
2146+
extra={
2147+
"to_number": to,
2148+
"error_type": type(e).__name__,
2149+
"error": str(e),
2150+
"fallback_reason": "evolution_api_media_upload_failed",
2151+
},
2152+
)
2153+
else:
2154+
logger.warning(
2155+
f"[TTS] Failed to send audio response to {to}, falling back to text: {e}",
2156+
extra={
2157+
"to_number": to,
2158+
"error_type": type(e).__name__,
2159+
"error": str(e),
2160+
"fallback_reason": "tts_synthesis_or_send_failed",
2161+
},
2162+
)
21442163
# Fall through to send text message instead
21452164

21462165
# Split messages by line breaks and length
21472166
messages = self._split_message_by_line_breaks(response_text)
21482167
logger.info(f"[SEND_RESPONSE] Split response into {len(messages)} parts")
21492168

21502169
# Show typing indicator ONCE before sending all messages
2151-
if self.config.typing_indicator:
2170+
# Only send typing indicator if we're not attempting TTS or if TTS failed
2171+
if self.config.typing_indicator and not should_attempt_tts:
21522172
try:
21532173
logger.debug(
21542174
f"[SEND_RESPONSE] Sending typing indicator to {to} before sending {len(messages)} message(s)"
@@ -2159,6 +2179,18 @@ async def _send_response(
21592179
except Exception as e:
21602180
# Don't let typing indicator failures break message sending
21612181
logger.warning(f"[SEND_RESPONSE] Failed to send typing indicator: {e}")
2182+
elif self.config.typing_indicator and should_attempt_tts:
2183+
# TTS was attempted but failed, send typing indicator for text fallback
2184+
try:
2185+
logger.debug(
2186+
f"[SEND_RESPONSE] TTS failed, sending typing indicator to {to} for text fallback"
2187+
)
2188+
await self.provider.send_typing_indicator(
2189+
to, self.config.typing_duration
2190+
)
2191+
except Exception as e:
2192+
# Don't let typing indicator failures break message sending
2193+
logger.warning(f"[SEND_RESPONSE] Failed to send typing indicator: {e}")
21622194

21632195
# Track sending state to handle partial failures
21642196
successfully_sent_count = 0
@@ -2257,6 +2289,36 @@ async def _send_response(
22572289
f"[SEND_RESPONSE] Successfully sent all {len(messages)} message parts to {to}"
22582290
)
22592291

2292+
def _validate_tts_configuration(self) -> bool:
    """Report whether the bot is currently able to attempt TTS synthesis.

    The checks run in a fixed order and stop at the first failure:

    1. ``self.config.speech_config`` must be truthy.
    2. ``speech_config.voice_id`` must be truthy.
    3. ``self.tts_provider`` must be truthy.

    Each failed check is logged and yields ``False``. Any exception raised
    while inspecting the configuration is logged as a warning and also
    yields ``False``.

    Returns:
        ``True`` only when all three checks pass, ``False`` otherwise.
    """
    is_valid = False

    try:
        speech_settings = self.config.speech_config

        if not speech_settings:
            logger.debug("[TTS_VALIDATION] No speech_config provided")
        elif not speech_settings.voice_id:
            # A voice must be selected before synthesis can be requested.
            logger.warning(
                "[TTS_VALIDATION] speech_config.voice_id is required but not provided"
            )
        elif not self.tts_provider:
            # A speech configuration without a provider cannot synthesize.
            logger.warning("[TTS_VALIDATION] TTS provider is not configured")
        else:
            logger.debug(
                f"[TTS_VALIDATION] TTS configuration is valid: voice_id={speech_settings.voice_id}"
            )
            # Flip the flag only after the success log has been emitted, so
            # a failure while logging still reports the config as invalid.
            is_valid = True
    except Exception as exc:
        logger.warning(
            f"[TTS_VALIDATION] Failed to validate TTS configuration: {exc}"
        )

    return is_valid
2321+
22602322
def _split_message_by_line_breaks(self, text: str) -> Sequence[str]:
22612323
"""Split message by line breaks first, then by length if needed with enhanced validation."""
22622324
if not text or not text.strip():

0 commit comments

Comments
 (0)