Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
72934bd
Add u3-rt-pro support and improvements to AssemblyAI STT service
zkleb-aai Feb 27, 2026
cd07937
Fix missing imports: Add UserStartedSpeakingFrame and UserStoppedSpea…
zkleb-aai Feb 27, 2026
aa7e9a1
Fix finalization pattern: Use request/confirm in Pipecat mode, finali…
zkleb-aai Feb 27, 2026
6ba9f78
Remove unnecessary SpeechStarted fallback in STT mode
zkleb-aai Feb 27, 2026
45532a9
Remove info logs and unused import per PR feedback
zkleb-aai Feb 27, 2026
ef00f27
Fix incorrect await on synchronous request_finalize() method
zkleb-aai Feb 27, 2026
d7ce1ee
Add foundational examples for AssemblyAI u3-rt-pro
zkleb-aai Feb 27, 2026
21a409e
Update prompt warning and rename min_end_of_turn_silence_when_confide…
zkleb-aai Mar 1, 2026
07ae4b8
Update AssemblyAI examples to use u3-rt-pro and improve 55d example
zkleb-aai Mar 1, 2026
66fca7e
Add backward compatibility for min_end_of_turn_silence_when_confident…
zkleb-aai Mar 1, 2026
d1cbc81
Fix 07o example to use new min_turn_silence parameter name in docs an…
zkleb-aai Mar 1, 2026
5de495c
Use logger.warning instead of warnings.warn for deprecation message
zkleb-aai Mar 1, 2026
42f91a9
Apply ruff formatting fixes
zkleb-aai Mar 1, 2026
6968d83
Add changelog entries for PR #3856
zkleb-aai Mar 1, 2026
36b9c05
Fix changelog entries to use proper markdown bullet format
zkleb-aai Mar 1, 2026
cb7e612
Remove test files and testing documentation from PR
zkleb-aai Mar 1, 2026
7648b62
Update src/pipecat/services/assemblyai/stt.py
zkleb-aai Mar 2, 2026
6729f43
Update src/pipecat/services/assemblyai/stt.py
zkleb-aai Mar 2, 2026
5c2ca0c
Update changelog/3856.changed.md
zkleb-aai Mar 2, 2026
b449515
Address PR review feedback: remove debug logs, fix hasattr logic, add…
zkleb-aai Mar 2, 2026
32773b4
Improve terminology: rename file and replace 'STT mode' with 'Assembl…
zkleb-aai Mar 2, 2026
c6c2c5b
Fix end_of_turn_confidence_threshold: set to 1.0 (not 0.0) for univer…
zkleb-aai Mar 2, 2026
038f6a7
Linting
markbackman Mar 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 40 additions & 5 deletions src/pipecat/services/assemblyai/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from typing import List, Literal, Optional

from pydantic import BaseModel, Field
from pydantic import BaseModel, ConfigDict, Field


class Word(BaseModel):
Expand Down Expand Up @@ -68,15 +68,38 @@ class TurnMessage(BaseMessage):
transcript: The transcribed text for this turn.
end_of_turn_confidence: Confidence score for end-of-turn detection.
words: List of individual words with timing and confidence data.
language_code: Detected language code (e.g., "es", "fr"). Only present with
complete utterances or when end_of_turn is True.
language_confidence: Confidence score (0-1) for language detection. Only present
with complete utterances or when end_of_turn is True.
speaker: Speaker label (e.g., "A", "B"). Only present when speaker_labels is
enabled and end_of_turn is True. Maps to 'speaker_label' in JSON response.
"""

model_config = ConfigDict(populate_by_name=True)

type: Literal["Turn"] = "Turn"
turn_order: int
turn_is_formatted: bool
end_of_turn: bool
transcript: str
end_of_turn_confidence: float
words: List[Word]
language_code: Optional[str] = None
language_confidence: Optional[float] = None
speaker: Optional[str] = Field(default=None, alias="speaker_label")


class SpeechStartedMessage(BaseMessage):
    """Notification that speech has begun in the incoming audio stream.

    Parameters:
        type: Discriminator for this message; always "SpeechStarted".
        timestamp: Position in the audio, in milliseconds, at which the
            speech was detected.
    """

    type: Literal["SpeechStarted"] = "SpeechStarted"
    timestamp: int


class TerminationMessage(BaseMessage):
Expand All @@ -94,7 +117,7 @@ class TerminationMessage(BaseMessage):


# Union type for all possible message types
AnyMessage = BeginMessage | TurnMessage | TerminationMessage
AnyMessage = BeginMessage | TurnMessage | SpeechStartedMessage | TerminationMessage


class AssemblyAIConnectionParams(BaseModel):
Expand All @@ -109,7 +132,15 @@ class AssemblyAIConnectionParams(BaseModel):
min_end_of_turn_silence_when_confident: Minimum silence duration when confident about end-of-turn.
max_turn_silence: Maximum silence duration before forcing end-of-turn.
keyterms_prompt: List of key terms to guide transcription. Will be JSON serialized before sending.
speech_model: Select between English and multilingual models. Defaults to "universal-streaming-english".
prompt: Optional text prompt to guide the transcription. Only used when speech_model is "u3-rt-pro".
speech_model: Select between English, multilingual, and u3-rt-pro models. Defaults to "u3-rt-pro".
language_detection: Enable automatic language detection. Only applicable to
universal-streaming-multilingual. When enabled, Turn messages include
language_code and language_confidence fields. Defaults to None (not sent).
format_turns: Whether to format transcript turns. Defaults to True.
speaker_labels: Enable speaker diarization. When enabled, final transcripts
(end_of_turn=True) include a speaker field identifying the speaker
(e.g., "A", "B"), read from 'speaker_label' in the JSON response. Defaults to None (not sent).
"""

sample_rate: int = 16000
Expand All @@ -120,6 +151,10 @@ class AssemblyAIConnectionParams(BaseModel):
min_end_of_turn_silence_when_confident: Optional[int] = None
max_turn_silence: Optional[int] = None
keyterms_prompt: Optional[List[str]] = None
speech_model: Literal["universal-streaming-english", "universal-streaming-multilingual"] = (
"universal-streaming-english"
prompt: Optional[str] = None
speech_model: Literal["universal-streaming-english", "universal-streaming-multilingual", "u3-rt-pro"] = (
"u3-rt-pro"
)
language_detection: Optional[bool] = None
format_turns: bool = True
speaker_labels: Optional[bool] = None
Loading
Loading