Skip to content

Commit 8b0f0b5

Browse files
authored
Merge pull request pipecat-ai#3425 from pipecat-ai/pk/gemini-3-flash-new-thinking-levels
Add Gemini 3 Flash-specific thinking levels
2 parents a298ce3 + f5e8a04 commit 8b0f0b5

3 files changed

Lines changed: 78 additions & 69 deletions

File tree

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ fal = [ "fal-client~=0.5.9" ]
6262
fireworks = []
6363
fish = [ "ormsgpack~=1.7.0", "pipecat-ai[websockets-base]" ]
6464
gladia = [ "pipecat-ai[websockets-base]" ]
65-
google = [ "google-cloud-speech>=2.33.0,<3", "google-cloud-texttospeech>=2.31.0,<3", "google-genai>=1.51.0,<2", "pipecat-ai[websockets-base]" ]
65+
google = [ "google-cloud-speech>=2.33.0,<3", "google-cloud-texttospeech>=2.31.0,<3", "google-genai>=1.57.0,<2", "pipecat-ai[websockets-base]" ]
6666
gradium = [ "pipecat-ai[websockets-base]" ]
6767
grok = []
6868
groq = [ "groq~=0.23.0" ]
@@ -111,7 +111,7 @@ tavus=[]
111111
together = []
112112
tracing = [ "opentelemetry-sdk>=1.33.0", "opentelemetry-api>=1.33.0", "opentelemetry-instrumentation>=0.54b0" ]
113113
ultravox = [ "pipecat-ai[websockets-base]" ]
114-
webrtc = [ "aiortc>=1.13.0,<2", "opencv-python>=4.11.0.86,<5" ]
114+
webrtc = [ "aiortc>=1.14.0,<2", "opencv-python>=4.11.0.86,<5" ]
115115
websocket = [ "pipecat-ai[websockets-base]", "fastapi>=0.115.6,<0.122.0" ]
116116
websockets-base = [ "websockets>=13.1,<16.0" ]
117117
whisper = [ "faster-whisper~=1.1.1" ]

src/pipecat/services/google/llm.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -708,16 +708,18 @@ class ThinkingConfig(BaseModel):
708708
Gemini 2.5 and 3 series models have this thinking process.
709709
710710
Parameters:
711-
thinking_level: Thinking level for Gemini 3 Pro. Can be "low" or "high".
712-
If not provided, Gemini 3 Pro defaults to "high".
713-
Note: Gemini 2.5 series should use thinking_budget instead.
711+
thinking_level: Thinking level for Gemini 3 models.
712+
For Gemini 3 Pro, this can be "low" or "high".
713+
For Gemini 3 Flash, this can be "minimal", "low", "medium", or "high".
714+
If not provided, Gemini 3 models default to "high".
715+
Note: Gemini 2.5 series must use thinking_budget instead.
714716
thinking_budget: Token budget for thinking, for Gemini 2.5 series.
715717
-1 for dynamic thinking (model decides), 0 to disable thinking,
716718
or a specific token count (e.g., 128-32768 for 2.5 Pro).
717719
If not provided, most models today default to dynamic thinking.
718720
See https://ai.google.dev/gemini-api/docs/thinking#set-budget
719721
for default values and allowed ranges.
720-
Note: Gemini 3 Pro should use thinking_level instead.
722+
Note: Gemini 3 models must use thinking_level instead.
721723
include_thoughts: Whether to include thought summaries in the response.
722724
Today's models default to not including thoughts (False).
723725
"""
@@ -726,7 +728,9 @@ class ThinkingConfig(BaseModel):
726728

727729
# Why `| str` here? To not break compatibility in case Google adds more
728730
# levels in the future.
729-
thinking_level: Optional[Literal["low", "high"] | str] = Field(default=None)
731+
thinking_level: Optional[Literal["low", "high", "medium", "minimal"] | str] = Field(
732+
default=None
733+
)
730734

731735
include_thoughts: Optional[bool] = Field(default=None)
732736

0 commit comments

Comments (0)