-
-
Notifications
You must be signed in to change notification settings - Fork 0
feat: Add Google TTS provider to Ultravox OpenAPI with GoogleVoice schema #128
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,92 @@ | ||
| #nullable enable | ||
|
|
||
| namespace Ultravox | ||
| { | ||
| public sealed partial class UltravoxV1GoogleVoice | ||
| { | ||
| /// <summary> | ||
| /// Serializes the current instance to a JSON string using the provided JsonSerializerContext. | ||
| /// </summary> | ||
| /// <param name="jsonSerializerContext">Serializer context handed unchanged to <c>JsonSerializer.Serialize</c>.</param> | ||
| /// <returns>The JSON text returned by the serializer.</returns> | ||
| public string ToJson( | ||
| global::System.Text.Json.Serialization.JsonSerializerContext jsonSerializerContext) | ||
| { | ||
| return global::System.Text.Json.JsonSerializer.Serialize( | ||
| this, | ||
| this.GetType(), | ||
| jsonSerializerContext); | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Serializes the current instance to a JSON string using the provided JsonSerializerOptions. | ||
| /// </summary> | ||
| /// <param name="jsonSerializerOptions">Optional serializer options; <c>null</c> is passed through to the serializer as-is.</param> | ||
| /// <returns>The JSON text returned by the serializer.</returns> | ||
| #if NET8_0_OR_GREATER | ||
| [global::System.Diagnostics.CodeAnalysis.RequiresUnreferencedCode("JSON serialization and deserialization might require types that cannot be statically analyzed. Use the overload that takes a JsonTypeInfo or JsonSerializerContext, or make sure all of the required types are preserved.")] | ||
| [global::System.Diagnostics.CodeAnalysis.RequiresDynamicCode("JSON serialization and deserialization might require types that cannot be statically analyzed and might need runtime code generation. Use System.Text.Json source generation for native AOT applications.")] | ||
| #endif | ||
| public string ToJson( | ||
| global::System.Text.Json.JsonSerializerOptions? jsonSerializerOptions = null) | ||
| { | ||
| return global::System.Text.Json.JsonSerializer.Serialize( | ||
| this, | ||
| jsonSerializerOptions); | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Deserializes a JSON string using the provided JsonSerializerContext. | ||
| /// </summary> | ||
| /// <param name="json">The JSON text to deserialize.</param> | ||
| /// <param name="jsonSerializerContext">Serializer context handed unchanged to <c>JsonSerializer.Deserialize</c>.</param> | ||
| /// <returns>The deserialized instance, or <c>null</c> when the serializer result is not a <see cref="UltravoxV1GoogleVoice" />.</returns> | ||
| public static global::Ultravox.UltravoxV1GoogleVoice? FromJson( | ||
| string json, | ||
| global::System.Text.Json.Serialization.JsonSerializerContext jsonSerializerContext) | ||
| { | ||
| return global::System.Text.Json.JsonSerializer.Deserialize( | ||
| json, | ||
| typeof(global::Ultravox.UltravoxV1GoogleVoice), | ||
| jsonSerializerContext) as global::Ultravox.UltravoxV1GoogleVoice; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Deserializes a JSON string using the provided JsonSerializerOptions. | ||
| /// </summary> | ||
| /// <param name="json">The JSON text to deserialize.</param> | ||
| /// <param name="jsonSerializerOptions">Optional serializer options; <c>null</c> is passed through to the serializer as-is.</param> | ||
| /// <returns>The value returned by <c>JsonSerializer.Deserialize</c>, which may be <c>null</c>.</returns> | ||
| #if NET8_0_OR_GREATER | ||
| [global::System.Diagnostics.CodeAnalysis.RequiresUnreferencedCode("JSON serialization and deserialization might require types that cannot be statically analyzed. Use the overload that takes a JsonTypeInfo or JsonSerializerContext, or make sure all of the required types are preserved.")] | ||
| [global::System.Diagnostics.CodeAnalysis.RequiresDynamicCode("JSON serialization and deserialization might require types that cannot be statically analyzed and might need runtime code generation. Use System.Text.Json source generation for native AOT applications.")] | ||
| #endif | ||
| public static global::Ultravox.UltravoxV1GoogleVoice? FromJson( | ||
| string json, | ||
| global::System.Text.Json.JsonSerializerOptions? jsonSerializerOptions = null) | ||
| { | ||
| return global::System.Text.Json.JsonSerializer.Deserialize<global::Ultravox.UltravoxV1GoogleVoice>( | ||
| json, | ||
| jsonSerializerOptions); | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Deserializes a JSON stream using the provided JsonSerializerContext. | ||
| /// </summary> | ||
| /// <param name="jsonStream">The stream to read the JSON from.</param> | ||
| /// <param name="jsonSerializerContext">Serializer context handed unchanged to <c>JsonSerializer.DeserializeAsync</c>.</param> | ||
| /// <param name="cancellationToken">Token forwarded to <c>JsonSerializer.DeserializeAsync</c> so the read can be cancelled. Optional; existing two-argument calls keep compiling.</param> | ||
| /// <returns>The deserialized instance, or <c>null</c> when the serializer result is not a <see cref="UltravoxV1GoogleVoice" />.</returns> | ||
| public static async global::System.Threading.Tasks.ValueTask<global::Ultravox.UltravoxV1GoogleVoice?> FromJsonStreamAsync( | ||
| global::System.IO.Stream jsonStream, | ||
| global::System.Text.Json.Serialization.JsonSerializerContext jsonSerializerContext, | ||
| global::System.Threading.CancellationToken cancellationToken = default) | ||
| { | ||
| return (await global::System.Text.Json.JsonSerializer.DeserializeAsync( | ||
| jsonStream, | ||
| typeof(global::Ultravox.UltravoxV1GoogleVoice), | ||
| jsonSerializerContext, | ||
| cancellationToken).ConfigureAwait(false)) as global::Ultravox.UltravoxV1GoogleVoice; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Deserializes a JSON stream using the provided JsonSerializerOptions. | ||
| /// </summary> | ||
| /// <param name="jsonStream">The stream to read the JSON from.</param> | ||
| /// <param name="jsonSerializerOptions">Optional serializer options; <c>null</c> is passed through to the serializer as-is.</param> | ||
| /// <param name="cancellationToken">Token forwarded to <c>JsonSerializer.DeserializeAsync</c> so the read can be cancelled. Optional; existing calls keep compiling.</param> | ||
| /// <returns>The pending result of <c>JsonSerializer.DeserializeAsync</c>, returned without awaiting it here.</returns> | ||
| #if NET8_0_OR_GREATER | ||
| [global::System.Diagnostics.CodeAnalysis.RequiresUnreferencedCode("JSON serialization and deserialization might require types that cannot be statically analyzed. Use the overload that takes a JsonTypeInfo or JsonSerializerContext, or make sure all of the required types are preserved.")] | ||
| [global::System.Diagnostics.CodeAnalysis.RequiresDynamicCode("JSON serialization and deserialization might require types that cannot be statically analyzed and might need runtime code generation. Use System.Text.Json source generation for native AOT applications.")] | ||
| #endif | ||
| public static global::System.Threading.Tasks.ValueTask<global::Ultravox.UltravoxV1GoogleVoice?> FromJsonStreamAsync( | ||
| global::System.IO.Stream jsonStream, | ||
| global::System.Text.Json.JsonSerializerOptions? jsonSerializerOptions = null, | ||
| global::System.Threading.CancellationToken cancellationToken = default) | ||
| { | ||
| return global::System.Text.Json.JsonSerializer.DeserializeAsync<global::Ultravox.UltravoxV1GoogleVoice?>( | ||
| jsonStream, | ||
| jsonSerializerOptions, | ||
| cancellationToken); | ||
| } | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,59 @@ | ||
|
|
||
| #nullable enable | ||
|
|
||
| namespace Ultravox | ||
| { | ||
| /// <summary> | ||
| /// Describes a voice that is served by Google.<br/> | ||
| /// Because this implementation relies on bidirectional streaming, voices older than Chirp3 are not supported. | ||
| /// </summary> | ||
| public sealed partial class UltravoxV1GoogleVoice | ||
| { | ||
| /// <summary> | ||
| /// Google's ID (name) for the voice, for example "en-US-Chirp3-HD-Charon". | ||
| /// </summary> | ||
| [global::System.Text.Json.Serialization.JsonPropertyName("voiceId")] | ||
| public string? VoiceId { get; set; } | ||
|
|
||
| /// <summary> | ||
| /// Speaking rate for the voice. Must be between 0.25 and 2. Defaults to 1.<br/> | ||
| /// See https://cloud.google.com/python/docs/reference/texttospeech/latest/google.cloud.texttospeech_v1.types.StreamingAudioConfig | ||
| /// </summary> | ||
| [global::System.Text.Json.Serialization.JsonPropertyName("speakingRate")] | ||
| public float? SpeakingRate { get; set; } | ||
|
|
||
| /// <summary> | ||
| /// Holds any additional properties that the schema does not define explicitly. | ||
| /// </summary> | ||
| [global::System.Text.Json.Serialization.JsonExtensionData] | ||
| public global::System.Collections.Generic.IDictionary<string, object> AdditionalProperties { get; set; } = new global::System.Collections.Generic.Dictionary<string, object>(); | ||
|
|
||
| /// <summary> | ||
| /// Creates an empty <see cref="UltravoxV1GoogleVoice" />; no property is assigned here. | ||
| /// </summary> | ||
| public UltravoxV1GoogleVoice() | ||
| { | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Creates a <see cref="UltravoxV1GoogleVoice" /> with both schema-defined properties assigned from the arguments. | ||
| /// </summary> | ||
| /// <param name="voiceId"> | ||
| /// Google's ID (name) for the voice, for example "en-US-Chirp3-HD-Charon". | ||
| /// </param> | ||
| /// <param name="speakingRate"> | ||
| /// Speaking rate for the voice. Must be between 0.25 and 2. Defaults to 1.<br/> | ||
| /// See https://cloud.google.com/python/docs/reference/texttospeech/latest/google.cloud.texttospeech_v1.types.StreamingAudioConfig | ||
| /// </param> | ||
| #if NET7_0_OR_GREATER | ||
| [global::System.Diagnostics.CodeAnalysis.SetsRequiredMembers] | ||
| #endif | ||
| public UltravoxV1GoogleVoice( | ||
| string? voiceId, | ||
| float? speakingRate) | ||
| { | ||
| VoiceId = voiceId; | ||
| SpeakingRate = speakingRate; | ||
| } | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -2611,6 +2611,10 @@ components: | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| allOf: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| - $ref: '#/components/schemas/KeyPrefix' | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| description: The LMNT API key. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| google: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| allOf: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| - $ref: '#/components/schemas/KeyPrefix' | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| description: The Google service account key. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| Agent: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| required: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| - agentId | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
@@ -3696,6 +3700,10 @@ components: | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| type: string | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| description: "Your LMNT API key.\nhttps://app.lmnt.com/account#api-keys" | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| nullable: true | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| google: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| type: string | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| description: "A service account JSON key for your Google Cloud project with the Text-to-Speech API enabled.\nhttps://cloud.google.com/text-to-speech/docs/quickstart-client-libraries#before-you-begin\nhttps://cloud.google.com/iam/docs/keys-create-delete#creating" | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| nullable: true | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| PatchedVoice: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| type: object | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| properties: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
@@ -4754,6 +4762,10 @@ components: | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| allOf: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| - $ref: '#/components/schemas/ultravox.v1.LmntVoice' | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| description: A voice served by LMNT. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| google: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| allOf: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| - $ref: '#/components/schemas/ultravox.v1.GoogleVoice' | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| description: "A voice served by Google, using bidirectional streaming.\n (For non-streaming or output-only streaming, use generic.)" | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| generic: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| allOf: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| - $ref: '#/components/schemas/ultravox.v1.GenericVoice' | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
@@ -4839,6 +4851,17 @@ components: | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| type: string | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| description: "For JSON responses, the path to the field containing base64-encoded audio data. The data must\n be PCM audio, optionally with a WAV header." | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| description: "Specification for a voice served by some generic REST-based TTS API. The API must\n accept an application/json POST request (as defined below) and return either WAV\n audio, raw PCM audio, or application/json with a base64 encoded audio data field\n that itself corresponds to WAV or raw PCM audio.\n Note that this simple API implies a lack of either input streaming or audio timing\n information, so more specific voice types are preferable when available." | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| ultravox.v1.GoogleVoice: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| type: object | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| properties: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| voiceId: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| type: string | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| description: 'The ID (name) of the voice in Google, e.g. "en-US-Chirp3-HD-Charon".' | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| speakingRate: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| type: number | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| description: "The speaking rate. Must be between 0.25 and 2. Defaults to 1.\n See https://cloud.google.com/python/docs/reference/texttospeech/latest/google.cloud.texttospeech_v1.types.StreamingAudioConfig" | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| format: float | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| description: "Specification for a voice served by Google.\n This implementation uses bidirectional streaming, so voices prior to Chirp3 are not supported." | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| ultravox.v1.HeaderApiKeyRequirement: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Comment on lines
+4854
to
4865
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 💡 Verification agent 🧩 Analysis chain: Add validation bounds and a default for speakingRate; include an example voiceId. Without min/max/default, clients can submit out-of-range values that will fail at runtime. This mirrors other provider schemas (e.g., LMNT speed 0.25–2, ElevenLabs speed defaults). Proposed schema tweaks:
ultravox.v1.GoogleVoice:
type: object
properties:
voiceId:
type: string
- description: 'The ID (name) of the voice in Google, e.g. "en-US-Chirp3-HD-Charon".'
+ description: 'The ID (name) of the voice in Google.'
+ example: en-US-Chirp3-HD-Charon
speakingRate:
type: number
- description: "The speaking rate. Must be between 0.25 and 2. Defaults to 1.
- See https://cloud.google.com/python/docs/reference/texttospeech/latest/google.cloud.texttospeech_v1.types.StreamingAudioConfig"
format: float
+ minimum: 0.25
+ maximum: 2
+ default: 1
+ description: "The speaking rate.
+ See https://cloud.google.com/python/docs/reference/texttospeech/latest/google.cloud.texttospeech_v1.types.StreamingAudioConfig"
- description: "Specification for a voice served by Google.
- This implementation uses bidirectional streaming, so voices prior to Chirp3 are not supported."
+ description: "Specification for a voice served by Google. Uses bidirectional streaming."
Also, please verify that the referenced Python type path is correct for the current client library and version. 🌐 Web query: 💡 Result: Yes. The Python type is google.cloud.texttospeech_v1.types.StreamingAudioConfig. [1][2] References:
Add validation bounds, default, and example to GoogleVoice schema ultravox.v1.GoogleVoice:
type: object
properties:
voiceId:
- description: 'The ID (name) of the voice in Google, e.g. "en-US-Chirp3-HD-Charon".'
+ description: 'The ID (name) of the voice in Google.'
+ example: en-US-Chirp3-HD-Charon
speakingRate:
type: number
format: float
+ minimum: 0.25
+ maximum: 2
+ default: 1
description: "The speaking rate.
See https://cloud.google.com/python/docs/reference/texttospeech/latest/google.cloud.texttospeech_v1.types.StreamingAudioConfig"
- description: "Specification for a voice served by Google.
- This implementation uses bidirectional streaming, so voices prior to Chirp3 are not supported."
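+ description: "Specification for a voice served by Google. Uses bidirectional streaming."
Verified that the referenced Python type path (google.cloud.texttospeech_v1.types.StreamingAudioConfig) is correct. 📝 Committable suggestion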
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| type: object | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| properties: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
💡 Verification agent
🧩 Analysis chain
Confirm runtime truly requires “bidirectional streaming” (Chirp3+) for Google.
The description is specific and time-sensitive. Please confirm the server implementation currently only supports bidi streaming voices and that non-Chirp voices fail predictably; otherwise soften the claim.
🌐 Web query:
💡 Result:
Short answer:
Sources:
[1] Google Cloud Text‑to‑Speech StreamingAudioConfig doc (speakingRate range).
[2] Google Cloud Text‑to‑Speech supported voices list (shows available voice families including Chirp/Chirp3).
Clarify Google voice description in OpenAPI schema
Update the description to note that StreamingAudioConfig supports all streaming-compatible voice families (Wavenet, Neural2, Chirp3, Studio, Standard) rather than implying Chirp3-only.
File: src/libs/Ultravox/openapi.yaml Lines 4765-4769
📝 Committable suggestion
🤖 Prompt for AI Agents