From ec3b62104c0442e59c5295507864cc500213a58d Mon Sep 17 00:00:00 2001 From: bmdub <16746384+bmdub@users.noreply.github.com> Date: Wed, 7 May 2025 18:30:52 -0700 Subject: [PATCH] Add GenerateSpeechStreamingAsync() for streaming TTS responses. --- src/Custom/Audio/AudioClient.cs | 44 ++++++++++++++++++++++++- src/Generated/AudioClient.RestClient.cs | 1 + 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/src/Custom/Audio/AudioClient.cs b/src/Custom/Audio/AudioClient.cs index 9a224768..61fecb98 100644 --- a/src/Custom/Audio/AudioClient.cs +++ b/src/Custom/Audio/AudioClient.cs @@ -1,7 +1,10 @@ using System; +using System.Buffers; using System.ClientModel; using System.ClientModel.Primitives; +using System.Collections.Generic; using System.IO; +using System.Runtime.CompilerServices; using System.Threading; using System.Threading.Tasks; @@ -108,9 +111,47 @@ public virtual async Task> GenerateSpeechAsync(string t using BinaryContent content = options; ClientResult result = await GenerateSpeechAsync(content, cancellationToken.ToRequestOptions()).ConfigureAwait(false); + await result.GetRawResponse().BufferContentAsync(cancellationToken).ConfigureAwait(false); return ClientResult.FromValue(result.GetRawResponse().Content, result.GetRawResponse()); } + /// Generates a life-like, spoken audio recording of the input text. + /// + /// The default format of the generated audio is unless otherwise specified + /// via . + /// + /// The text to generate audio for. + /// The voice to use in the generated audio. + /// The options to configure the audio generation. + /// A token that can be used to cancel this method call. + /// is null. + /// Streaming chunks of the generated audio in the specified output format. 
+    public virtual async IAsyncEnumerable<BinaryData> GenerateSpeechStreamingAsync(string text, GeneratedSpeechVoice voice, SpeechGenerationOptions options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        Argument.AssertNotNull(text, nameof(text));
+
+        options ??= new();
+        CreateSpeechGenerationOptions(text, voice, ref options);
+
+        using BinaryContent content = options;
+        ClientResult result = await GenerateSpeechAsync(content, cancellationToken.ToRequestOptions()).ConfigureAwait(false);
+        using PipelineResponse response = result.GetRawResponse(); // unbuffered response: dispose it to release the connection, even if the caller stops enumerating early
+
+        byte[] buffer = ArrayPool<byte>.Shared.Rent(1024 * 16);
+        try
+        {
+            int bytesRead;
+            while ((bytesRead = await response.ContentStream.ReadAsync(buffer, 0, buffer.Length, cancellationToken).ConfigureAwait(false)) != 0)
+            {
+                yield return BinaryData.FromBytes(buffer.AsMemory(0, bytesRead).ToArray()); // copy the chunk: the pooled buffer is overwritten by the next read and recycled afterwards
+            }
+        }
+        finally
+        {
+            ArrayPool<byte>.Shared.Return(buffer);
+        }
+    }
+
 /// Generates a life-like, spoken audio recording of the input text. /// /// The default format of the generated audio is unless otherwise specified @@ -130,7 +171,8 @@ public virtual ClientResult GenerateSpeech(string text, GeneratedSpe CreateSpeechGenerationOptions(text, voice, ref options); using BinaryContent content = options; - ClientResult result = GenerateSpeech(content, cancellationToken.ToRequestOptions()); ; + ClientResult result = GenerateSpeech(content, cancellationToken.ToRequestOptions()); + result.GetRawResponse().BufferContent(cancellationToken); return ClientResult.FromValue(result.GetRawResponse().Content, result.GetRawResponse()); } diff --git a/src/Generated/AudioClient.RestClient.cs b/src/Generated/AudioClient.RestClient.cs index 719788ac..c018c3fb 100644 --- a/src/Generated/AudioClient.RestClient.cs +++ b/src/Generated/AudioClient.RestClient.cs @@ -18,6 +18,7 @@ internal virtual PipelineMessage CreateCreateSpeechRequest(BinaryContent content { PipelineMessage message = Pipeline.CreateMessage(); message.ResponseClassifier = PipelineMessageClassifier200; 
+ message.BufferResponse = false; PipelineRequest request = message.Request; request.Method = "POST"; ClientUriBuilder uri = new ClientUriBuilder();