Skip to content

Commit bdccb5f

Browse files
dizcology authored and busunkim96 committed
Speech gapic client library [(#1012)](#1012)
* Migrate quickstart to GAPIC client library * Migrate transcribe to GAPIC client library * Migrate transcribe_async to GAPIC client library * Migrate transcribe_streaming to GAPIC client library * clean up * clean up * Import from google.cloud.speech * update transcribe samples * import in alphabetic order * remove unused variable * use strings instead of enums * restructure code * comment on streaming requests * import style * flake * correct indent * migrate transcribe_streaming_mic to gapic * update google-cloud-speech version requirement * addressing review comments * at the end of the audio stream, put None to signal to the generator * flake * addressing github review comments * add region tags for migration guide * update README * rst format * bullet * addressing PR review comments * use enums * remove a word
1 parent 6c4e8cd commit bdccb5f

8 files changed

+181
-98
lines changed

speech/snippets/README.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ Google Cloud Speech API Python Samples
55

66
This directory contains samples for Google Cloud Speech API. The `Google Cloud Speech API`_ enables easy integration of Google speech recognition technologies into developer applications. Send audio and receive a text transcription from the Cloud Speech API service.
77

8+
- See the `migration guide`_ for information about migrating to Python client library v0.27.
9+
10+
.. _migration guide: https://cloud.google.com/speech/docs/python-client-migration
11+
812

913

1014

speech/snippets/README.rst.in

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@ product:
99
recognition technologies into developer applications. Send audio and receive
1010
a text transcription from the Cloud Speech API service.
1111

12+
13+
- See the `migration guide`_ for information about migrating to Python client library v0.27.
14+
15+
16+
.. _migration guide: https://cloud.google.com/speech/docs/python-client-migration
17+
1218
setup:
1319
- auth
1420
- install_deps

speech/snippets/quickstart.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,16 @@ def run_quickstart():
2121
import os
2222

2323
# Imports the Google Cloud client library
24+
# [START migration_import]
2425
from google.cloud import speech
26+
from google.cloud.speech import enums
27+
from google.cloud.speech import types
28+
# [END migration_import]
2529

2630
# Instantiates a client
27-
speech_client = speech.Client()
31+
# [START migration_client]
32+
client = speech.SpeechClient()
33+
# [END migration_client]
2834

2935
# The name of the audio file to transcribe
3036
file_name = os.path.join(
@@ -35,14 +41,16 @@ def run_quickstart():
3541
# Loads the audio into memory
3642
with io.open(file_name, 'rb') as audio_file:
3743
content = audio_file.read()
38-
sample = speech_client.sample(
39-
content,
40-
source_uri=None,
41-
encoding='LINEAR16',
42-
sample_rate_hertz=16000)
44+
audio = types.RecognitionAudio(content=content)
45+
46+
config = types.RecognitionConfig(
47+
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
48+
sample_rate_hertz=16000,
49+
language_code='en-US')
4350

4451
# Detects speech in the audio file
45-
alternatives = sample.recognize('en-US')
52+
response = client.recognize(config, audio)
53+
alternatives = response.results[0].alternatives
4654

4755
for alternative in alternatives:
4856
print('Transcript: {}'.format(alternative.transcript))

speech/snippets/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
google-cloud-speech==0.26.0
1+
google-cloud-speech==0.27.0

speech/snippets/transcribe.py

Lines changed: 31 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -31,33 +31,50 @@
3131
def transcribe_file(speech_file):
3232
"""Transcribe the given audio file."""
3333
from google.cloud import speech
34-
speech_client = speech.Client()
34+
from google.cloud.speech import enums
35+
from google.cloud.speech import types
36+
client = speech.SpeechClient()
3537

38+
# [START migration_sync_request]
39+
# [START migration_audio_config_file]
3640
with io.open(speech_file, 'rb') as audio_file:
3741
content = audio_file.read()
38-
audio_sample = speech_client.sample(
39-
content=content,
40-
source_uri=None,
41-
encoding='LINEAR16',
42-
sample_rate_hertz=16000)
4342

44-
alternatives = audio_sample.recognize('en-US')
43+
audio = types.RecognitionAudio(content=content)
44+
config = types.RecognitionConfig(
45+
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
46+
sample_rate_hertz=16000,
47+
language_code='en-US')
48+
# [END migration_audio_config_file]
49+
50+
# [START migration_sync_response]
51+
response = client.recognize(config, audio)
52+
# [END migration_sync_request]
53+
alternatives = response.results[0].alternatives
54+
4555
for alternative in alternatives:
4656
print('Transcript: {}'.format(alternative.transcript))
57+
# [END migration_sync_response]
4758

4859

4960
def transcribe_gcs(gcs_uri):
5061
"""Transcribes the audio file specified by the gcs_uri."""
5162
from google.cloud import speech
52-
speech_client = speech.Client()
63+
from google.cloud.speech import enums
64+
from google.cloud.speech import types
65+
client = speech.SpeechClient()
66+
67+
# [START migration_audio_config_gcs]
68+
audio = types.RecognitionAudio(uri=gcs_uri)
69+
config = types.RecognitionConfig(
70+
encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
71+
sample_rate_hertz=16000,
72+
language_code='en-US')
73+
# [END migration_audio_config_gcs]
5374

54-
audio_sample = speech_client.sample(
55-
content=None,
56-
source_uri=gcs_uri,
57-
encoding='FLAC',
58-
sample_rate_hertz=16000)
75+
response = client.recognize(config, audio)
76+
alternatives = response.results[0].alternatives
5977

60-
alternatives = audio_sample.recognize('en-US')
6178
for alternative in alternatives:
6279
print('Transcript: {}'.format(alternative.transcript))
6380

speech/snippets/transcribe_async.py

Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -30,63 +30,69 @@
3030
def transcribe_file(speech_file):
3131
"""Transcribe the given audio file asynchronously."""
3232
from google.cloud import speech
33-
speech_client = speech.Client()
33+
from google.cloud.speech import enums
34+
from google.cloud.speech import types
35+
client = speech.SpeechClient()
3436

37+
# [START migration_async_request]
3538
with io.open(speech_file, 'rb') as audio_file:
3639
content = audio_file.read()
37-
audio_sample = speech_client.sample(
38-
content,
39-
source_uri=None,
40-
encoding='LINEAR16',
41-
sample_rate_hertz=16000)
4240

43-
operation = audio_sample.long_running_recognize('en-US')
41+
audio = types.RecognitionAudio(content=content)
42+
config = types.RecognitionConfig(
43+
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
44+
sample_rate_hertz=16000,
45+
language_code='en-US')
4446

47+
# [START migration_async_response]
48+
operation = client.long_running_recognize(config, audio)
49+
# [END migration_async_request]
50+
51+
# Sleep and poll operation.done()
4552
retry_count = 100
46-
while retry_count > 0 and not operation.complete:
53+
while retry_count > 0 and not operation.done():
4754
retry_count -= 1
4855
time.sleep(2)
49-
operation.poll()
5056

51-
if not operation.complete:
57+
if not operation.done():
5258
print('Operation not complete and retry limit reached.')
5359
return
5460

55-
alternatives = operation.results
61+
alternatives = operation.result().results[0].alternatives
5662
for alternative in alternatives:
5763
print('Transcript: {}'.format(alternative.transcript))
5864
print('Confidence: {}'.format(alternative.confidence))
59-
# [END send_request]
65+
# [END migration_async_response]
6066

6167

6268
def transcribe_gcs(gcs_uri):
6369
"""Asynchronously transcribes the audio file specified by the gcs_uri."""
6470
from google.cloud import speech
65-
speech_client = speech.Client()
71+
from google.cloud.speech import enums
72+
from google.cloud.speech import types
73+
client = speech.SpeechClient()
6674

67-
audio_sample = speech_client.sample(
68-
content=None,
69-
source_uri=gcs_uri,
70-
encoding='FLAC',
71-
sample_rate_hertz=16000)
75+
audio = types.RecognitionAudio(uri=gcs_uri)
76+
config = types.RecognitionConfig(
77+
encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
78+
sample_rate_hertz=16000,
79+
language_code='en-US')
7280

73-
operation = audio_sample.long_running_recognize('en-US')
81+
operation = client.long_running_recognize(config, audio)
7482

7583
retry_count = 100
76-
while retry_count > 0 and not operation.complete:
84+
while retry_count > 0 and not operation.done():
7785
retry_count -= 1
7886
time.sleep(2)
79-
operation.poll()
8087

81-
if not operation.complete:
88+
if not operation.done():
8289
print('Operation not complete and retry limit reached.')
8390
return
8491

85-
alternatives = operation.results
92+
alternatives = operation.result().results[0].alternatives
8693
for alternative in alternatives:
8794
print('Transcript: {}'.format(alternative.transcript))
8895
print('Confidence: {}'.format(alternative.confidence))
89-
# [END send_request_gcs]
9096

9197

9298
if __name__ == '__main__':

speech/snippets/transcribe_streaming.py

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,20 +29,39 @@
2929
def transcribe_streaming(stream_file):
3030
"""Streams transcription of the given audio file."""
3131
from google.cloud import speech
32-
speech_client = speech.Client()
32+
from google.cloud.speech import enums
33+
from google.cloud.speech import types
34+
client = speech.SpeechClient()
3335

36+
# [START migration_streaming_request]
3437
with io.open(stream_file, 'rb') as audio_file:
35-
audio_sample = speech_client.sample(
36-
stream=audio_file,
37-
encoding=speech.encoding.Encoding.LINEAR16,
38-
sample_rate_hertz=16000)
39-
alternatives = audio_sample.streaming_recognize('en-US')
40-
41-
for alternative in alternatives:
42-
print('Finished: {}'.format(alternative.is_final))
43-
print('Stability: {}'.format(alternative.stability))
44-
print('Confidence: {}'.format(alternative.confidence))
45-
print('Transcript: {}'.format(alternative.transcript))
38+
content = audio_file.read()
39+
40+
# In practice, stream should be a generator yielding chunks of audio data.
41+
stream = [content]
42+
requests = (types.StreamingRecognizeRequest(audio_content=chunk)
43+
for chunk in stream)
44+
45+
config = types.RecognitionConfig(
46+
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
47+
sample_rate_hertz=16000,
48+
language_code='en-US')
49+
streaming_config = types.StreamingRecognitionConfig(config=config)
50+
51+
# streaming_recognize returns a generator.
52+
# [START migration_streaming_response]
53+
responses = client.streaming_recognize(streaming_config, requests)
54+
# [END migration_streaming_request]
55+
56+
for response in responses:
57+
for result in response.results:
58+
print('Finished: {}'.format(result.is_final))
59+
print('Stability: {}'.format(result.stability))
60+
alternatives = result.alternatives
61+
for alternative in alternatives:
62+
print('Confidence: {}'.format(alternative.confidence))
63+
print('Transcript: {}'.format(alternative.transcript))
64+
# [END migration_streaming_response]
4665

4766

4867
if __name__ == '__main__':

0 commit comments

Comments
 (0)