Skip to content

Commit faddda1

Browse files
gguuss authored and Jon Wayne Parrott committed
Add speech GCS samples (#784)
1 parent 413db4a commit faddda1

File tree

5 files changed

+102
-57
lines changed

5 files changed

+102
-57
lines changed

speech/cloud-client/README.rst

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -93,18 +93,20 @@ To run this sample:
9393
9494
$ python transcribe.py
9595
96-
usage: transcribe.py [-h] speech_file
96+
usage: transcribe.py [-h] path
9797
9898
Google Cloud Speech API sample application using the REST API for batch
9999
processing.
100100
101-
Example usage: python transcribe.py resources/audio.raw
101+
Example usage:
102+
python transcribe.py resources/audio.raw
103+
python transcribe.py gs://cloud-samples-tests/speech/brooklyn.flac
102104
103105
positional arguments:
104-
speech_file Full path of audio file to be recognized
106+
path File or GCS path for audio file to be recognized
105107
106108
optional arguments:
107-
-h, --help show this help message and exit
109+
-h, --help show this help message and exit
108110
109111
110112
Transcribe async
@@ -118,18 +120,20 @@ To run this sample:
118120
119121
$ python transcribe_async.py
120122
121-
usage: transcribe_async.py [-h] speech_file
123+
usage: transcribe_async.py [-h] path
122124
123125
Google Cloud Speech API sample application using the REST API for async
124126
batch processing.
125127
126-
Example usage: python transcribe_async.py resources/audio.raw
128+
Example usage:
129+
python transcribe_async.py resources/audio.raw
130+
python transcribe_async.py gs://cloud-samples-tests/speech/brooklyn.flac
127131
128132
positional arguments:
129-
speech_file Full path of audio file to be recognized
133+
path File or GCS path for audio file to be recognized
130134
131135
optional arguments:
132-
-h, --help show this help message and exit
136+
-h, --help show this help message and exit
133137
134138
135139

speech/cloud-client/transcribe.py

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717
"""Google Cloud Speech API sample application using the REST API for batch
1818
processing.
1919
20-
Example usage: python transcribe.py resources/audio.raw
20+
Example usage:
21+
python transcribe.py resources/audio.raw
22+
python transcribe.py gs://cloud-samples-tests/speech/brooklyn.flac
2123
"""
2224

2325
# [START import_libraries]
@@ -26,44 +28,48 @@
2628
# [END import_libraries]
2729

2830

29-
def main(speech_file):
30-
"""Transcribe the given audio file.
31-
32-
Args:
33-
speech_file: the name of the audio file.
34-
"""
35-
# [START authenticating]
36-
# Application default credentials provided by env variable
37-
# GOOGLE_APPLICATION_CREDENTIALS
31+
def transcribe_file(speech_file):
32+
"""Transcribe the given audio file."""
3833
from google.cloud import speech
3934
speech_client = speech.Client()
40-
# [END authenticating]
4135

42-
# [START construct_request]
43-
# Loads the audio into memory
4436
with io.open(speech_file, 'rb') as audio_file:
4537
content = audio_file.read()
4638
audio_sample = speech_client.sample(
47-
content,
39+
content=content,
4840
source_uri=None,
4941
encoding='LINEAR16',
5042
sample_rate=16000)
51-
# [END construct_request]
5243

53-
# [START send_request]
5444
alternatives = speech_client.speech_api.sync_recognize(audio_sample)
5545
for alternative in alternatives:
5646
print('Transcript: {}'.format(alternative.transcript))
57-
# [END send_request]
5847

5948

60-
# [START run_application]
49+
def transcribe_gcs(gcs_uri):
50+
"""Transcribes the audio file specified by the gcs_uri."""
51+
from google.cloud import speech
52+
speech_client = speech.Client()
53+
54+
audio_sample = speech_client.sample(
55+
content=None,
56+
source_uri=gcs_uri,
57+
encoding='FLAC',
58+
sample_rate=16000)
59+
60+
alternatives = speech_client.speech_api.sync_recognize(audio_sample)
61+
for alternative in alternatives:
62+
print('Transcript: {}'.format(alternative.transcript))
63+
64+
6165
if __name__ == '__main__':
6266
parser = argparse.ArgumentParser(
6367
description=__doc__,
6468
formatter_class=argparse.RawDescriptionHelpFormatter)
6569
parser.add_argument(
66-
'speech_file', help='Full path of audio file to be recognized')
70+
'path', help='File or GCS path for audio file to be recognized')
6771
args = parser.parse_args()
68-
main(args.speech_file)
69-
# [END run_application]
72+
if args.path.startswith('gs://'):
73+
transcribe_gcs(args.path)
74+
else:
75+
transcribe_file(args.path)

speech/cloud-client/transcribe_async.py

Lines changed: 41 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -17,41 +17,29 @@
1717
"""Google Cloud Speech API sample application using the REST API for async
1818
batch processing.
1919
20-
Example usage: python transcribe_async.py resources/audio.raw
20+
Example usage:
21+
python transcribe_async.py resources/audio.raw
22+
python transcribe_async.py gs://cloud-samples-tests/speech/brooklyn.flac
2123
"""
2224

23-
# [START import_libraries]
2425
import argparse
2526
import io
2627
import time
27-
# [END import_libraries]
2828

2929

30-
def main(speech_file):
31-
"""Transcribe the given audio file asynchronously.
32-
33-
Args:
34-
speech_file: the name of the audio file.
35-
"""
36-
# [START authenticating]
37-
# Application default credentials provided by env variable
38-
# GOOGLE_APPLICATION_CREDENTIALS
30+
def transcribe_file(speech_file):
31+
"""Transcribe the given audio file asynchronously."""
3932
from google.cloud import speech
4033
speech_client = speech.Client()
41-
# [END authenticating]
4234

43-
# [START construct_request]
44-
# Loads the audio into memory
4535
with io.open(speech_file, 'rb') as audio_file:
4636
content = audio_file.read()
4737
audio_sample = speech_client.sample(
4838
content,
4939
source_uri=None,
5040
encoding='LINEAR16',
5141
sample_rate=16000)
52-
# [END construct_request]
5342

54-
# [START send_request]
5543
operation = speech_client.speech_api.async_recognize(audio_sample)
5644

5745
retry_count = 100
@@ -61,7 +49,7 @@ def main(speech_file):
6149
operation.poll()
6250

6351
if not operation.complete:
64-
print("Operation not complete and retry limit reached.")
52+
print('Operation not complete and retry limit reached.')
6553
return
6654

6755
alternatives = operation.results
@@ -71,13 +59,44 @@ def main(speech_file):
7159
# [END send_request]
7260

7361

74-
# [START run_application]
62+
def transcribe_gcs(gcs_uri):
63+
"""Asynchronously transcribes the audio file specified by the gcs_uri."""
64+
from google.cloud import speech
65+
speech_client = speech.Client()
66+
67+
audio_sample = speech_client.sample(
68+
content=None,
69+
source_uri=gcs_uri,
70+
encoding='FLAC',
71+
sample_rate=16000)
72+
73+
operation = speech_client.speech_api.async_recognize(audio_sample)
74+
75+
retry_count = 100
76+
while retry_count > 0 and not operation.complete:
77+
retry_count -= 1
78+
time.sleep(2)
79+
operation.poll()
80+
81+
if not operation.complete:
82+
print('Operation not complete and retry limit reached.')
83+
return
84+
85+
alternatives = operation.results
86+
for alternative in alternatives:
87+
print('Transcript: {}'.format(alternative.transcript))
88+
print('Confidence: {}'.format(alternative.confidence))
89+
# [END send_request_gcs]
90+
91+
7592
if __name__ == '__main__':
7693
parser = argparse.ArgumentParser(
7794
description=__doc__,
7895
formatter_class=argparse.RawDescriptionHelpFormatter)
7996
parser.add_argument(
80-
'speech_file', help='Full path of audio file to be recognized')
97+
'path', help='File or GCS path for audio file to be recognized')
8198
args = parser.parse_args()
82-
main(args.speech_file)
83-
# [END run_application]
99+
if args.path.startswith('gs://'):
100+
transcribe_gcs(args.path)
101+
else:
102+
transcribe_file(args.path)

speech/cloud-client/transcribe_async_test.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,19 @@
1313

1414
import re
1515

16-
from transcribe_async import main
16+
import transcribe_async
1717

1818

19-
def test_main(resource, capsys):
20-
main(resource('audio.raw'))
19+
def test_transcribe(resource, capsys):
20+
transcribe_async.transcribe_file(resource('audio.raw'))
21+
out, err = capsys.readouterr()
22+
23+
assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I)
24+
25+
26+
def test_transcribe_gcs(resource, capsys):
27+
transcribe_async.transcribe_gcs(
28+
'gs://python-docs-samples-tests/speech/audio.flac')
2129
out, err = capsys.readouterr()
2230

2331
assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I)

speech/cloud-client/transcribe_test.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,19 @@
1313

1414
import re
1515

16-
from transcribe import main
16+
import transcribe
1717

1818

19-
def test_main(resource, capsys):
20-
main(resource('audio.raw'))
19+
def test_transcribe_file(resource, capsys):
20+
transcribe.transcribe_file(resource('audio.raw'))
21+
out, err = capsys.readouterr()
22+
23+
assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I)
24+
25+
26+
def test_transcribe_gcs(resource, capsys):
27+
transcribe.transcribe_gcs(
28+
'gs://python-docs-samples-tests/speech/audio.flac')
2129
out, err = capsys.readouterr()
2230

2331
assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I)

0 commit comments

Comments
 (0)