Skip to content

Commit e302bf8

Browse files
authored
docs(samples): remove io dependency in transcribe samples (#223)
* refactor: Refactor transcribe samples into two separate samples (local-file vs gcs) and remove io dependency. Refactor tests.
* Removing CLI in samples and shebangs
* leaving transcribe_async.py in (to be removed later), because speech_transcribe_async_gcs is a required tag.
* Update year in license headers

Co-authored-by: Nick Cain <[email protected]>
1 parent 8fa9c42 commit e302bf8

File tree

4 files changed

+135
-10
lines changed

4 files changed

+135
-10
lines changed
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Copyright 2021 Google Inc. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Google Cloud Speech-to-Text sample application using gRPC for async
16+
batch processing.
17+
"""
18+
19+
20+
# [START speech_transcribe_async]
21+
def transcribe_file(speech_file):
    """Transcribe a local audio file with a long-running recognize request.

    Reads the whole file into memory, sends it to the Speech-to-Text API as
    inline content, waits up to 90 seconds for the operation to finish, and
    prints the top transcript and its confidence for each result.

    Args:
        speech_file: Path of the audio file to read. The request declares
            the audio as LINEAR16 at 16000 Hz with language "en-US".

    Raises:
        OSError: If ``speech_file`` cannot be opened or read.
    """
    from google.cloud import speech

    client = speech.SpeechClient()

    # [START speech_python_migration_async_request]
    with open(speech_file, "rb") as audio_file:
        content = audio_file.read()

    # Note that transcription of inline content is limited to 60 seconds of
    # audio. Use a GCS file for audio longer than 1 minute.
    audio = speech.RecognitionAudio(content=content)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code="en-US",
    )

    # [START speech_python_migration_async_response]

    operation = client.long_running_recognize(config=config, audio=audio)
    # [END speech_python_migration_async_request]

    print("Waiting for operation to complete...")
    response = operation.result(timeout=90)

    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    for result in response.results:
        # The first alternative is the most likely one for this portion.
        best = result.alternatives[0]
        print("Transcript: {}".format(best.transcript))
        print("Confidence: {}".format(best.confidence))
    # [END speech_python_migration_async_response]
58+
59+
60+
# [END speech_transcribe_async]
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Copyright 2021, Google, Inc.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
import os
15+
import re
16+
17+
import transcribe_async_file
18+
19+
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
20+
21+
22+
def test_transcribe(capsys):
    """Run the local-file sample on audio.raw and check the printed transcript."""
    audio_path = os.path.join(RESOURCES, "audio.raw")
    transcribe_async_file.transcribe_file(audio_path)

    # Only stdout matters here; the sample prints its results there.
    captured = capsys.readouterr()
    expected = r"how old is the Brooklyn Bridge"

    assert re.search(expected, captured.out, re.DOTALL | re.I)
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Copyright 2021 Google Inc. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Google Cloud Speech-to-Text sample application using gRPC for async
16+
batch processing.
17+
"""
18+
19+
20+
# [START speech_transcribe_async_gcs]
21+
def transcribe_gcs(gcs_uri):
    """Transcribe audio referenced by a Cloud Storage URI and print the result.

    Submits a long-running recognize request for ``gcs_uri`` (declared as
    FLAC, 16000 Hz, "en-US"), waits up to 90 seconds for it to complete, and
    prints the top transcript and its confidence for each result.
    """
    from google.cloud import speech

    speech_client = speech.SpeechClient()

    remote_audio = speech.RecognitionAudio(uri=gcs_uri)
    recognition_config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.FLAC,
        sample_rate_hertz=16000,
        language_code="en-US",
    )

    pending = speech_client.long_running_recognize(
        config=recognition_config, audio=remote_audio
    )

    print("Waiting for operation to complete...")
    finished = pending.result(timeout=90)

    # Results arrive in order, one per consecutive portion of the audio, so
    # printing them in sequence yields the transcript of the whole file.
    for portion in finished.results:
        # Alternatives are ranked; index 0 is the most likely one.
        top_choice = portion.alternatives[0]
        print(u"Transcript: {}".format(top_choice.transcript))
        print("Confidence: {}".format(top_choice.confidence))
45+
# [END speech_transcribe_async_gcs]
Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2016, Google, Inc.
1+
# Copyright 2021, Google, Inc.
22
# Licensed under the Apache License, Version 2.0 (the "License");
33
# you may not use this file except in compliance with the License.
44
# You may obtain a copy of the License at
@@ -14,20 +14,14 @@
1414
import os
1515
import re
1616

17-
import transcribe_async
17+
import transcribe_async_gcs
1818

1919
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
2020

2121

22-
def test_transcribe(capsys):
23-
transcribe_async.transcribe_file(os.path.join(RESOURCES, "audio.raw"))
24-
out, err = capsys.readouterr()
25-
26-
assert re.search(r"how old is the Brooklyn Bridge", out, re.DOTALL | re.I)
27-
28-
2922
def test_transcribe_gcs(capsys):
30-
transcribe_async.transcribe_gcs("gs://python-docs-samples-tests/speech/audio.flac")
23+
gcs_path = "gs://python-docs-samples-tests/speech/audio.flac"
24+
transcribe_async_gcs.transcribe_gcs(gcs_path)
3125
out, err = capsys.readouterr()
3226

3327
assert re.search(r"how old is the Brooklyn Bridge", out, re.DOTALL | re.I)

0 commit comments

Comments
 (0)