Skip to content

Commit ceb5b22

Browse files
Jon Wayne Parrott, busunkim96
Jon Wayne Parrott
authored and committed
Add word time offset samples [(#1050)](#1050)
1 parent 5791d87 commit ceb5b22

6 files changed

+189
-40
lines changed

speech/snippets/README.rst

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,32 @@ To run this sample:
140140
-h, --help show this help message and exit
141141
142142
143+
Transcribe with word time offsets
144+
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
145+
146+
147+
148+
To run this sample:
149+
150+
.. code-block:: bash
151+
152+
$ python transcribe_word_time_offsets.py
153+
154+
usage: transcribe_word_time_offsets.py [-h] path
155+
156+
Google Cloud Speech API sample that demonstrates word time offsets.
157+
158+
Example usage:
159+
python transcribe_word_time_offsets.py resources/audio.raw
160+
python transcribe_word_time_offsets.py gs://cloud-samples-tests/speech/vr.flac
161+
162+
positional arguments:
163+
path File or GCS path for audio file to be recognized
164+
165+
optional arguments:
166+
-h, --help show this help message and exit
167+
168+
143169
Transcribe Streaming
144170
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
145171

speech/snippets/README.rst.in

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ samples:
2828
- name: Transcribe async
2929
file: transcribe_async.py
3030
show_help: true
31+
- name: Transcribe with word time offsets
32+
file: transcribe_word_time_offsets.py
33+
show_help: true
3134
- name: Transcribe Streaming
3235
file: transcribe_streaming.py
3336
show_help: true

speech/snippets/transcribe_async.py

Lines changed: 6 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424

2525
import argparse
2626
import io
27-
import time
2827

2928

3029
# [START def_transcribe_file]
@@ -49,17 +48,10 @@ def transcribe_file(speech_file):
4948
operation = client.long_running_recognize(config, audio)
5049
# [END migration_async_request]
5150

52-
# Sleep and poll operation.done()
53-
retry_count = 100
54-
while retry_count > 0 and not operation.done():
55-
retry_count -= 1
56-
time.sleep(2)
51+
print('Waiting for operation to complete...')
52+
result = operation.result(timeout=90)
5753

58-
if not operation.done():
59-
print('Operation not complete and retry limit reached.')
60-
return
61-
62-
alternatives = operation.result().results[0].alternatives
54+
alternatives = result.results[0].alternatives
6355
for alternative in alternatives:
6456
print('Transcript: {}'.format(alternative.transcript))
6557
print('Confidence: {}'.format(alternative.confidence))
@@ -84,28 +76,13 @@ def transcribe_gcs(gcs_uri):
8476

8577
operation = client.long_running_recognize(config, audio)
8678

87-
retry_count = 100
88-
while retry_count > 0 and not operation.done():
89-
retry_count -= 1
90-
time.sleep(2)
91-
92-
if not operation.done():
93-
print('Operation not complete and retry limit reached.')
94-
return
79+
print('Waiting for operation to complete...')
80+
result = operation.result(timeout=90)
9581

96-
alternatives = operation.result().results[0].alternatives
82+
alternatives = result.results[0].alternatives
9783
for alternative in alternatives:
9884
print('Transcript: {}'.format(alternative.transcript))
9985
print('Confidence: {}'.format(alternative.confidence))
100-
101-
for word_info in alternative.words:
102-
word = word_info.word
103-
start_time = word_info.start_time
104-
end_time = word_info.end_time
105-
print('Word: {}, start_time: {}, end_time: {}'.format(
106-
word,
107-
start_time.seconds + start_time.nanos * 1e-9,
108-
end_time.seconds + end_time.nanos * 1e-9))
10986
# [END def_transcribe_gcs]
11087

11188

speech/snippets/transcribe_async_test.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,3 @@ def test_transcribe_gcs(capsys):
3333
out, err = capsys.readouterr()
3434

3535
assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I)
36-
37-
38-
def test_transcribe_gcs_word_time_offsets(capsys):
39-
transcribe_async.transcribe_gcs(
40-
'gs://python-docs-samples-tests/speech/audio.flac')
41-
out, err = capsys.readouterr()
42-
43-
match = re.search(r'Bridge, start_time: ([0-9.]+)', out, re.DOTALL | re.I)
44-
time = float(match.group(1))
45-
46-
assert time > 0
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2017 Google Inc. All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""Google Cloud Speech API sample that demonstrates word time offsets.
18+
19+
Example usage:
20+
python transcribe_word_time_offsets.py resources/audio.raw
21+
python transcribe_word_time_offsets.py \
22+
gs://cloud-samples-tests/speech/vr.flac
23+
"""
24+
25+
import argparse
26+
import io
27+
28+
29+
def transcribe_file_with_word_time_offsets(speech_file):
    """Transcribe a local audio file synchronously and print word time offsets.

    The request is configured for LINEAR16 audio sampled at 16000 Hz in
    US English, with word time offsets enabled.

    Args:
        speech_file: Path of the local audio file to read and send for
            recognition.

    Prints the transcript of every alternative of every result, followed by
    one line per word giving its start and end offset in seconds. Nothing is
    printed when the response contains no results.
    """
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    client = speech.SpeechClient()

    with io.open(speech_file, 'rb') as audio_file:
        content = audio_file.read()

    audio = types.RecognitionAudio(content=content)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US',
        enable_word_time_offsets=True)

    response = client.recognize(config, audio)

    # Iterate over every result instead of indexing results[0]: the list is
    # empty when no speech was recognised (indexing would raise IndexError),
    # and it may hold several entries for consecutive portions of the audio.
    for result in response.results:
        for alternative in result.alternatives:
            print('Transcript: {}'.format(alternative.transcript))

            for word_info in alternative.words:
                word = word_info.word
                start_time = word_info.start_time
                end_time = word_info.end_time
                # Offsets expose separate seconds/nanos fields; combine them
                # into fractional seconds for display.
                print('Word: {}, start_time: {}, end_time: {}'.format(
                    word,
                    start_time.seconds + start_time.nanos * 1e-9,
                    end_time.seconds + end_time.nanos * 1e-9))
62+
63+
64+
# [START def_transcribe_gcs]
65+
def transcribe_gcs_with_word_time_offsets(gcs_uri):
    """Transcribe audio at a GCS URI asynchronously and print word time offsets.

    The request is configured for FLAC audio sampled at 16000 Hz in
    US English, with word time offsets enabled. It is submitted as a
    long-running operation and this function blocks for up to 90 seconds
    waiting for it to finish.

    Args:
        gcs_uri: URI of the audio file to recognise (passed as the ``uri``
            of the recognition audio).

    Prints the transcript and confidence of every alternative of every
    result, followed by one line per word giving its start and end offset in
    seconds. Nothing is printed when the response contains no results.
    """
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    client = speech.SpeechClient()

    audio = types.RecognitionAudio(uri=gcs_uri)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
        sample_rate_hertz=16000,
        language_code='en-US',
        enable_word_time_offsets=True)

    operation = client.long_running_recognize(config, audio)

    print('Waiting for operation to complete...')
    result = operation.result(timeout=90)

    # Iterate over every result instead of indexing results[0]: the list is
    # empty when no speech was recognised (indexing would raise IndexError),
    # and it may hold several entries for consecutive portions of the audio.
    for speech_result in result.results:
        for alternative in speech_result.alternatives:
            print('Transcript: {}'.format(alternative.transcript))
            print('Confidence: {}'.format(alternative.confidence))

            for word_info in alternative.words:
                word = word_info.word
                start_time = word_info.start_time
                end_time = word_info.end_time
                # Offsets expose separate seconds/nanos fields; combine them
                # into fractional seconds for display.
                print('Word: {}, start_time: {}, end_time: {}'.format(
                    word,
                    start_time.seconds + start_time.nanos * 1e-9,
                    end_time.seconds + end_time.nanos * 1e-9))
98+
# [END def_transcribe_gcs]
99+
100+
101+
if __name__ == '__main__':
    # Command-line entry point: takes a single positional audio path and
    # reuses the module docstring as the help text.
    arg_parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    arg_parser.add_argument(
        'path', help='File or GCS path for audio file to be recognized')
    audio_path = arg_parser.parse_args().path

    # Paths with the gs:// prefix go to the GCS sample; everything else is
    # treated as a local file.
    if audio_path.startswith('gs://'):
        handler = transcribe_gcs_with_word_time_offsets
    else:
        handler = transcribe_file_with_word_time_offsets
    handler(audio_path)
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Copyright 2016, Google, Inc.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
import os
15+
import re
16+
17+
import transcribe_word_time_offsets
18+
19+
RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
20+
21+
22+
def test_transcribe_file_with_word_time_offsets(capsys):
    """The local-file sample prints a positive start time for 'Bridge'."""
    audio_path = os.path.join(RESOURCES, 'audio.raw')
    transcribe_word_time_offsets.transcribe_file_with_word_time_offsets(
        audio_path)
    captured, _ = capsys.readouterr()

    # Re-emit what the sample printed (presumably so it is visible in the
    # test report when an assertion below fails).
    print(captured)
    found = re.search(
        r'Bridge, start_time: ([0-9.]+)', captured, re.DOTALL | re.I)
    start_seconds = float(found.group(1))

    assert start_seconds > 0
32+
33+
34+
def test_transcribe_gcs_with_word_time_offsets(capsys):
    """The GCS sample prints a positive start time for 'Bridge'."""
    gcs_uri = 'gs://python-docs-samples-tests/speech/audio.flac'
    transcribe_word_time_offsets.transcribe_gcs_with_word_time_offsets(
        gcs_uri)
    captured, _ = capsys.readouterr()

    # Re-emit what the sample printed (presumably so it is visible in the
    # test report when an assertion below fails).
    print(captured)
    found = re.search(
        r'Bridge, start_time: ([0-9.]+)', captured, re.DOTALL | re.I)
    start_seconds = float(found.group(1))

    assert start_seconds > 0

0 commit comments

Comments
 (0)