Skip to content

Commit 1138817

Browse files
nnegrey authored and busunkim96 committed
Update enhanced models and auto punctuation to GA [(#1702)](GoogleCloudPlatform/python-docs-samples#1702)
* Update enhanced models and auto punctuation to GA * Update model-selection to GA
1 parent 53feb06 commit 1138817

8 files changed

+258
-7
lines changed

packages/google-cloud-python-speech/samples/snippets/README.rst

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,68 @@ To run this sample:
206206
207207
208208
209+
Transcribe Enhanced Models
210+
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
211+
212+
.. image:: https://gstatic.com/cloudssh/images/open-btn.png
213+
:target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=speech/cloud-client/transcribe_enhanced_model.py,speech/cloud-client/README.rst
214+
215+
216+
217+
218+
To run this sample:
219+
220+
.. code-block:: bash
221+
222+
$ python transcribe_enhanced_model.py
223+
224+
usage: transcribe_enhanced_model.py [-h] path
225+
226+
Google Cloud Speech API sample that demonstrates enhanced models
227+
and recognition metadata.
228+
229+
Example usage:
230+
python transcribe_enhanced_model.py resources/commercial_mono.wav
231+
232+
positional arguments:
233+
path File to stream to the API
234+
235+
optional arguments:
236+
-h, --help show this help message and exit
237+
238+
239+
240+
Transcribe Automatic Punctuation
241+
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
242+
243+
.. image:: https://gstatic.com/cloudssh/images/open-btn.png
244+
:target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=speech/cloud-client/transcribe_auto_punctuation.py,speech/cloud-client/README.rst
245+
246+
247+
248+
249+
To run this sample:
250+
251+
.. code-block:: bash
252+
253+
$ python transcribe_auto_punctuation.py
254+
255+
usage: transcribe_auto_punctuation.py [-h] path
256+
257+
Google Cloud Speech API sample that demonstrates auto punctuation
258+
and recognition metadata.
259+
260+
Example usage:
261+
python transcribe_auto_punctuation.py resources/commercial_mono.wav
262+
263+
positional arguments:
264+
path File to stream to the API
265+
266+
optional arguments:
267+
-h, --help show this help message and exit
268+
269+
270+
209271
Beta Samples
210272
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
211273

packages/google-cloud-python-speech/samples/snippets/README.rst.in

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ samples:
3434
- name: Transcribe Streaming
3535
file: transcribe_streaming.py
3636
show_help: true
37+
- name: Transcribe Enhanced Models
38+
file: transcribe_enhanced_model.py
39+
show_help: true
40+
- name: Transcribe Automatic Punctuation
41+
file: transcribe_auto_punctuation.py
42+
show_help: true
3743
- name: Beta Samples
3844
file: beta_snippets.py
3945
show_help: true
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
google-cloud-speech==0.35.0
1+
google-cloud-speech==0.36.0
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2018 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""Google Cloud Speech API sample that demonstrates auto punctuation
18+
and recognition metadata.
19+
20+
Example usage:
21+
python transcribe_auto_punctuation.py resources/commercial_mono.wav
22+
"""
23+
24+
import argparse
25+
import io
26+
27+
28+
def transcribe_file_with_auto_punctuation(path):
    """Recognize speech in a local audio file, requesting punctuation.

    Args:
        path: Location of the audio file on disk; its bytes are read and
            passed to the API as the request's audio content.

    For every result in the response this prints a separator line, the
    result index, and the transcript of the first alternative.
    """
    # [START speech_transcribe_auto_punctuation]
    from google.cloud import speech
    speech_client = speech.SpeechClient()

    # path = 'resources/commercial_mono.wav'
    with io.open(path, 'rb') as wav_file:
        audio_bytes = wav_file.read()

    recognition_audio = speech.types.RecognitionAudio(content=audio_bytes)
    linear16 = speech.enums.RecognitionConfig.AudioEncoding.LINEAR16
    recognition_config = speech.types.RecognitionConfig(
        encoding=linear16,
        sample_rate_hertz=8000,
        language_code='en-US',
        # Enable automatic punctuation
        enable_automatic_punctuation=True)

    response = speech_client.recognize(recognition_config, recognition_audio)

    for index, result in enumerate(response.results):
        top_alternative = result.alternatives[0]
        print('-' * 20)
        print('First alternative of result {}'.format(index))
        print('Transcript: {}'.format(top_alternative.transcript))
    # [END speech_transcribe_auto_punctuation]
54+
55+
56+
if __name__ == '__main__':
    # The module docstring doubles as the --help description; the raw
    # formatter keeps its line breaks intact.
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)
    cli.add_argument('path', help='File to stream to the API')

    transcribe_file_with_auto_punctuation(cli.parse_args().path)
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Copyright 2018, Google LLC
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
import os
15+
16+
import transcribe_auto_punctuation
17+
18+
RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
19+
20+
21+
def test_transcribe_file_with_auto_punctuation(capsys):
    """Check that the auto-punctuation sample prints a punctuated transcript.

    The audio path is built from RESOURCES, which is anchored at this
    file's directory, so the test does not depend on the directory pytest
    is launched from.
    """
    transcribe_auto_punctuation.transcribe_file_with_auto_punctuation(
        os.path.join(RESOURCES, 'commercial_mono.wav'))
    out, _ = capsys.readouterr()

    # Punctuation marks in the expected phrase show the feature was applied.
    assert 'Okay. Sure.' in out
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2018 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""Google Cloud Speech API sample that demonstrates enhanced models
18+
and recognition metadata.
19+
20+
Example usage:
21+
python transcribe_enhanced_model.py resources/commercial_mono.wav
22+
"""
23+
24+
import argparse
25+
import io
26+
27+
28+
def transcribe_file_with_enhanced_model(path):
    """Recognize speech in a local audio file using an enhanced model.

    Args:
        path: Location of the audio file on disk; its bytes are read and
            passed to the API as the request's audio content.

    For every result in the response this prints a separator line, the
    result index, and the transcript of the first alternative.
    """
    # [START speech_transcribe_enhanced_model]
    from google.cloud import speech
    speech_client = speech.SpeechClient()

    # path = 'resources/commercial_mono.wav'
    with io.open(path, 'rb') as wav_file:
        audio_bytes = wav_file.read()

    recognition_audio = speech.types.RecognitionAudio(content=audio_bytes)
    linear16 = speech.enums.RecognitionConfig.AudioEncoding.LINEAR16
    recognition_config = speech.types.RecognitionConfig(
        encoding=linear16,
        sample_rate_hertz=8000,
        language_code='en-US',
        # Enhanced models are only available to projects that
        # opt in for audio data collection.
        use_enhanced=True,
        # A model must be specified to use enhanced model.
        model='phone_call')

    response = speech_client.recognize(recognition_config, recognition_audio)

    for index, result in enumerate(response.results):
        top_alternative = result.alternatives[0]
        print('-' * 20)
        print('First alternative of result {}'.format(index))
        print('Transcript: {}'.format(top_alternative.transcript))
    # [END speech_transcribe_enhanced_model]
57+
58+
59+
if __name__ == '__main__':
    # The module docstring doubles as the --help description; the raw
    # formatter keeps its line breaks intact.
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)
    cli.add_argument('path', help='File to stream to the API')

    transcribe_file_with_enhanced_model(cli.parse_args().path)
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Copyright 2018, Google LLC
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
import os
15+
16+
import transcribe_enhanced_model
17+
18+
RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
19+
20+
21+
def test_transcribe_file_with_enhanced_model(capsys):
    """Check that the enhanced-model sample prints the expected transcript.

    The audio path is built from RESOURCES, which is anchored at this
    file's directory, so the test does not depend on the directory pytest
    is launched from.
    """
    transcribe_enhanced_model.transcribe_file_with_enhanced_model(
        os.path.join(RESOURCES, 'commercial_mono.wav'))
    out, _ = capsys.readouterr()

    assert 'Chrome' in out

packages/google-cloud-python-speech/samples/snippets/transcribe_model_selection.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@
2727
import argparse
2828

2929

30-
# [START speech_transcribe_model_selection_beta]
30+
# [START speech_transcribe_model_selection]
3131
def transcribe_model_selection(speech_file, model):
3232
"""Transcribe the given audio file synchronously with
3333
the selected model."""
34-
from google.cloud import speech_v1p1beta1 as speech
34+
from google.cloud import speech
3535
client = speech.SpeechClient()
3636

3737
with open(speech_file, 'rb') as audio_file:
@@ -52,14 +52,14 @@ def transcribe_model_selection(speech_file, model):
5252
print('-' * 20)
5353
print('First alternative of result {}'.format(i))
5454
print(u'Transcript: {}'.format(alternative.transcript))
55-
# [END speech_transcribe_model_selection_beta]
55+
# [END speech_transcribe_model_selection]
5656

5757

58-
# [START speech_transcribe_model_selection_gcs_beta]
58+
# [START speech_transcribe_model_selection_gcs]
5959
def transcribe_model_selection_gcs(gcs_uri, model):
6060
"""Transcribe the given audio file asynchronously with
6161
the selected model."""
62-
from google.cloud import speech_v1p1beta1 as speech
62+
from google.cloud import speech
6363
client = speech.SpeechClient()
6464

6565
audio = speech.types.RecognitionAudio(uri=gcs_uri)
@@ -80,7 +80,7 @@ def transcribe_model_selection_gcs(gcs_uri, model):
8080
print('-' * 20)
8181
print('First alternative of result {}'.format(i))
8282
print(u'Transcript: {}'.format(alternative.transcript))
83-
# [END speech_transcribe_model_selection_gcs_beta]
83+
# [END speech_transcribe_model_selection_gcs]
8484

8585

8686
if __name__ == '__main__':

0 commit comments

Comments
 (0)