Skip to content

Commit 046b7df

Browse files
happyhuman authored and busunkim96 committed
* Implemented the multi channel sample
* Added parameter comment
* Moved region tags inside the functions
* Deleted the extra line
* Fixing typos
1 parent 211966e commit 046b7df

File tree

3 files changed: 79 additions (+79) and 19 deletions (-19).

google-cloud-speech/samples/snippets/README.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -231,6 +231,7 @@ To run this sample:
231231
python beta_snippets.py metadata resources/commercial_mono.wav
232232
python beta_snippets.py punctuation resources/commercial_mono.wav
233233
python beta_snippets.py diarization resources/commercial_mono.wav
234+
python beta_snippets.py multi-channel resources/commercial_mono.wav
234235
235236
positional arguments:
236237
command

google-cloud-speech/samples/snippets/beta_snippets.py

Lines changed: 68 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -22,20 +22,23 @@
2222
python beta_snippets.py metadata resources/commercial_mono.wav
2323
python beta_snippets.py punctuation resources/commercial_mono.wav
2424
python beta_snippets.py diarization resources/commercial_mono.wav
25+
python beta_snippets.py multi-channel resources/commercial_mono.wav
2526
"""
2627

2728
import argparse
2829
import io
2930

30-
from google.cloud import speech_v1p1beta1 as speech
3131

32-
33-
# [START speech_transcribe_file_with_enhanced_model]
34-
def transcribe_file_with_enhanced_model(path):
32+
def transcribe_file_with_enhanced_model(speech_file):
3533
"""Transcribe the given audio file using an enhanced model."""
34+
# [START speech_transcribe_file_with_enhanced_model]
35+
from google.cloud import speech_v1p1beta1 as speech
3636
client = speech.SpeechClient()
3737

38-
with io.open(path, 'rb') as audio_file:
38+
# TODO(developer): Uncomment and set to a path to your audio file.
39+
# speech_file = 'path/to/file.wav'
40+
41+
with io.open(speech_file, 'rb') as audio_file:
3942
content = audio_file.read()
4043

4144
audio = speech.types.RecognitionAudio(content=content)
@@ -56,15 +59,19 @@ def transcribe_file_with_enhanced_model(path):
5659
print('-' * 20)
5760
print('First alternative of result {}'.format(i))
5861
print('Transcript: {}'.format(alternative.transcript))
59-
# [END speech_transcribe_file_with_enhanced_model]
62+
# [END speech_transcribe_file_with_enhanced_model]
6063

6164

62-
# [START speech_transcribe_file_with_metadata]
63-
def transcribe_file_with_metadata(path):
65+
def transcribe_file_with_metadata(speech_file):
6466
"""Send a request that includes recognition metadata."""
67+
# [START speech_transcribe_file_with_metadata]
68+
from google.cloud import speech_v1p1beta1 as speech
6569
client = speech.SpeechClient()
6670

67-
with io.open(path, 'rb') as audio_file:
71+
# TODO(developer): Uncomment and set to a path to your audio file.
72+
# speech_file = 'path/to/file.wav'
73+
74+
with io.open(speech_file, 'rb') as audio_file:
6875
content = audio_file.read()
6976

7077
# Here we construct a recognition metadata object.
@@ -98,15 +105,19 @@ def transcribe_file_with_metadata(path):
98105
print('-' * 20)
99106
print('First alternative of result {}'.format(i))
100107
print('Transcript: {}'.format(alternative.transcript))
101-
# [END speech_transcribe_file_with_metadata]
108+
# [END speech_transcribe_file_with_metadata]
102109

103110

104-
# [START speech_transcribe_file_with_auto_punctuation]
105-
def transcribe_file_with_auto_punctuation(path):
111+
def transcribe_file_with_auto_punctuation(speech_file):
106112
"""Transcribe the given audio file with auto punctuation enabled."""
113+
# [START speech_transcribe_file_with_auto_punctuation]
114+
from google.cloud import speech_v1p1beta1 as speech
107115
client = speech.SpeechClient()
108116

109-
with io.open(path, 'rb') as audio_file:
117+
# TODO(developer): Uncomment and set to a path to your audio file.
118+
# speech_file = 'path/to/file.wav'
119+
120+
with io.open(speech_file, 'rb') as audio_file:
110121
content = audio_file.read()
111122

112123
audio = speech.types.RecognitionAudio(content=content)
@@ -124,15 +135,19 @@ def transcribe_file_with_auto_punctuation(path):
124135
print('-' * 20)
125136
print('First alternative of result {}'.format(i))
126137
print('Transcript: {}'.format(alternative.transcript))
127-
# [END speech_transcribe_file_with_auto_punctuation]
138+
# [END speech_transcribe_file_with_auto_punctuation]
128139

129140

130-
# [START speech_transcribe_diarization]
131-
def transcribe_file_with_diarization(path):
141+
def transcribe_file_with_diarization(speech_file):
132142
"""Transcribe the given audio file synchronously with diarization."""
143+
# [START speech_transcribe_diarization]
144+
from google.cloud import speech_v1p1beta1 as speech
133145
client = speech.SpeechClient()
134146

135-
with open(path, 'rb') as audio_file:
147+
# TODO(developer): Uncomment and set to a path to your audio file.
148+
# speech_file = 'path/to/file.wav'
149+
150+
with open(speech_file, 'rb') as audio_file:
136151
content = audio_file.read()
137152

138153
audio = speech.types.RecognitionAudio(content=content)
@@ -154,7 +169,40 @@ def transcribe_file_with_diarization(path):
154169
.format(i, alternative.transcript))
155170
print('Speaker Tag for the first word: {}'
156171
.format(alternative.words[0].speaker_tag))
157-
# [END speech_transcribe_diarization]
172+
# [END speech_transcribe_diarization]
173+
174+
175+
def transcribe_file_with_multichannel(speech_file):
176+
"""Transcribe the given audio file synchronously with
177+
multi channel."""
178+
# [START speech_transcribe_multichannel]
179+
from google.cloud import speech_v1p1beta1 as speech
180+
client = speech.SpeechClient()
181+
182+
# TODO(developer): Uncomment and set to a path to your audio file.
183+
# speech_file = 'path/to/file.wav'
184+
185+
with open(speech_file, 'rb') as audio_file:
186+
content = audio_file.read()
187+
188+
audio = speech.types.RecognitionAudio(content=content)
189+
190+
config = speech.types.RecognitionConfig(
191+
encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
192+
sample_rate_hertz=16000,
193+
language_code='en-US',
194+
audio_channel_count=1,
195+
enable_separate_recognition_per_channel=True)
196+
197+
response = client.recognize(config, audio)
198+
199+
for i, result in enumerate(response.results):
200+
alternative = result.alternatives[0]
201+
print('-' * 20)
202+
print('First alternative of result {}'.format(i))
203+
print(u'Transcript: {}'.format(alternative.transcript))
204+
print(u'Channel Tag: {}'.format(result.channel_tag))
205+
# [END speech_transcribe_multichannel]
158206

159207

160208
if __name__ == '__main__':
@@ -175,3 +223,5 @@ def transcribe_file_with_diarization(path):
175223
transcribe_file_with_auto_punctuation(args.path)
176224
elif args.command == 'diarization':
177225
transcribe_file_with_diarization(args.path)
226+
elif args.command == 'multi-channel':
227+
transcribe_file_with_multichannel(args.path)

google-cloud-speech/samples/snippets/beta_snippets_test.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,8 @@
1717
transcribe_file_with_auto_punctuation,
1818
transcribe_file_with_diarization,
1919
transcribe_file_with_enhanced_model,
20-
transcribe_file_with_metadata)
20+
transcribe_file_with_metadata,
21+
transcribe_file_with_multichannel)
2122

2223
RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
2324

@@ -52,3 +53,11 @@ def test_transcribe_diarization(capsys):
5253
out, err = capsys.readouterr()
5354

5455
assert 'OK Google stream stranger things from Netflix to my TV' in out
56+
57+
58+
def test_transcribe_multichannel_file(capsys):
59+
transcribe_file_with_multichannel(
60+
os.path.join(RESOURCES, 'Google_Gnome.wav'))
61+
out, err = capsys.readouterr()
62+
63+
assert 'OK Google stream stranger things from Netflix to my TV' in out

0 commit comments

Comments
 (0)