22
22
python beta_snippets.py metadata resources/commercial_mono.wav
23
23
python beta_snippets.py punctuation resources/commercial_mono.wav
24
24
python beta_snippets.py diarization resources/commercial_mono.wav
25
+ python beta_snippets.py multi-channel resources/commercial_mono.wav
25
26
"""
26
27
27
28
import argparse
28
29
import io
29
30
30
- from google .cloud import speech_v1p1beta1 as speech
31
31
32
-
33
- # [START speech_transcribe_file_with_enhanced_model]
34
- def transcribe_file_with_enhanced_model (path ):
32
+ def transcribe_file_with_enhanced_model (speech_file ):
35
33
"""Transcribe the given audio file using an enhanced model."""
34
+ # [START speech_transcribe_file_with_enhanced_model]
35
+ from google .cloud import speech_v1p1beta1 as speech
36
36
client = speech .SpeechClient ()
37
37
38
- with io .open (path , 'rb' ) as audio_file :
38
+ # TODO(developer): Uncomment and set to a path to your audio file.
39
+ # speech_file = 'path/to/file.wav'
40
+
41
+ with io .open (speech_file , 'rb' ) as audio_file :
39
42
content = audio_file .read ()
40
43
41
44
audio = speech .types .RecognitionAudio (content = content )
@@ -56,15 +59,19 @@ def transcribe_file_with_enhanced_model(path):
56
59
print ('-' * 20 )
57
60
print ('First alternative of result {}' .format (i ))
58
61
print ('Transcript: {}' .format (alternative .transcript ))
59
- # [END speech_transcribe_file_with_enhanced_model]
62
+ # [END speech_transcribe_file_with_enhanced_model]
60
63
61
64
62
- # [START speech_transcribe_file_with_metadata]
63
- def transcribe_file_with_metadata (path ):
65
+ def transcribe_file_with_metadata (speech_file ):
64
66
"""Send a request that includes recognition metadata."""
67
+ # [START speech_transcribe_file_with_metadata]
68
+ from google .cloud import speech_v1p1beta1 as speech
65
69
client = speech .SpeechClient ()
66
70
67
- with io .open (path , 'rb' ) as audio_file :
71
+ # TODO(developer): Uncomment and set to a path to your audio file.
72
+ # speech_file = 'path/to/file.wav'
73
+
74
+ with io .open (speech_file , 'rb' ) as audio_file :
68
75
content = audio_file .read ()
69
76
70
77
# Here we construct a recognition metadata object.
@@ -98,15 +105,19 @@ def transcribe_file_with_metadata(path):
98
105
print ('-' * 20 )
99
106
print ('First alternative of result {}' .format (i ))
100
107
print ('Transcript: {}' .format (alternative .transcript ))
101
- # [END speech_transcribe_file_with_metadata]
108
+ # [END speech_transcribe_file_with_metadata]
102
109
103
110
104
- # [START speech_transcribe_file_with_auto_punctuation]
105
- def transcribe_file_with_auto_punctuation (path ):
111
+ def transcribe_file_with_auto_punctuation (speech_file ):
106
112
"""Transcribe the given audio file with auto punctuation enabled."""
113
+ # [START speech_transcribe_file_with_auto_punctuation]
114
+ from google .cloud import speech_v1p1beta1 as speech
107
115
client = speech .SpeechClient ()
108
116
109
- with io .open (path , 'rb' ) as audio_file :
117
+ # TODO(developer): Uncomment and set to a path to your audio file.
118
+ # speech_file = 'path/to/file.wav'
119
+
120
+ with io .open (speech_file , 'rb' ) as audio_file :
110
121
content = audio_file .read ()
111
122
112
123
audio = speech .types .RecognitionAudio (content = content )
@@ -124,15 +135,19 @@ def transcribe_file_with_auto_punctuation(path):
124
135
print ('-' * 20 )
125
136
print ('First alternative of result {}' .format (i ))
126
137
print ('Transcript: {}' .format (alternative .transcript ))
127
- # [END speech_transcribe_file_with_auto_punctuation]
138
+ # [END speech_transcribe_file_with_auto_punctuation]
128
139
129
140
130
- # [START speech_transcribe_diarization]
131
- def transcribe_file_with_diarization (path ):
141
+ def transcribe_file_with_diarization (speech_file ):
132
142
"""Transcribe the given audio file synchronously with diarization."""
143
+ # [START speech_transcribe_diarization]
144
+ from google .cloud import speech_v1p1beta1 as speech
133
145
client = speech .SpeechClient ()
134
146
135
- with open (path , 'rb' ) as audio_file :
147
+ # TODO(developer): Uncomment and set to a path to your audio file.
148
+ # speech_file = 'path/to/file.wav'
149
+
150
+ with open (speech_file , 'rb' ) as audio_file :
136
151
content = audio_file .read ()
137
152
138
153
audio = speech .types .RecognitionAudio (content = content )
@@ -154,7 +169,40 @@ def transcribe_file_with_diarization(path):
154
169
.format (i , alternative .transcript ))
155
170
print ('Speaker Tag for the first word: {}'
156
171
.format (alternative .words [0 ].speaker_tag ))
157
- # [END speech_transcribe_diarization]
172
+ # [END speech_transcribe_diarization]
173
+
174
+
175
def transcribe_file_with_multichannel(speech_file):
    """Recognize speech in a local audio file with per-channel recognition.

    Reads ``speech_file`` as raw bytes, sends it in a single synchronous
    recognize request with separate recognition per channel switched on,
    and prints, for every result, the transcript of its first alternative
    together with the result's channel tag.
    """
    # [START speech_transcribe_multichannel]
    from google.cloud import speech_v1p1beta1 as speech
    client = speech.SpeechClient()

    # TODO(developer): Uncomment and set to a path to your audio file.
    # speech_file = 'path/to/file.wav'

    with open(speech_file, 'rb') as wav_handle:
        audio_bytes = wav_handle.read()

    audio = speech.types.RecognitionAudio(content=audio_bytes)

    # Name the encoding up front so the config call stays short.
    linear16 = speech.enums.RecognitionConfig.AudioEncoding.LINEAR16
    config = speech.types.RecognitionConfig(
        encoding=linear16,
        sample_rate_hertz=16000,
        language_code='en-US',
        audio_channel_count=1,
        enable_separate_recognition_per_channel=True)

    response = client.recognize(config, audio)

    for index, result in enumerate(response.results):
        # Only the first alternative of each result is reported.
        top_alternative = result.alternatives[0]
        print('-' * 20)
        print('First alternative of result {}'.format(index))
        print(u'Transcript: {}'.format(top_alternative.transcript))
        print(u'Channel Tag: {}'.format(result.channel_tag))
    # [END speech_transcribe_multichannel]
158
206
159
207
160
208
if __name__ == '__main__' :
@@ -175,3 +223,5 @@ def transcribe_file_with_diarization(path):
175
223
transcribe_file_with_auto_punctuation (args .path )
176
224
elif args .command == 'diarization' :
177
225
transcribe_file_with_diarization (args .path )
226
+ elif args .command == 'multi-channel' :
227
+ transcribe_file_with_multichannel (args .path )
0 commit comments