Skip to content

Commit 381c2d1

Browse files
happyhuman authored and telpirion committed
Diarization [(#1556)](#1556)
Diarization
1 parent a53c08d commit 381c2d1

File tree

4 files changed

+46
-2
lines changed

4 files changed

+46
-2
lines changed

speech/snippets/README.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,7 @@ To run this sample:
230230
python beta_snippets.py enhanced-model resources/commercial_mono.wav
231231
python beta_snippets.py metadata resources/commercial_mono.wav
232232
python beta_snippets.py punctuation resources/commercial_mono.wav
233+
python beta_snippets.py diarization resources/commercial_mono.wav
233234
234235
positional arguments:
235236
command

speech/snippets/beta_snippets.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
python beta_snippets.py enhanced-model resources/commercial_mono.wav
2222
python beta_snippets.py metadata resources/commercial_mono.wav
2323
python beta_snippets.py punctuation resources/commercial_mono.wav
24+
python beta_snippets.py diarization resources/commercial_mono.wav
2425
"""
2526

2627
import argparse
@@ -126,6 +127,36 @@ def transcribe_file_with_auto_punctuation(path):
126127
# [END speech_transcribe_file_with_auto_punctuation]
127128

128129

130+
# [START speech_transcribe_diarization]
131+
def transcribe_file_with_diarization(path):
    """Transcribe the given audio file synchronously with diarization.

    Reads the whole file at ``path``, sends it in a single ``recognize``
    request with speaker diarization enabled for two speakers, and prints
    the first alternative of every result. When that alternative carries
    word-level details, the speaker tag of its first word is printed too.

    Args:
        path: Path to a local audio file. The request is configured for
            LINEAR16 encoding at 16000 Hz and 'en-US' speech.
    """
    client = speech.SpeechClient()

    with open(path, 'rb') as audio_file:
        content = audio_file.read()

    audio = speech.types.RecognitionAudio(content=content)

    config = speech.types.RecognitionConfig(
        encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US',
        enable_speaker_diarization=True,
        diarization_speaker_count=2)

    print('Waiting for operation to complete...')
    response = client.recognize(config, audio)

    for i, result in enumerate(response.results):
        alternative = result.alternatives[0]
        print('-' * 20)
        print('First alternative of result {}: {}'
              .format(i, alternative.transcript))
        # Guard the index: if ``words`` is empty (presumably possible when
        # the service returns no word info for a result -- TODO confirm),
        # ``words[0]`` would raise IndexError and abort the remaining results.
        if alternative.words:
            print('Speaker Tag for the first word: {}'
                  .format(alternative.words[0].speaker_tag))
157+
# [END speech_transcribe_diarization]
158+
159+
129160
if __name__ == '__main__':
130161
parser = argparse.ArgumentParser(
131162
description=__doc__,
@@ -142,3 +173,5 @@ def transcribe_file_with_auto_punctuation(path):
142173
transcribe_file_with_metadata(args.path)
143174
elif args.command == 'punctuation':
144175
transcribe_file_with_auto_punctuation(args.path)
176+
elif args.command == 'diarization':
177+
transcribe_file_with_diarization(args.path)

speech/snippets/beta_snippets_test.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414
import os
1515

1616
from beta_snippets import (
17-
transcribe_file_with_auto_punctuation, transcribe_file_with_enhanced_model,
17+
transcribe_file_with_auto_punctuation,
18+
transcribe_file_with_diarization,
19+
transcribe_file_with_enhanced_model,
1820
transcribe_file_with_metadata)
1921

2022
RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
@@ -42,3 +44,11 @@ def test_transcribe_file_with_auto_punctuation(capsys):
4244
out, _ = capsys.readouterr()
4345

4446
assert 'Okay. Sure.' in out
47+
48+
49+
def test_transcribe_diarization(capsys):
    """Check that the diarization sample prints the expected transcript."""
    transcribe_file_with_diarization(
        os.path.join(RESOURCES, 'Google_Gnome.wav'))
    # Only stdout is inspected, so stderr is discarded instead of being
    # bound to an unused local.
    out, _ = capsys.readouterr()

    assert 'OK Google stream stranger things from Netflix to my TV' in out

speech/snippets/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
google-cloud-speech==0.33.0
1+
google-cloud-speech==0.35.0

0 commit comments

Comments
 (0)