Diarization [(#1556)](#1556)

happyhuman · telpirion · commit 381c2d1745b1 · 2023-03-13T19:55:23.000Z
Diarization
diff --git a/speech/snippets/README.rst b/speech/snippets/README.rst
@@ -230,6 +230,7 @@ To run this sample:
         python beta_snippets.py enhanced-model resources/commercial_mono.wav
         python beta_snippets.py metadata resources/commercial_mono.wav
         python beta_snippets.py punctuation resources/commercial_mono.wav
+        python beta_snippets.py diarization resources/commercial_mono.wav
 
     positional arguments:
       command
diff --git a/speech/snippets/beta_snippets.py b/speech/snippets/beta_snippets.py
@@ -21,6 +21,7 @@
     python beta_snippets.py enhanced-model resources/commercial_mono.wav
     python beta_snippets.py metadata resources/commercial_mono.wav
     python beta_snippets.py punctuation resources/commercial_mono.wav
+    python beta_snippets.py diarization resources/commercial_mono.wav
 """
 
 import argparse
@@ -126,6 +127,36 @@ def transcribe_file_with_auto_punctuation(path):
 # [END speech_transcribe_file_with_auto_punctuation]
 
 
+# [START speech_transcribe_diarization]
+def transcribe_file_with_diarization(path):
+    """Transcribe the given audio file synchronously with diarization."""
+    client = speech.SpeechClient()
+
+    with open(path, 'rb') as audio_file:
+        content = audio_file.read()
+
+    audio = speech.types.RecognitionAudio(content=content)
+
+    config = speech.types.RecognitionConfig(
+        encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
+        sample_rate_hertz=16000,
+        language_code='en-US',
+        enable_speaker_diarization=True,
+        diarization_speaker_count=2)
+
+    print('Waiting for operation to complete...')
+    response = client.recognize(config, audio)
+
+    for i, result in enumerate(response.results):
+        alternative = result.alternatives[0]
+        print('-' * 20)
+        print('First alternative of result {}: {}'
+              .format(i, alternative.transcript))
+        print('Speaker Tag for the first word: {}'
+              .format(alternative.words[0].speaker_tag))
+# [END speech_transcribe_diarization]
+
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(
         description=__doc__,
@@ -142,3 +173,5 @@ def transcribe_file_with_auto_punctuation(path):
         transcribe_file_with_metadata(args.path)
     elif args.command == 'punctuation':
         transcribe_file_with_auto_punctuation(args.path)
+    elif args.command == 'diarization':
+        transcribe_file_with_diarization(args.path)
diff --git a/speech/snippets/beta_snippets_test.py b/speech/snippets/beta_snippets_test.py
@@ -14,7 +14,9 @@
 import os
 
 from beta_snippets import (
-    transcribe_file_with_auto_punctuation, transcribe_file_with_enhanced_model,
+    transcribe_file_with_auto_punctuation,
+    transcribe_file_with_diarization,
+    transcribe_file_with_enhanced_model,
     transcribe_file_with_metadata)
 
 RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
@@ -42,3 +44,11 @@ def test_transcribe_file_with_auto_punctuation(capsys):
     out, _ = capsys.readouterr()
 
     assert 'Okay. Sure.' in out
+
+
+def test_transcribe_diarization(capsys):
+    transcribe_file_with_diarization(
+        os.path.join(RESOURCES, 'Google_Gnome.wav'))
+    out, err = capsys.readouterr()
+
+    assert 'OK Google stream stranger things from Netflix to my TV' in out
diff --git a/speech/snippets/requirements.txt b/speech/snippets/requirements.txt
@@ -1 +1 @@
-google-cloud-speech==0.33.0
+google-cloud-speech==0.35.0

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -1 +1 @@` |
| `1` |  | `-google-cloud-speech==0.33.0` |
|  | `1` | `+google-cloud-speech==0.35.0` |