Skip to content

Commit 98f2d89

Browse files
authored
Due to API backend changes, update the samples to match (#1595)
1 parent f951adf commit 98f2d89

File tree

3 files changed

+84
-35
lines changed

3 files changed

+84
-35
lines changed

speech/beta/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
<dependency>
4141
<groupId>com.google.cloud</groupId>
4242
<artifactId>google-cloud-speech</artifactId>
43-
<version>0.56.0-beta</version>
43+
<version>1.20.0</version>
4444
</dependency>
4545
<!-- [END speech_quickstart_dependencies] -->
4646

speech/beta/src/main/java/com/example/speech/Recognize.java

Lines changed: 77 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,18 @@
2727
import com.google.cloud.speech.v1p1beta1.RecognitionMetadata.MicrophoneDistance;
2828
import com.google.cloud.speech.v1p1beta1.RecognitionMetadata.RecordingDeviceType;
2929
import com.google.cloud.speech.v1p1beta1.RecognizeResponse;
30+
import com.google.cloud.speech.v1p1beta1.SpeakerDiarizationConfig;
3031
import com.google.cloud.speech.v1p1beta1.SpeechClient;
32+
3133
import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative;
3234
import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult;
35+
import com.google.cloud.speech.v1p1beta1.WordInfo;
3336
import com.google.protobuf.ByteString;
3437

3538
import java.nio.file.Files;
3639
import java.nio.file.Path;
3740
import java.nio.file.Paths;
3841
import java.util.ArrayList;
39-
import java.util.List;
4042

4143
public class Recognize {
4244

@@ -154,32 +156,52 @@ public static void transcribeDiarization(String fileName) throws Exception {
154156
RecognitionAudio recognitionAudio =
155157
RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
156158

159+
SpeakerDiarizationConfig speakerDiarizationConfig = SpeakerDiarizationConfig.newBuilder()
160+
.setEnableSpeakerDiarization(true)
161+
.setMinSpeakerCount(2)
162+
.setMaxSpeakerCount(2)
163+
.build();
164+
157165
// Configure request to enable Speaker diarization
158-
RecognitionConfig config =
159-
RecognitionConfig.newBuilder()
166+
RecognitionConfig config = RecognitionConfig.newBuilder()
160167
.setEncoding(AudioEncoding.LINEAR16)
161168
.setLanguageCode("en-US")
162169
.setSampleRateHertz(8000)
163-
.setEnableSpeakerDiarization(true)
164-
.setDiarizationSpeakerCount(2)
170+
.setDiarizationConfig(speakerDiarizationConfig)
165171
.build();
166172

167173
// Perform the transcription request
168174
RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);
169175

170-
// Print out the results
171-
for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
172-
// There can be several alternative transcripts for a given chunk of speech. Just
173-
// use the first (most likely) one here.
174-
SpeechRecognitionAlternative alternative = result.getAlternatives(0);
175-
System.out.format("Transcript : %s\n", alternative.getTranscript());
176-
// The words array contains the entire transcript up until that point.
177-
// Referencing the last spoken word to get the associated Speaker tag
178-
System.out.format(
179-
"Speaker Tag %s: %s\n",
180-
alternative.getWords((alternative.getWordsCount() - 1)).getSpeakerTag(),
181-
alternative.getTranscript());
176+
// Speaker Tags are only included in the last result object, which has only one alternative.
177+
SpeechRecognitionAlternative alternative =
178+
recognizeResponse.getResults(
179+
recognizeResponse.getResultsCount() - 1).getAlternatives(0);
180+
181+
// The alternative is made up of WordInfo objects that contain the speaker_tag.
182+
WordInfo wordInfo = alternative.getWords(0);
183+
int currentSpeakerTag = wordInfo.getSpeakerTag();
184+
185+
// For each word, get all the words associated with one speaker. Once the speaker changes,
186+
// add a new line with the new speaker and their spoken words.
187+
StringBuilder speakerWords = new StringBuilder(
188+
String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
189+
190+
for (int i = 1; i < alternative.getWordsCount(); i++) {
191+
wordInfo = alternative.getWords(i);
192+
if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
193+
speakerWords.append(" ");
194+
speakerWords.append(wordInfo.getWord());
195+
} else {
196+
speakerWords.append(
197+
String.format("\nSpeaker %d: %s",
198+
wordInfo.getSpeakerTag(),
199+
wordInfo.getWord()));
200+
currentSpeakerTag = wordInfo.getSpeakerTag();
201+
}
182202
}
203+
204+
System.out.println(speakerWords.toString());
183205
}
184206
}
185207
// [END speech_transcribe_diarization_beta]
@@ -192,14 +214,19 @@ public static void transcribeDiarization(String fileName) throws Exception {
192214
*/
193215
public static void transcribeDiarizationGcs(String gcsUri) throws Exception {
194216
try (SpeechClient speechClient = SpeechClient.create()) {
217+
SpeakerDiarizationConfig speakerDiarizationConfig = SpeakerDiarizationConfig.newBuilder()
218+
.setEnableSpeakerDiarization(true)
219+
.setMinSpeakerCount(2)
220+
.setMaxSpeakerCount(2)
221+
.build();
222+
195223
// Configure request to enable Speaker diarization
196224
RecognitionConfig config =
197225
RecognitionConfig.newBuilder()
198226
.setEncoding(AudioEncoding.LINEAR16)
199227
.setLanguageCode("en-US")
200228
.setSampleRateHertz(8000)
201-
.setEnableSpeakerDiarization(true)
202-
.setDiarizationSpeakerCount(2)
229+
.setDiarizationConfig(speakerDiarizationConfig)
203230
.build();
204231

205232
// Set the remote path for the audio file
@@ -214,17 +241,37 @@ public static void transcribeDiarizationGcs(String gcsUri) throws Exception {
214241
Thread.sleep(10000);
215242
}
216243

217-
for (SpeechRecognitionResult result : response.get().getResultsList()) {
218-
// There can be several alternative transcripts for a given chunk of speech. Just
219-
// use the first (most likely) one here.
220-
SpeechRecognitionAlternative alternative = result.getAlternatives(0);
221-
// The words array contains the entire transcript up until that point.
222-
// Referencing the last spoken word to get the associated Speaker tag
223-
System.out.format(
224-
"Speaker Tag %s:%s\n",
225-
alternative.getWords((alternative.getWordsCount() - 1)).getSpeakerTag(),
226-
alternative.getTranscript());
244+
// Speaker Tags are only included in the last result object, which has only one alternative.
245+
LongRunningRecognizeResponse longRunningRecognizeResponse = response.get();
246+
SpeechRecognitionAlternative alternative =
247+
longRunningRecognizeResponse.getResults(
248+
longRunningRecognizeResponse.getResultsCount() - 1)
249+
.getAlternatives(0);
250+
251+
// The alternative is made up of WordInfo objects that contain the speaker_tag.
252+
WordInfo wordInfo = alternative.getWords(0);
253+
int currentSpeakerTag = wordInfo.getSpeakerTag();
254+
255+
// For each word, get all the words associated with one speaker. Once the speaker changes,
256+
// add a new line with the new speaker and their spoken words.
257+
StringBuilder speakerWords = new StringBuilder(
258+
String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
259+
260+
for (int i = 1; i < alternative.getWordsCount(); i++) {
261+
wordInfo = alternative.getWords(i);
262+
if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
263+
speakerWords.append(" ");
264+
speakerWords.append(wordInfo.getWord());
265+
} else {
266+
speakerWords.append(
267+
String.format("\nSpeaker %d: %s",
268+
wordInfo.getSpeakerTag(),
269+
wordInfo.getWord()));
270+
currentSpeakerTag = wordInfo.getSpeakerTag();
271+
}
227272
}
273+
274+
System.out.println(speakerWords.toString());
228275
}
229276
}
230277
// [END speech_transcribe_diarization_gcs_beta]
@@ -454,7 +501,7 @@ public static void transcribeWordLevelConfidenceGcs(String gcsUri) throws Except
454501
RecognitionConfig config =
455502
RecognitionConfig.newBuilder()
456503
.setEncoding(AudioEncoding.FLAC)
457-
.setSampleRateHertz(16000)
504+
.setSampleRateHertz(44100)
458505
.setLanguageCode("en-US")
459506
.setEnableWordConfidence(true)
460507
.build();

speech/beta/src/test/java/com/example/speech/RecognizeIT.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
@RunWith(JUnit4.class)
3131
@SuppressWarnings("checkstyle:abbreviationaswordinname")
3232
public class RecognizeIT {
33-
private static final String BUCKET = "cloud-samples-tests";
33+
private static final String BUCKET = "cloud-samples-data";
3434

3535
private ByteArrayOutputStream bout;
3636
private PrintStream out;
@@ -39,7 +39,7 @@ public class RecognizeIT {
3939
private String audioFileName = "./resources/audio.raw";
4040
private String multiChannelAudioFileName = "./resources/commercial_stereo.wav";
4141
private String gcsMultiChannelAudioPath = "gs://" + BUCKET + "/speech/commercial_stereo.wav";
42-
private String gcsAudioPath = "gs://" + BUCKET + "/speech/brooklyn.flac";
42+
private String gcsAudioPath = "gs://" + BUCKET + "/speech/brooklyn_bridge.flac";
4343
private String gcsDiarizationAudioPath = "gs://" + BUCKET + "/speech/commercial_mono.wav";
4444

4545
// The path to the video file to transcribe
@@ -71,14 +71,16 @@ public void testMetadata() throws Exception {
7171
public void testTranscribeDiarization() throws Exception {
7272
Recognize.transcribeDiarization(recognitionAudioFile);
7373
String got = bout.toString();
74-
assertThat(got).contains("Speaker Tag 2:");
74+
assertThat(got).contains("Speaker 1: I'm here");
75+
assertThat(got).contains("Speaker 2: hi I'd like to buy a Chrome Cast");
7576
}
7677

7778
@Test
7879
public void testTranscribeDiarizationGcs() throws Exception {
7980
Recognize.transcribeDiarizationGcs(gcsDiarizationAudioPath);
8081
String got = bout.toString();
81-
assertThat(got).contains("Speaker Tag 2:");
82+
assertThat(got).contains("Speaker 1: I'm here");
83+
assertThat(got).contains("Speaker 2: hi I'd like to buy a Chrome Cast");
8284
}
8385

8486
@Test

0 commit comments

Comments
 (0)