Skip to content

Commit d199690

Browse files
murezzdaemettely
authored and committed
Speechmatics adapter update (#190)
* Fixed bug in speechmatics adapter: Speaker-time was compared as string values, which resulted in wrong speaker assignment in some cases * Updated speechmatics adapter to create paragraphs according to speaker name instead of end-of-sentence symbols. * Added additional paragraph break rule: 150 words in paragraph and sentence end. * Adjusted comments for groupWordsInParagraphs * Updated test-result for speechmatics-to-draft
1 parent c06416a commit d199690

File tree

2 files changed

+23769
-24805
lines changed

2 files changed

+23769
-24805
lines changed

packages/stt-adapters/speechmatics/index.js

Lines changed: 40 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,33 +5,6 @@
55

66
import generateEntitiesRanges from '../generate-entities-ranges/index.js';
77

8-
/**
9-
* groups words list from speechmatics based on punctuation.
10-
* @todo To be more accurate, should introduce an honorifics library to do the splitting of the words.
11-
* @todo As this function is also used in the bbc-kaldi adapter, should it be refactored into its own file?
12-
* @param {array} words - array of words objects from speechmatics transcript
13-
*/
14-
const groupWordsInParagraphs = (words) => {
15-
const results = [];
16-
let paragraph = { words: [], text: [] };
17-
18-
words.forEach((word) => {
19-
// if word contains punctuation
20-
if (/[.?!]/.test(word.punct)) {
21-
paragraph.words.push(word);
22-
paragraph.text.push(word.punct);
23-
results.push(paragraph);
24-
// reset paragraph
25-
paragraph = { words: [], text: [] };
26-
} else {
27-
paragraph.words.push(word);
28-
paragraph.text.push(word.punct);
29-
}
30-
});
31-
32-
return results;
33-
};
34-
358
/**
369
* Determines the speaker of a paragraph by comparing the start time of the paragraph with
3710
* the speaker times.
@@ -41,14 +14,49 @@ const groupWordsInParagraphs = (words) => {
4114
const getSpeaker = (start, speakers) => {
  // Word start times can arrive as strings, so parse once up front and
  // compare numerically against the speaker segment boundaries.
  const segmentStart = parseFloat(start);

  // Object.values tolerates a missing speaker list (treated as empty) and
  // visits array elements in order without walking inherited keys.
  for (const speaker of Object.values(speakers || {})) {
    // A segment is half-open: its start is inclusive, its end is exclusive.
    // Logical && (not bitwise &) so the test short-circuits on booleans.
    if (segmentStart >= speaker.start && segmentStart < speaker.end) {
      return speaker.name;
    }
  }

  // No segment contains the given start time.
  return 'UNK';
};
5125

26+
/**
 * Groups the words list from speechmatics into paragraphs.
 * A new paragraph starts when the speaker changes, or when the current paragraph
 * holds more than `maxParagraphWords` words and the previous word ended a sentence.
 * @param {array} words - array of words objects from speechmatics transcript
 * @param {array} speakers - array of speaker objects from speechmatics transcript
 * @param {number} [maxParagraphWords] - number of words which trigger a paragraph break
 * at the next sentence end; when omitted, only speaker changes break paragraphs
 * @returns {array} list of paragraphs, each of the form `{ words, text, speaker }`;
 * empty when there are no words
 */
const groupWordsInParagraphs = (words, speakers, maxParagraphWords = Infinity) => {
  const results = [];

  // Nothing to group: avoid reading the start time of a non-existent first word.
  if (!words || words.length === 0) {
    return results;
  }

  const sentenceEndPattern = /[.?!]/;
  let paragraph = { words: [], text: [], speaker: '' };
  let currentSpeaker = getSpeaker(words[0].start, speakers);
  let sentenceEnd = false;

  words.forEach((word) => {
    const wordSpeaker = getSpeaker(word.start, speakers);
    const speakerChanged = wordSpeaker !== currentSpeaker;
    // Only break an over-long paragraph right after a sentence has ended.
    const paragraphFull = paragraph.words.length > maxParagraphWords && sentenceEnd;

    if (speakerChanged || paragraphFull) {
      // Close the running paragraph under the speaker who spoke it.
      paragraph.speaker = currentSpeaker;
      results.push(paragraph);
      currentSpeaker = wordSpeaker;
      // Reset paragraph, keeping the same shape as the initial one.
      paragraph = { words: [], text: [], speaker: '' };
    }

    paragraph.words.push(word);
    paragraph.text.push(word.punct);
    sentenceEnd = sentenceEndPattern.test(word.punct);
  });

  // Flush the trailing paragraph, which the loop never closes.
  paragraph.speaker = currentSpeaker;
  results.push(paragraph);

  return results;
};
59+
5260
/**
5361
* Speechmatics treats punctuation as own words. This function merges punctuations with
5462
* the previous word and adjusts the total duration of the word.
@@ -89,21 +97,21 @@ const speechmaticsToDraft = (speechmaticsJson) => {
8997
tmpSpeakers = speechmaticsJson.speakers;
9098
tmpSpeakers = tmpSpeakers.map((element) => {
9199
return ({
92-
start: element.time,
93-
end: (parseFloat(element.time) + parseFloat(element.duration)).toString(),
100+
start: parseFloat(element.time),
101+
end: (parseFloat(element.time) + parseFloat(element.duration)),
94102
name: element.name,
95103
});
96104
});
97105

98-
const wordsByParagraphs = groupWordsInParagraphs(tmpWords);
106+
const wordsByParagraphs = groupWordsInParagraphs(tmpWords, tmpSpeakers, 150);
99107

100108
wordsByParagraphs.forEach((paragraph) => {
101109
const paragraphStart = paragraph.words[0].start;
102110
const draftJsContentBlockParagraph = {
103111
text: paragraph.text.join(' '),
104112
type: 'paragraph',
105113
data: {
106-
speaker: getSpeaker(paragraphStart, tmpSpeakers),
114+
speaker: paragraph.speaker,
107115
words: paragraph.words,
108116
start: paragraphStart
109117
},

0 commit comments

Comments
 (0)