Skip to content

Commit be93c08

Browse files
author
Pietro
authored
Merge pull request #167 from sshniro/master
Adding speech to text adapter for Google cloud platform
2 parents 96f6e6d + 3a74e94 commit be93c08

9 files changed

+107221
-0
lines changed

demo/select-stt-json-type.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ const SttTypeSelect = props => {
1818
<option value="vtt-youtube" disabled>Youtube VTT</option>
1919
<option value="amazontranscribe">Amazon Transcribe</option>
2020
<option value="digitalpaperedit">Digital Paper Edit</option>
21+
<option value="google-stt">Google STT</option>
2122
</select>;
2223
};
2324

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import gcpSttToDraft from './index';
2+
import gcpSttTedTalkTranscript from './sample/gcpSttPunctuation.sample.json';
3+
4+
// Demo script: convert the bundled GCP sample transcript and pretty-print the result.
console.log('Starting');

const draftJsBlocks = gcpSttToDraft(gcpSttTedTalkTranscript);
const prettyPrinted = JSON.stringify(draftJsBlocks, null, 2);
console.log(prettyPrinted);
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
/**
2+
* Converts Google Cloud Platform (GCP) Speech-to-Text JSON into Draft.js content blocks
3+
* see `sample` folder for example of input and output as well as `example-usage.js`
4+
*/
5+
6+
import generateEntitiesRanges from '../generate-entities-ranges/index.js';
7+
8+
const NANO_SECOND = 1000000000;
9+
10+
/**
11+
* attribute for the sentences object containing the text. eg sentences ={ punct:'helo', ... }
12+
* or eg sentences ={ text:'hello', ... }
13+
* @param sentences
14+
*/
15+
export const getBestAlternativeSentence = sentences => {
16+
if (sentences.alternatives.length === 0) {
17+
return sentences[0];
18+
}
19+
20+
const sentenceWithHighestConfidence = sentences.alternatives.reduce(function(
21+
prev,
22+
current
23+
) {
24+
return parseFloat(prev.confidence) > parseFloat(current.confidence)
25+
? prev
26+
: current;
27+
});
28+
29+
return sentenceWithHighestConfidence;
30+
};
31+
32+
/**
 * Strips the whitespace found at both ends of a string.
 * @param {string} text - text to clean up
 * @returns {string} `text` without leading or trailing whitespace
 */
export const trimLeadingAndTailingWhiteSpace = text => text.trim();
35+
36+
/**
 * Converts a GCP time offset, split into whole seconds and nanoseconds, into
 * fractional seconds.
 *
 * GCP does not provide a nanosecond attribute if the word starts at 0 nanosecond;
 * a missing seconds part is likewise counted as 0 so the result is never NaN
 * just because one half of the offset was omitted.
 *
 * @param {string|number} [startSecond] - whole seconds of the offset
 * @param {string|number} [nanoSecond] - nanoseconds past `startSecond`
 * @returns {number} the offset in seconds
 */
const computeTimeInSeconds = (startSecond, nanoSecond) => {
  const hasSeconds = startSecond !== undefined && startSecond !== null;
  const hasNanos = nanoSecond !== undefined && nanoSecond !== null;

  // parseFloat keeps accepting seconds that arrive as strings.
  const wholeSeconds = hasSeconds ? parseFloat(startSecond) : 0;
  const fraction = hasNanos ? nanoSecond / NANO_SECOND : 0;

  return wholeSeconds + fraction;
};
52+
53+
/**
 * Reshapes a GCP word entry into the `{ start, end, text, confidence }` form
 * consumed by the generic generateEntitiesRanges() method.
 *
 * @param {Object} currentWord - GCP word with `startTime`, `endTime` (each read
 * for `seconds` and `nanos`) and `word`
 * @param {number} confidence - confidence to attach to the word
 * @returns {{start: number, end: number, text: string, confidence: number}}
 **/
const normalizeWord = (currentWord, confidence) => {
  const { startTime, endTime, word } = currentWord;

  const start = computeTimeInSeconds(startTime.seconds, startTime.nanos);
  const end = computeTimeInSeconds(endTime.seconds, endTime.nanos);

  return { start, end, text: word, confidence };
};
66+
67+
/**
 * Groups the words of a GCP Speech to Text response into paragraphs: one
 * paragraph per entry of `results`, built from that entry's best alternative.
 *
 * Entries for which no best alternative is found are skipped. An alternative
 * without a `words` list gives a paragraph with empty `words`, and a missing
 * `transcript` is treated as an empty string.
 *
 * @param {array} sentences - array of sentence objects from GCP STT
 * @returns {array} paragraphs shaped `{ words: [...normalized words], text: [transcript] }`;
 * empty when `sentences` is not an array
 */
const groupWordsInParagraphs = sentences => {
  if (!Array.isArray(sentences)) {
    return [];
  }

  return sentences.reduce((paragraphs, sentence) => {
    const bestAlternative = getBestAlternativeSentence(sentence);

    if (!bestAlternative) {
      return paragraphs;
    }

    const words = Array.isArray(bestAlternative.words)
      ? bestAlternative.words
      : [];

    paragraphs.push({
      // every word inherits the confidence of the alternative it belongs to
      words: words.map(word => normalizeWord(word, bestAlternative.confidence)),
      text: [trimLeadingAndTailingWhiteSpace(bestAlternative.transcript || '')]
    });

    return paragraphs;
  }, []);
};
91+
92+
/**
 * Converts a GCP Speech to Text response into an array of DraftJs content
 * blocks, one block per paragraph returned by groupWordsInParagraphs().
 *
 * Speaker segmentation (`speakerTag` on words) is not handled here; a block is
 * labelled `Speaker <n>` only if its paragraph carries a `speaker`, otherwise
 * `TBC <index>`.
 *
 * @param {Object} gcpSttJson - GCP STT response; only its `results` array is read
 * @returns {Array<Object>} blocks shaped
 * `{ text, type, data: { speaker, words, start }, entityRanges }`; empty when the
 * response has no `results`
 */
const gcpSttToDraft = gcpSttJson => {
  // A response without `results` is treated as an empty transcript, not an error.
  const sentences =
    gcpSttJson && Array.isArray(gcpSttJson.results) ? gcpSttJson.results : [];

  return groupWordsInParagraphs(sentences).map((paragraph, i) => {
    const [firstWord] = paragraph.words;

    return {
      text: paragraph.text.join(' '),
      type: 'paragraph',
      data: {
        speaker: paragraph.speaker ? `Speaker ${ paragraph.speaker }` : `TBC ${ i }`,
        words: paragraph.words,
        // a paragraph without word timings falls back to 0 instead of throwing
        start: firstWord ? parseFloat(firstWord.start) : 0
      },
      // the entities as ranges are each word in the space-joined text,
      // so the offset from the beginning of the paragraph and the length are computed for each
      entityRanges: generateEntitiesRanges(paragraph.words, 'text') // wordAttributeName
    };
  });
};
117+
118+
export default gcpSttToDraft;
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import gcpSttToDraft, {
2+
getBestAlternativeSentence,
3+
trimLeadingAndTailingWhiteSpace
4+
} from './index';
5+
import draftTranscriptSample from './sample/googleSttToDraftJs.sample.js';
6+
import gcpSttTedTalkTranscript from './sample/gcpSttPunctuation.sample.json';
7+
8+
// End-to-end check: the bundled GCP sample must convert to the stored DraftJs fixture.
describe('gcpSttToDraft', () => {
  const draftBlocks = gcpSttToDraft(gcpSttTedTalkTranscript);

  it('Should be defined', () => {
    expect(draftBlocks).toBeDefined();
  });

  it('Should be equal to expected value', () => {
    expect(draftBlocks).toEqual(draftTranscriptSample);
  });
});
18+
19+
// The helper must drop the padding on both sides and leave inner spaces alone.
describe('leading and tailing white space should be removed from text block', () => {
  const paddedSentence = ' this is a sentence ';
  const trimmedSentence = trimLeadingAndTailingWhiteSpace(paddedSentence);

  it('should be equal to expected value', () => {
    expect(trimmedSentence).toEqual('this is a sentence');
  });
});
28+
29+
// Of two alternatives, the one carrying the higher confidence must be selected.
describe('Best alternative sentence should be returned', () => {
  const mostConfident = {
    transcript: 'this is the first sentence',
    confidence: 0.95
  };
  const lessConfident = {
    transcript: 'this is the first sentence alternative',
    confidence: 0.80
  };
  const sentences = { alternatives: [{ ...mostConfident }, lessConfident] };

  it('Should be equal to expected value', () => {
    expect(getBestAlternativeSentence(sentences)).toEqual(mostConfident);
  });
});

0 commit comments

Comments
 (0)