Skip to content

Commit f0bd8ad

Browse files
authored
Merge branch 'master' into layout-change
2 parents 89a06ad + 910916d commit f0bd8ad

File tree

14 files changed

+54848
-17
lines changed

14 files changed

+54848
-17
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ Fork this repository + git clone + cd into folder
4242
npm start
4343
```
4444

45-
Visit [http://localhost:3006](http://localhost:3006)
45+
Visit [http://localhost:3000](http://localhost:3000)
4646

4747

4848
## Usage - production

docs/features-list.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ Import Transcript Json - Adapters
5858
- [ ] Gentle Transcription
5959
- [ ] Gentle Alignment Json
6060
- [ ] IBM Watson STT
61-
- [ ] Speechmatics
61+
- [X] Speechmatics
6262
- [ ] AssemblyAI
6363
- [ ] Rev
6464
- [ ] Srt

package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "@bbc/react-transcript-editor",
33
"description": "A React component to make transcribing audio and video easier and faster.",
4-
"version": "0.2.11",
4+
"version": "0.2.12",
55
"keywords": [
66
"transcript",
77
"transcriptions",
@@ -16,7 +16,7 @@
1616
"dist"
1717
],
1818
"scripts": {
19-
"start": "PORT=3006 react-scripts start",
19+
"start": "react-scripts start",
2020
"test": "react-scripts test --env=jsdom",
2121
"eject": "react-scripts eject",
2222
"build:example": "react-scripts build",

src/index.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ class App extends React.Component {
2222
title: 'Ted Talk Kate Kate Darling',
2323
fileName: 'Kate Darling Ted Talk'
2424
};
25+
26+
this.transcriptEditorRef = React.createRef();
2527
}
2628

2729
loadDemo() {
@@ -100,7 +102,7 @@ class App extends React.Component {
100102

101103
exportTranscript = () => {
102104
// eslint-disable-next-line react/no-string-refs
103-
const { data, ext } = this.refs.transcriptEditor.getEditorContent(this.state.exportFormat);
105+
const { data, ext } = this.transcriptEditorRef.current.getEditorContent(this.state.exportFormat);
104106
this.download(data, `${ this.state.mediaUrl }.${ ext }`);
105107
}
106108

@@ -216,7 +218,7 @@ class App extends React.Component {
216218
sttJsonType={ this.state.sttType }
217219
handleAnalyticsEvents={ this.handleAnalyticsEvents }
218220
title={ this.state.title }
219-
ref={ 'transcriptEditor' }
221+
ref={ this.transcriptEditorRef }
220222
/>
221223
<hr/>
222224
<label>Components Analytics</label>

src/lib/TranscriptEditor/MediaPlayer/PlayerControls.js

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,55 @@ class PlayerControls extends React.Component {
4343
render() {
4444
return (
4545
<div className={ style.playerControls }>
46+
<button
47+
title="Rollback"
48+
className={ style.playerButton }
49+
onClick={ this.props.rollback }>
50+
<FontAwesomeIcon icon={ faUndo } />
51+
</button>
52+
53+
<button
54+
title="Rewind"
55+
className={ style.playerButton }
56+
onMouseDown={ this.setIntervalHelperBackward }
57+
onMouseUp={ this.clearIntervalHelper }>
58+
<FontAwesomeIcon icon={ faBackward } />
59+
60+
</button>
61+
62+
<button
63+
title="Play"
64+
className={ style.playerButton }
65+
onClick={ this.props.playMedia }>
66+
{this.props.isPlaying ? <FontAwesomeIcon icon={ faPause } /> : <FontAwesomeIcon icon={ faPlay } />}
67+
</button>
68+
69+
<button
70+
title="Forward"
71+
className={ style.playerButton }
72+
onMouseDown={ this.setIntervalHelperForward }
73+
onMouseUp={ this.clearIntervalHelper }>
74+
<FontAwesomeIcon icon={ faForward } />
75+
</button>
76+
77+
<span className={ style.playBackRate }>
78+
<Select
79+
title="Playback rate"
80+
options={ this.props.playbackRateOptions }
81+
currentValue={ this.props.playbackRate.toString() }
82+
name={ 'playbackRate' }
83+
handleChange={ this.props.setPlayBackRate } />
84+
</span>
4685

4786
<div className={ style.timeBox }>
48-
<span className={ style.currentTime }
87+
<span title="Current time" className={ style.currentTime }
4988
onClick={ this.props.promptSetCurrentTime }
5089
>{ this.props.currentTime }</span>
5190
<span className={ style.separator }>|</span>
52-
<span className={ style.duration }>{this.props.duration}</span>
91+
<span title="Clip duration" className={ style.duration }>{this.props.duration}</span>
5392
</div>
5493

94+
5595
<div className={ style.btnsGroup }>
5696
<button
5797
className={ style.playerButton }
@@ -103,7 +143,21 @@ class PlayerControls extends React.Component {
103143
>
104144
<FontAwesomeIcon icon={ faTv } />
105145
</button>
106-
</div>
146+
147+
<button
148+
title="Save"
149+
className={ style.playerButton }
150+
onClick={ this.props.handleSaveTranscript }>
151+
<FontAwesomeIcon icon={ faSave } />
152+
</button>
153+
154+
<button
155+
title="Picture-in-picture"
156+
className={ style.playerButton }
157+
onClick={ this.props.pictureInPicture }
158+
>
159+
<FontAwesomeIcon icon={ faTv } />
160+
</button>
107161

108162
<VolumeControl
109163
handleMuteVolume={ this.props.handleMuteVolume }

src/lib/TranscriptEditor/index.js

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ class TranscriptEditor extends React.Component {
3636
mediaDuration: '00:00:00:00',
3737
previewViewWidth: '25'
3838
};
39+
this.timedTextEditorRef = React.createRef();
3940
}
4041

4142
static getDerivedStateFromProps(nextProps) {
@@ -76,10 +77,10 @@ class TranscriptEditor extends React.Component {
7677
}
7778

7879
ifPresentRetrieveTranscriptFromLocalStorage = () => {
79-
if (this.refs.timedTextEditor!== undefined) {
80-
if (this.refs.timedTextEditor.isPresentInLocalStorage(this.props.mediaUrl)) {
80+
if (this.timedTextEditorRef.current!== undefined) {
81+
if (this.timedTextEditorRef.current.isPresentInLocalStorage(this.props.mediaUrl)) {
8182
console.info('was already present in local storage');
82-
this.refs.timedTextEditor.loadLocalSavedData(this.props.mediaUrl);
83+
this.timedTextEditorRef.current.loadLocalSavedData(this.props.mediaUrl);
8384
} else {
8485
console.info('not present in local storage');
8586
}
@@ -163,7 +164,7 @@ class TranscriptEditor extends React.Component {
163164
this.setState({ timecodeOffset: timecodeOffset },
164165
() => {
165166
// eslint-disable-next-line react/no-string-refs
166-
this.refs.timedTextEditor.forceUpdate();
167+
this.timedTextEditorRef.current.forceUpdate();
167168
});
168169
}
169170

@@ -226,7 +227,7 @@ class TranscriptEditor extends React.Component {
226227
}
227228

228229
getEditorContent = (exportFormat) => {
229-
return this.refs.timedTextEditor.getEditorContent(exportFormat);
230+
return this.timedTextEditorRef.current.getEditorContent(exportFormat);
230231
}
231232

232233
handlePreviewIsDisplayed = () => {
@@ -263,7 +264,7 @@ class TranscriptEditor extends React.Component {
263264
}
264265

265266
handleSaveTranscript = () => {
266-
return this.refs.timedTextEditor.localSave(this.props.mediaUrl);
267+
return this.timedTextEditorRef.current.localSave(this.props.mediaUrl);
267268
}
268269

269270
render() {
@@ -334,7 +335,7 @@ class TranscriptEditor extends React.Component {
334335
isPauseWhileTypingOn={ this.state.isPauseWhileTypingOn }
335336
showTimecodes={ this.state.showTimecodes }
336337
showSpeakers={ this.state.showSpeakers }
337-
ref={ 'timedTextEditor' }
338+
ref={ this.timedTextEditorRef }
338339
handleAnalyticsEvents={ this.props.handleAnalyticsEvents }
339340
/>;
340341

src/lib/Util/adapters/index.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import bbcKaldiToDraft from './bbc-kaldi/index';
22
import autoEdit2ToDraft from './autoEdit2/index';
3+
import speechmaticsToDraft from './speechmatics/index';
34
/**
45
* Adapters for STT conversion
56
* @param {json} transcriptData - A json transcript with some word accurate timecode
@@ -37,6 +38,10 @@ const sttJsonAdapter = (transcriptData, sttJsonType) => {
3738
case 'autoedit2':
3839
blocks = autoEdit2ToDraft(transcriptData);
3940

41+
return { blocks, entityMap: createEntityMap(blocks) };
42+
case 'speechmatics':
43+
blocks = speechmaticsToDraft(transcriptData);
44+
4045
return { blocks, entityMap: createEntityMap(blocks) };
4146
case 'draftjs':
4247
return transcriptData; // (typeof transcriptData === 'string')? JSON.parse(transcriptData): transcriptData;
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
const speechmaticsToDraft = require('./index');
2+
// using require, because of testing outside of React app
3+
const speechmaticsTedTalkTranscript = require('./sample/speechmaticsTedTalkTranscript.sample.json');
4+
5+
const result = speechmaticsToDraft(speechmaticsTedTalkTranscript);
6+
7+
console.log(result);
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
/**
2+
* Convert Speechmatics
3+
*/
4+
5+
import generateEntitiesRanges from '../generate-entities-ranges/index.js';
6+
7+
/**
 * Groups a flat list of words into paragraphs, closing a paragraph after every
 * word whose `punct` text contains sentence-ending punctuation (`.`, `?` or `!`).
 * Words that follow the last sentence-ending punctuation are emitted as a final
 * paragraph instead of being discarded.
 * @todo To be more accurate, should introduce an honorifics library to do the splitting of the words.
 * @todo As this function is also used in the bbc-kaldi adapter, should it be refactored into its own file?
 * @param {array} words - array of word objects; each is expected to carry a `punct` attribute
 * @returns {array} list of `{ words: [], text: [] }` paragraphs, where `text` holds each word's `punct`
 */
const groupWordsInParagraphs = (words) => {
  const results = [];
  let paragraph = { words: [], text: [] };

  words.forEach((word) => {
    paragraph.words.push(word);
    paragraph.text.push(word.punct);

    // sentence-ending punctuation closes the current paragraph
    if (/[.?!]/.test(word.punct)) {
      results.push(paragraph);
      paragraph = { words: [], text: [] };
    }
  });

  // flush trailing words that were not terminated by punctuation,
  // otherwise the end of the transcript would be lost
  if (paragraph.words.length > 0) {
    results.push(paragraph);
  }

  return results;
};
34+
35+
/**
 * Determines the speaker of a paragraph by finding the first speaker segment
 * whose `[start, end)` interval contains the paragraph start time.
 *
 * All times are parsed with `parseFloat` before being compared: the caller
 * passes them as strings, and comparing strings is lexicographic
 * (e.g. `'10.5' < '9.0'`), which would pick the wrong speaker.
 * @param {number|string} start - Starting point of paragraph
 * @param {array} speakers - list of all speakers with `start`, `end` and `name`
 * @returns {string} name of the matching speaker, or 'UNK' when none matches
 */
const getSpeaker = (start, speakers) => {
  const startTime = parseFloat(start);

  // Object.values handles arrays as well as keyed collections;
  // a missing speakers list simply yields no match.
  const match = Object.values(speakers || {}).find((speaker) => {
    return startTime >= parseFloat(speaker.start) && startTime < parseFloat(speaker.end);
  });

  return match ? match.name : 'UNK';
};
51+
52+
/**
 * Speechmatics treats punctuation as own words. This function merges each
 * punctuation token into the previous word and adds the punctuation's duration
 * to that word's duration.
 *
 * The input word objects are not modified; merged words are shallow copies.
 * A punctuation token with no preceding word (e.g. at the very start of the
 * list) is kept as its own entry rather than causing an error.
 * @param {array} words - array of words objects from speechmatics transcript
 * @returns {array} new list of word objects with punctuation merged in
 */
const curatePunctuation = (words) => {
  const curatedWords = [];

  words.forEach((word) => {
    const isPunctuation = /[.?!]/.test(word.name);
    const previous = curatedWords[curatedWords.length - 1];

    if (isPunctuation && previous !== undefined) {
      previous.name = previous.name + word.name;
      // durations arrive as strings, so sum numerically and store as string again
      previous.duration = (parseFloat(previous.duration) + parseFloat(word.duration)).toString();
    } else {
      // copy so that later merges never mutate the caller's objects
      curatedWords.push({ ...word });
    }
  });

  return curatedWords;
};
71+
72+
/**
 * Converts a Speechmatics transcript json into a list of draftJs content blocks,
 * one block per paragraph.
 * @param {json} speechmaticsJson - transcript with a `words` list and a `speakers` list,
 * whose entries carry `time`, `duration` and `name` attributes
 * @returns {array} draftJs paragraph blocks with text, speaker/word data and entity ranges
 */
const speechmaticsToDraft = (speechmaticsJson) => {
  // end time = start time + duration, kept as a string like the other time attributes
  const computeEnd = (item) => (parseFloat(item.time) + parseFloat(item.duration)).toString();

  const mergedWords = curatePunctuation(speechmaticsJson.words);
  const draftWords = mergedWords.map((item, position) => ({
    start: item.time,
    end: computeEnd(item),
    confidence: item.confidence,
    word: item.name.toLowerCase().replace(/[.?!]/g, ''),
    punct: item.name,
    index: position,
  }));

  const speakerSegments = speechmaticsJson.speakers.map((segment) => ({
    start: segment.time,
    end: computeEnd(segment),
    name: segment.name,
  }));

  return groupWordsInParagraphs(draftWords).map(({ words: paragraphWords, text }) => {
    const firstWordStart = paragraphWords[0].start;

    return {
      text: text.join(' '),
      type: 'paragraph',
      data: {
        speaker: getSpeaker(firstWordStart, speakerSegments),
        words: paragraphWords,
        start: firstWordStart
      },
      // each word of the space-joined text becomes an entity range, described by
      // its offset from the beginning of the paragraph and its length
      entityRanges: generateEntitiesRanges(paragraphWords, 'punct'), // wordAttributeName
    };
  });
};
119+
120+
export default speechmaticsToDraft;
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import speechmaticsToDraft from './index';
2+
3+
import draftTranscriptExample from './sample/speechmaticsToDraft.sample.js';
4+
import speechmaticsTedTalkTranscript from './sample/speechmaticsTedTalkTranscript.sample.json';
5+
6+
// TODO: figure out why the second of these two tests hangs
7+
// might need to review the draftJS data structure output
8+
describe('speechmaticsToDraft', () => {
9+
const result = speechmaticsToDraft(speechmaticsTedTalkTranscript);
10+
it('Should be defined', ( ) => {
11+
expect(result).toBeDefined();
12+
});
13+
14+
it('Should be equal to expected value', ( ) => {
15+
expect(result).toEqual(draftTranscriptExample);
16+
});
17+
});

0 commit comments

Comments
 (0)