Merge branch 'master' into layout-change

pietrop · web-flow · commit f0bd8ad4919c · 2019-02-18T14:04:11.000Z
diff --git a/README.md b/README.md
@@ -42,7 +42,7 @@ Fork this repository + git clone + cd into folder
 npm start
 ```
 
-Visit [http://localhost:3006](http://localhost:3006)
+Visit [http://localhost:3000](http://localhost:3000)
 
 
 ## Usage - production
diff --git a/docs/features-list.md b/docs/features-list.md
@@ -58,7 +58,7 @@ Import Transcript Json - Adapters
 - [ ] Gentle Transcription 
 - [ ] Gentle Alignment Json
 - [ ] IBM Watson STT
-- [ ] Speechmatics
+- [X] Speechmatics
 - [ ] AssemblyAI
 - [ ] Rev
 - [ ] Srt
diff --git a/package.json b/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@bbc/react-transcript-editor",
   "description": "A React component to make transcribing audio and video easier and faster.",
-  "version": "0.2.11",
+  "version": "0.2.12",
   "keywords": [
     "transcript",
     "transcriptions",
@@ -16,7 +16,7 @@
     "dist"
   ],
   "scripts": {
-    "start": "PORT=3006 react-scripts start",
+    "start": "react-scripts start",
     "test": "react-scripts test --env=jsdom",
     "eject": "react-scripts eject",
     "build:example": "react-scripts build",
diff --git a/src/index.js b/src/index.js
@@ -22,6 +22,8 @@ class App extends React.Component {
       title: 'Ted Talk Kate Kate Darling',
       fileName: 'Kate Darling Ted Talk'
     };
+
+    this.transcriptEditorRef = React.createRef();
   }
 
   loadDemo() {
@@ -100,7 +102,7 @@ class App extends React.Component {
 
   exportTranscript = () => {
     // eslint-disable-next-line react/no-string-refs
-    const { data, ext } = this.refs.transcriptEditor.getEditorContent(this.state.exportFormat);
+    const { data, ext } = this.transcriptEditorRef.current.getEditorContent(this.state.exportFormat);
     this.download(data, `${ this.state.mediaUrl }.${ ext }`);
   }
 
@@ -216,7 +218,7 @@ class App extends React.Component {
            sttJsonType={ this.state.sttType }
            handleAnalyticsEvents={ this.handleAnalyticsEvents }
            title={ this.state.title }
-           ref={ 'transcriptEditor' }
+           ref={ this.transcriptEditorRef }
          />
          <hr/>
          <label>Components Analytics</label>
diff --git a/src/lib/TranscriptEditor/MediaPlayer/PlayerControls.js b/src/lib/TranscriptEditor/MediaPlayer/PlayerControls.js
@@ -43,15 +43,55 @@ class PlayerControls extends React.Component {
   render() {
     return (
       <div className={ style.playerControls }>
+        <button
+          title="Rollback"
+          className={ style.playerButton }
+          onClick={ this.props.rollback }>
+          <FontAwesomeIcon icon={ faUndo } />
+        </button>
+
+        <button
+          title="Rewind"
+          className={ style.playerButton }
+          onMouseDown={ this.setIntervalHelperBackward }
+          onMouseUp={ this.clearIntervalHelper }>
+          <FontAwesomeIcon icon={ faBackward } />
+
+        </button>
+
+        <button
+          title="Play"
+          className={ style.playerButton }
+          onClick={ this.props.playMedia }>
+          {this.props.isPlaying ? <FontAwesomeIcon icon={ faPause } /> : <FontAwesomeIcon icon={ faPlay } />}
+        </button>
+
+        <button
+          title="Forward"
+          className={ style.playerButton }
+          onMouseDown={ this.setIntervalHelperForward }
+          onMouseUp={ this.clearIntervalHelper }>
+          <FontAwesomeIcon icon={ faForward } />
+        </button>
+
+        <span className={ style.playBackRate }>
+          <Select
+            title="Playback rate"
+            options={ this.props.playbackRateOptions }
+            currentValue={ this.props.playbackRate.toString() }
+            name={ 'playbackRate' }
+            handleChange={ this.props.setPlayBackRate } />
+        </span>
 
         <div className={ style.timeBox }>
-          <span className={ style.currentTime }
+          <span title="Current time" className={ style.currentTime }
             onClick={ this.props.promptSetCurrentTime }
           >{ this.props.currentTime }</span>
           <span className={ style.separator }>|</span>
-          <span className={ style.duration }>{this.props.duration}</span>
+          <span title="Clip duration" className={ style.duration }>{this.props.duration}</span>
         </div>
 
+
         <div className={ style.btnsGroup }>
           <button
             className={ style.playerButton }
@@ -103,7 +143,21 @@ class PlayerControls extends React.Component {
           >
             <FontAwesomeIcon icon={ faTv } />
           </button>
-        </div>
+
+        <button
+          title="Save"
+          className={ style.playerButton }
+          onClick={ this.props.handleSaveTranscript }>
+          <FontAwesomeIcon icon={ faSave } />
+        </button>
+
+        <button
+          title="Picture-in-picture"
+          className={ style.playerButton }
+          onClick={ this.props.pictureInPicture }
+        >
+          <FontAwesomeIcon icon={ faTv } />
+        </button>
 
         <VolumeControl
           handleMuteVolume={ this.props.handleMuteVolume }
diff --git a/src/lib/TranscriptEditor/index.js b/src/lib/TranscriptEditor/index.js
@@ -36,6 +36,7 @@ class TranscriptEditor extends React.Component {
       mediaDuration: '00:00:00:00',
       previewViewWidth: '25'
     };
+    this.timedTextEditorRef = React.createRef();
   }
 
   static getDerivedStateFromProps(nextProps) {
@@ -76,10 +77,10 @@ class TranscriptEditor extends React.Component {
   }
 
   ifPresentRetrieveTranscriptFromLocalStorage = () => {
-    if (this.refs.timedTextEditor!== undefined) {
-      if (this.refs.timedTextEditor.isPresentInLocalStorage(this.props.mediaUrl)) {
+    if (this.timedTextEditorRef.current) { // createRef() holds null (never undefined) until the editor is mounted
+      if (this.timedTextEditorRef.current.isPresentInLocalStorage(this.props.mediaUrl)) {
         console.info('was already present in local storage');
-        this.refs.timedTextEditor.loadLocalSavedData(this.props.mediaUrl);
+        this.timedTextEditorRef.current.loadLocalSavedData(this.props.mediaUrl);
       } else {
         console.info('not present in local storage');
       }
@@ -163,7 +164,7 @@ class TranscriptEditor extends React.Component {
     this.setState({ timecodeOffset: timecodeOffset },
       () => {
         // eslint-disable-next-line react/no-string-refs
-        this.refs.timedTextEditor.forceUpdate();
+        this.timedTextEditorRef.current.forceUpdate();
       });
   }
 
@@ -226,7 +227,7 @@ class TranscriptEditor extends React.Component {
   }
 
   getEditorContent = (exportFormat) => {
-    return this.refs.timedTextEditor.getEditorContent(exportFormat);
+    return this.timedTextEditorRef.current.getEditorContent(exportFormat);
   }
 
   handlePreviewIsDisplayed = () => {
@@ -263,7 +264,7 @@ class TranscriptEditor extends React.Component {
   }
 
   handleSaveTranscript = () => {
-    return this.refs.timedTextEditor.localSave(this.props.mediaUrl);
+    return this.timedTextEditorRef.current.localSave(this.props.mediaUrl);
   }
 
   render() {
@@ -334,7 +335,7 @@ class TranscriptEditor extends React.Component {
       isPauseWhileTypingOn={ this.state.isPauseWhileTypingOn }
       showTimecodes={ this.state.showTimecodes }
       showSpeakers={ this.state.showSpeakers }
-      ref={ 'timedTextEditor' }
+      ref={ this.timedTextEditorRef }
       handleAnalyticsEvents={ this.props.handleAnalyticsEvents }
     />;
 
diff --git a/src/lib/Util/adapters/index.js b/src/lib/Util/adapters/index.js
@@ -1,5 +1,6 @@
 import bbcKaldiToDraft from './bbc-kaldi/index';
 import autoEdit2ToDraft from './autoEdit2/index';
+import speechmaticsToDraft from './speechmatics/index';
 /**
  * Adapters for STT conversion
  * @param {json} transcriptData - A json transcript with some word accurate timecode
@@ -37,6 +38,10 @@ const sttJsonAdapter = (transcriptData, sttJsonType) => {
   case 'autoedit2':
     blocks = autoEdit2ToDraft(transcriptData);
 
+    return { blocks, entityMap: createEntityMap(blocks) };
+  case 'speechmatics':
+    blocks = speechmaticsToDraft(transcriptData);
+
     return { blocks, entityMap: createEntityMap(blocks) };
   case 'draftjs':
     return transcriptData; // (typeof transcriptData === 'string')? JSON.parse(transcriptData): transcriptData;
diff --git a/src/lib/Util/adapters/speechmatics/example-usage.js b/src/lib/Util/adapters/speechmatics/example-usage.js
@@ -0,0 +1,7 @@
+// using require, because of testing outside of React app
+// NOTE: ./index uses `export default`; when it is loaded through require() the function may sit under `.default`
+const speechmaticsModule = require('./index');
+const speechmaticsToDraft = speechmaticsModule.default || speechmaticsModule;
+const speechmaticsTedTalkTranscript = require('./sample/speechmaticsTedTalkTranscript.sample.json');
+
+console.log(speechmaticsToDraft(speechmaticsTedTalkTranscript));
diff --git a/src/lib/Util/adapters/speechmatics/index.js b/src/lib/Util/adapters/speechmatics/index.js
@@ -0,0 +1,120 @@
+/**
+ *  Convert Speechmatics
+ */
+
+import generateEntitiesRanges from '../generate-entities-ranges/index.js';
+
+/**
+ * Groups the speechmatics word list into paragraphs, closing a paragraph after every word whose `punct`
+ * text contains sentence-ending punctuation (. ? !); words after the last such word form a final paragraph.
+ * @todo To be more accurate, should introduce an honorifics library to do the splitting of the words.
+ * @todo As this function is also used in the bbc-kaldi adapter, should it be refactored into its own file?
+ * @param {array} words - array of word objects, each carrying its display text in `punct`
+ * @returns {array} list of paragraphs shaped as { words: [...], text: [...] }
+ */
+const groupWordsInParagraphs = (words) => {
+  const results = [];
+  let paragraph = { words: [], text: [] };
+
+  words.forEach((word) => {
+    paragraph.words.push(word);
+    paragraph.text.push(word.punct);
+    if (/[.?!]/.test(word.punct)) {
+      results.push(paragraph);
+      paragraph = { words: [], text: [] };
+    }
+  });
+  // flush the words that follow the last sentence-ending punctuation instead of dropping them
+  if (paragraph.words.length > 0) {
+    results.push(paragraph);
+  }
+
+  return results;
+};
+
+/**
+ * Determines the speaker of a paragraph by finding the speaker segment whose [start, end)
+ * range contains the paragraph start. Times are parsed to numbers before comparing, since
+ * comparing them as strings is lexicographic (e.g. '9.5' >= '10.2' is true).
+ * @param {float|string} start - Starting point of paragraph
+ * @param {array} speakers - list of all speakers with start and end time
+ * @returns {string} name of the matching speaker, or 'UNK' when no segment matches
+ */
+const getSpeaker = (start, speakers) => {
+  const startTime = parseFloat(start);
+  const match = (speakers || []).find((speaker) => {
+    return startTime >= parseFloat(speaker.start) && startTime < parseFloat(speaker.end);
+  });
+
+  return match ? match.name : 'UNK';
+};
+
+/**
+ * Speechmatics treats punctuation as own words: this merges each one into a copy of the previous word (name appended, durations added), leaving the input untouched.
+ * @param {array} words - array of words objects from speechmatics transcript
+ * @returns {array} new array of word objects with punctuation merged into the preceding word
+ */
+const curatePunctuation = (words) => {
+  const curatedWords = [];
+  words.forEach((word) => {
+    const previous = curatedWords[curatedWords.length - 1];
+    if (previous !== undefined && /[.?!]/.test(word.name)) {
+      previous.name = previous.name + word.name;
+      previous.duration = (parseFloat(previous.duration) + parseFloat(word.duration)).toString();
+    } else { // a regular word, or punctuation that has no word before it
+      curatedWords.push(Object.assign({}, word));
+    }
+  });
+
+  return curatedWords;
+};
+
+/**
+ * Converts a speechmatics transcript into a list of draftJs content blocks, one per paragraph.
+ * Punctuation entries are first merged into their preceding word (curatePunctuation), words are
+ * then grouped into paragraphs (groupWordsInParagraphs) and each paragraph is given the speaker
+ * whose time range contains the paragraph start (getSpeaker).
+ * Each block holds the space-joined paragraph text plus, in `data`, its speaker, words and start time.
+ * @param {object} speechmaticsJson - transcript with a `words` list and an optional `speakers` list;
+ * entries of both lists are read for `name`, `time` and `duration`, words also for `confidence`
+ * @returns {array} draftJs content blocks of type 'paragraph'
+ */
+const speechmaticsToDraft = (speechmaticsJson) => {
+  // end time is start time plus duration, serialised back to a string
+  const getEnd = (element) => (parseFloat(element.time) + parseFloat(element.duration)).toString();
+
+  const words = curatePunctuation(speechmaticsJson.words).map((element, index) => ({
+    start: element.time,
+    end: getEnd(element),
+    confidence: element.confidence,
+    word: element.name.toLowerCase().replace(/[.?!]/g, ''),
+    punct: element.name,
+    index: index,
+  }));
+
+  // a transcript without a `speakers` list gets 'UNK' as the speaker of every paragraph
+  const speakers = (speechmaticsJson.speakers || []).map((element) => ({
+    start: element.time,
+    end: getEnd(element),
+    name: element.name,
+  }));
+
+  return groupWordsInParagraphs(words).map((paragraph) => {
+    const paragraphStart = paragraph.words[0].start;
+
+    return {
+      text: paragraph.text.join(' '),
+      type: 'paragraph',
+      data: {
+        speaker: getSpeaker(paragraphStart, speakers),
+        words: paragraph.words,
+        start: paragraphStart
+      },
+      // the entities as ranges are each word in the space-joined text,
+      // so the offset from the beginning of the paragraph and the length are computed for each of them
+      entityRanges: generateEntitiesRanges(paragraph.words, 'punct'), // wordAttributeName
+    };
+  });
+};
+
+export default speechmaticsToDraft;
diff --git a/src/lib/Util/adapters/speechmatics/index.test.js b/src/lib/Util/adapters/speechmatics/index.test.js
@@ -0,0 +1,17 @@
+import speechmaticsToDraft from './index';
+
+import draftTranscriptExample from './sample/speechmaticsToDraft.sample.js';
+import speechmaticsTedTalkTranscript from './sample/speechmaticsTedTalkTranscript.sample.json';
+
+// TODO: figure out why the second of these two tests hangs;
+// the draftJS data structure output might need to be reviewed
+describe('speechmaticsToDraft', () => {
+  const result = speechmaticsToDraft(speechmaticsTedTalkTranscript);
+  it('Should be defined', ( ) => {
+    expect(result).toBeDefined();
+  });
+
+  it('Should be equal to expected value', ( ) => {
+    expect(result).toEqual(draftTranscriptExample);
+  });
+});
diff --git a/src/lib/Util/adapters/speechmatics/sample/speechmaticsTedTalkTranscript.sample.json b/src/lib/Util/adapters/speechmatics/sample/speechmaticsTedTalkTranscript.sample.json
diff --git a/src/lib/Util/adapters/speechmatics/sample/speechmaticsToDraft.sample.js b/src/lib/Util/adapters/speechmatics/sample/speechmaticsToDraft.sample.js
diff --git a/src/sample-data/KateDarling_2018S-speechmatics.json b/src/sample-data/KateDarling_2018S-speechmatics.json
diff --git a/src/select-stt-json-type.js b/src/select-stt-json-type.js