1
1
/**
2
- * TODO: remove this and export from @bbc/react-transcript-editor digital-paper-edit STT import draftJs converter
3
-
4
2
edge cases
5
3
- more segments than words - not an issue if you start by matching words with segments
6
4
and handle edge case where it doesn't find a match
7
- - more words then segments - orphan words
5
+ - more words than segments - orphan words?
8
6
*
9
7
* Takes in a list of words and a list of paragraphs (each paragraph has speaker info associated with it)
10
8
```js
@@ -79,24 +77,18 @@ and handle edge case where it doesn't find a match
79
77
```
80
78
*/
81
79
function groupWordsInParagraphsBySpeakers ( words , segments ) {
82
- // add speakers to each word
83
- // const wordsWithSpeakers = addSpeakerToEachWord(words, segments);
84
- // group words by speakers sequentially
85
- // const result = groupWordsBySpeaker(wordsWithSpeakers);
86
-
87
80
const result = addWordsToSpeakersParagraphs ( words , segments ) ;
88
81
89
82
return result ;
90
83
} ;
91
84
92
85
function addWordsToSpeakersParagraphs ( words , segments ) {
93
- let results = [ ] ;
86
+ const results = [ ] ;
94
87
let currentSegment = 'UKN' ;
95
88
let currentSegmentIndex = 0 ;
96
89
let previousSegmentIndex = 0 ;
97
90
let paragraph = { words : [ ] , text : '' , speaker : '' } ;
98
91
words . forEach ( ( word ) => {
99
- // console.log(word);
100
92
currentSegment = findSegmentForWord ( word , segments ) ;
101
93
// if a segment exists for the word
102
94
if ( currentSegment ) {
@@ -108,31 +100,16 @@ function addWordsToSpeakersParagraphs (words, segments) {
108
100
}
109
101
else {
110
102
previousSegmentIndex = currentSegmentIndex ;
103
+ paragraph . text . trim ( ) ;
111
104
results . push ( paragraph ) ;
112
105
paragraph = { words : [ ] , text : '' , speaker : '' } ;
113
- }
114
- }
115
- // TODO: handling edge case orphan words
116
- // TODO: this needs to be tested/check with input sequence that has
117
- // orphan words
118
- else {
119
- currentSegment = 'UKN' ;
120
- if ( currentSegmentIndex === previousSegmentIndex ) {
121
106
paragraph . words . push ( word ) ;
122
107
paragraph . text += word . text + ' ' ;
123
108
paragraph . speaker = currentSegment . speaker ;
124
109
}
125
- else {
126
- previousSegmentIndex = currentSegmentIndex ;
127
- results . push ( paragraph ) ;
128
- paragraph = { words : [ ] , text : '' , speaker : '' } ;
129
- }
130
110
}
131
111
} ) ;
132
-
133
- results = results . filter ( ( p ) => {
134
- return p . words . length !== 0 ;
135
- } ) ;
112
+ results . push ( paragraph ) ;
136
113
137
114
return results ;
138
115
}
0 commit comments