@@ -99,24 +99,14 @@ extension _StringGuts {
99
99
@usableFromInline @inline ( never)
100
100
@_effects ( releasenone)
101
101
internal func _opaqueCharacterStride( startingAt i: Int ) -> Int {
102
- let nextIdx : Int
103
-
104
102
if _slowPath ( isForeign) {
105
- nextIdx = nextBoundary ( startingAt: i) {
106
- let scalars = String . UnicodeScalarView ( self )
107
- let idx = String . Index ( _encodedOffset: $0)
108
-
109
- let scalar = scalars [ idx]
110
- let nextIdx = scalars. index ( after: idx)
103
+ return _foreignOpaqueCharacterStride ( startingAt: i)
104
+ }
111
105
112
- return ( scalar, nextIdx. _encodedOffset)
113
- }
114
- } else {
115
- nextIdx = withFastUTF8 { utf8 in
116
- nextBoundary ( startingAt: i) {
117
- let ( scalar, len) = _decodeScalar ( utf8, startingAt: $0)
118
- return ( scalar, $0 &+ len)
119
- }
106
+ let nextIdx = withFastUTF8 { utf8 in
107
+ nextBoundary ( startingAt: i) {
108
+ let ( scalar, len) = _decodeScalar ( utf8, startingAt: $0)
109
+ return ( scalar, $0 &+ len)
120
110
}
121
111
}
122
112
@@ -126,45 +116,83 @@ extension _StringGuts {
126
116
// Returns the distance from the grapheme boundary that precedes offset `i`
// up to `i` itself (the value `i &- previousIdx`).
//
// Foreign guts are handed off to `_foreignOpaqueCharacterStride(endingAt:)`;
// otherwise the boundary is located by `previousBoundary(endingAt:)`, decoding
// scalars backwards out of the buffer supplied by `withFastUTF8`.
@usableFromInline @inline ( never)
127
117
@_effects ( releasenone)
128
118
internal func _opaqueCharacterStride( endingAt i: Int ) -> Int {
129
- let previousIdx : Int
130
-
131
119
// Foreign strings take the separate, out-of-line slow path and return early.
if _slowPath ( isForeign) {
132
- previousIdx = previousBoundary ( endingAt: i) {
133
- let scalars = String . UnicodeScalarView ( self )
134
- let idx = String . Index ( _encodedOffset: $0)
135
-
136
- let previousIdx = scalars. index ( before: idx)
137
- let scalar = scalars [ previousIdx]
120
+ return _foreignOpaqueCharacterStride ( endingAt: i)
121
+ }
138
122
139
- return ( scalar, previousIdx. _encodedOffset)
140
- }
141
- } else {
142
- previousIdx = withFastUTF8 { utf8 in
143
- previousBoundary ( endingAt: i) {
144
- let ( scalar, len) = _decodeScalar ( utf8, endingAt: $0)
145
- return ( scalar, $0 &- len)
146
- }
123
// Fast path: the closure hands `previousBoundary` the scalar that ends at
// offset `$0` together with the offset at which that scalar starts.
+ let previousIdx = withFastUTF8 { utf8 in
124
+ previousBoundary ( endingAt: i) {
125
+ let ( scalar, len) = _decodeScalar ( utf8, endingAt: $0)
126
+ return ( scalar, $0 &- len)
147
127
}
148
128
}
149
129
150
130
return i &- previousIdx
151
131
}
152
- }
153
132
154
- internal struct _GraphemeBreakingState {
155
- var isBackwards : Bool = false
156
- var isInEmojiSequence : Bool = false
157
- var shouldBreakRI : Bool = false
133
// Foreign-string slow path used by `_opaqueCharacterStride(startingAt:)`.
//
// Finds the next grapheme boundary after offset `i` by walking scalars through
// `String.UnicodeScalarView` and returns the stride `nextIdx &- i`. The guts
// must be foreign (checked with `_internalInvariant`).
//
// NOTE(review): the `#else` branch traps on every runtime where
// `_runtime(_ObjC)` is false, although the message only mentions Linux.
+ @inline ( never)
134
+ @_effects ( releasenone)
135
+ private func _foreignOpaqueCharacterStride( startingAt i: Int ) -> Int {
136
+ #if _runtime(_ObjC)
137
+ _internalInvariant ( isForeign)
138
+
139
// The closure hands `nextBoundary` the scalar at offset `$0` together with
// the encoded offset of the scalar that follows it.
+ let nextIdx = nextBoundary ( startingAt: i) {
140
+ let scalars = String . UnicodeScalarView ( self )
141
+ let idx = String . Index ( _encodedOffset: $0)
142
+
143
+ let scalar = scalars [ idx]
144
+ let nextIdx = scalars. index ( after: idx)
158
145
159
- static func forward( ) -> _GraphemeBreakingState {
160
- _GraphemeBreakingState ( )
146
+ return ( scalar, nextIdx. _encodedOffset)
147
+ }
148
+
149
+ return nextIdx &- i
150
+ #else
151
+ fatalError ( " No foreign strings on Linux in this version of Swift " )
152
+ #endif
161
153
}
162
154
163
- static func backward( ) -> _GraphemeBreakingState {
164
- _GraphemeBreakingState ( isBackwards: true )
155
// Foreign-string slow path used by `_opaqueCharacterStride(endingAt:)`.
//
// Finds the grapheme boundary that precedes offset `i` by walking scalars
// backwards through `String.UnicodeScalarView` and returns the stride
// `i &- previousIdx`. The guts must be foreign (checked with
// `_internalInvariant`).
//
// NOTE(review): the `#else` branch traps on every runtime where
// `_runtime(_ObjC)` is false, although the message only mentions Linux.
+ @inline ( never)
156
+ @_effects ( releasenone)
157
+ private func _foreignOpaqueCharacterStride( endingAt i: Int ) -> Int {
158
+ #if _runtime(_ObjC)
159
+ _internalInvariant ( isForeign)
160
+
161
// The closure hands `previousBoundary` the scalar that precedes offset `$0`
// together with the encoded offset at which that scalar starts.
+ let previousIdx = previousBoundary ( endingAt: i) {
162
+ let scalars = String . UnicodeScalarView ( self )
163
+ let idx = String . Index ( _encodedOffset: $0)
164
+
165
+ let previousIdx = scalars. index ( before: idx)
166
+ let scalar = scalars [ previousIdx]
167
+
168
+ return ( scalar, previousIdx. _encodedOffset)
169
+ }
170
+
171
+ return i &- previousIdx
172
+ #else
173
+ fatalError ( " No foreign strings on Linux in this version of Swift " )
174
+ #endif
165
175
}
166
176
}
167
177
178
// Mutable state threaded (as an `inout` argument) through successive
// `shouldBreak` decisions while scanning for a grapheme boundary. Both flags
// start out `false`.
+ internal struct _GraphemeBreakingState {
179
+ // When walking forwards in a string, we need to know whether or not we've
180
+ // entered an emoji sequence to be able to eventually break after all of the
181
+ // emoji's various extenders and zero width joiners. This bit allows us to
182
+ // keep track of whether or not we're still in an emoji sequence when deciding
183
+ // to break.
184
+ var isInEmojiSequence : Bool = false
185
+
186
+ // When walking forward in a string, we need to not break on emoji flag
187
+ // sequences. Emoji flag sequences are composed of 2 regional indicators, so
188
+ // when we see our first (.regionalIndicator, .regionalIndicator) decision,
189
+ // we need to know to return false in this case. However, if the next scalar
190
+ // is another regional indicator, we reach the same decision rule, but in this
191
+ // case we actually need to break, since there's a boundary between emoji flag
192
+ // sequences.
193
+ var shouldBreakRI : Bool = false
194
+ }
195
+
168
196
extension _StringGuts {
169
197
// Returns the stride of the next grapheme cluster at the previous boundary
170
198
// offset.
@@ -173,7 +201,7 @@ extension _StringGuts {
173
201
nextScalar: ( Int ) -> ( Unicode . Scalar , end: Int )
174
202
) -> Int {
175
203
_internalInvariant ( index != endIndex. _encodedOffset)
176
- var state = _GraphemeBreakingState. forward ( )
204
+ var state = _GraphemeBreakingState ( )
177
205
var index = index
178
206
179
207
while true {
@@ -201,7 +229,7 @@ extension _StringGuts {
201
229
previousScalar: ( Int ) -> ( Unicode . Scalar , start: Int )
202
230
) -> Int {
203
231
_internalInvariant ( index != startIndex. _encodedOffset)
204
- var state = _GraphemeBreakingState. backward ( )
232
+ var state = _GraphemeBreakingState ( )
205
233
var index = index
206
234
207
235
while true {
@@ -214,7 +242,13 @@ extension _StringGuts {
214
242
215
243
let ( scalar1, _) = previousScalar ( index)
216
244
217
- if shouldBreak ( scalar1, between: scalar2, & state, index) {
245
+ if shouldBreak (
246
+ scalar1,
247
+ between: scalar2,
248
+ & state,
249
+ index,
250
+ isBackwards: true
251
+ ) {
218
252
break
219
253
}
220
254
}
@@ -233,7 +267,8 @@ extension _StringGuts {
233
267
_ scalar1: Unicode . Scalar ,
234
268
between scalar2: Unicode . Scalar ,
235
269
_ state: inout _GraphemeBreakingState ,
236
- _ index: Int
270
+ _ index: Int ,
271
+ isBackwards: Bool = false
237
272
) -> Bool {
238
273
// GB3
239
274
if scalar1. value == 0xD , scalar2. value == 0xA {
@@ -315,24 +350,23 @@ extension _StringGuts {
315
350
316
351
// GB11
317
352
case ( . zwj, . extendedPictographic) :
318
- if state . isBackwards {
319
- checkIfInEmojiSequence ( & state , index)
353
+ if isBackwards {
354
+ return ! checkIfInEmojiSequence( index)
320
355
}
321
356
322
- if state. isInEmojiSequence {
323
- return false
324
- } else {
325
- return true
326
- }
357
+ return !state. isInEmojiSequence
327
358
328
359
// GB12 & GB13
329
360
case ( . regionalIndicator, . regionalIndicator) :
330
- if state . isBackwards {
331
- countRIs ( & state , index)
361
+ if isBackwards {
362
+ return countRIs ( index)
332
363
}
333
364
334
- state. shouldBreakRI. toggle ( )
335
- return !state. shouldBreakRI
365
+ defer {
366
+ state. shouldBreakRI. toggle ( )
367
+ }
368
+
369
+ return state. shouldBreakRI
336
370
337
371
// GB999
338
372
default :
@@ -384,14 +418,12 @@ extension _StringGuts {
384
418
// know that we are in an emoji sequence so our initial
385
419
// break question is answered as NO.
386
420
internal func checkIfInEmojiSequence(
387
- _ state: inout _GraphemeBreakingState ,
388
421
_ index: Int
389
- ) {
422
+ ) -> Bool {
390
423
var emojiIdx = String . Index ( _encodedOffset: index)
391
424
392
425
guard emojiIdx != startIndex else {
393
- state. isInEmojiSequence = false
394
- return
426
+ return false
395
427
}
396
428
397
429
let scalars = String . UnicodeScalarView ( self )
@@ -407,13 +439,13 @@ extension _StringGuts {
407
439
case . extend:
408
440
continue
409
441
case . extendedPictographic:
410
- state. isInEmojiSequence = true
411
- return
442
+ return true
412
443
default :
413
- state. isInEmojiSequence = false
414
- return
444
+ return false
415
445
}
416
446
}
447
+
448
+ return false
417
449
}
418
450
419
451
// When walking backwards, it's impossible to know whether we break when we
@@ -447,14 +479,12 @@ extension _StringGuts {
447
479
// | = Not a .regionalIndicator. riCount = 1 which is odd, so break
448
480
// the last two .regionalIndicators.
449
481
internal func countRIs(
450
- _ state: inout _GraphemeBreakingState ,
451
482
_ index: Int
452
- ) {
483
+ ) -> Bool {
453
484
var riIdx = String . Index ( _encodedOffset: index)
454
485
455
486
guard riIdx != startIndex else {
456
- state. shouldBreakRI = false
457
- return
487
+ return false
458
488
}
459
489
460
490
var riCount = 0
@@ -475,6 +505,6 @@ extension _StringGuts {
475
505
riCount += 1
476
506
}
477
507
478
- state . shouldBreakRI = riCount & 1 != 0
508
+ return riCount & 1 != 0
479
509
}
480
510
}
0 commit comments