Skip to content

Commit a5975fe

Browse files
committed
Unicode Normalization API
1 parent 9eccae9 commit a5975fe

35 files changed

+6921
-403
lines changed

stdlib/public/Concurrency/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ set(SWIFT_RUNTIME_CONCURRENCY_SWIFT_SOURCES
151151
ContinuousClock.swift
152152
SuspendingClock.swift
153153
TaskSleepDuration.swift
154+
Unicode+NormalizedScalarsAsync.swift
154155
)
155156

156157
add_swift_target_library(swift_Concurrency ${SWIFT_STDLIB_LIBRARY_BUILD_TYPES} IS_STDLIB
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,324 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2023 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
import Swift
14+
15+
@available(SwiftStdlib 9999, *)
extension AsyncSequence where Element == Unicode.Scalar {

  /// Normalized representations of this sequence's contents.
  ///
  /// The returned value wraps this sequence; request a specific form
  /// through its `nfd`, `nfc`, `nfkd`, or `nfkc` property.
  @inlinable
  public var normalized: Unicode.NormalizedScalars<Self> {
    return Unicode.NormalizedScalars<Self>(self)
  }
}
25+
26+
@available(SwiftStdlib 9999, *)
extension Unicode.NormalizedScalars
where Source: AsyncSequence, Source.Element == Unicode.Scalar {

  /// The contents of the source sequence, in Normalization Form D.
  ///
  /// Normalization to NFD preserves canonical equivalence.
  @inlinable
  public var nfd: AsyncNFD {
    return AsyncNFD(source: source)
  }

  /// An asynchronous sequence yielding the scalars of `source`
  /// in Normalization Form D.
  ///
  /// Normalization to NFD preserves canonical equivalence.
  @frozen
  public struct AsyncNFD: AsyncSequence {

    /// The sequence of scalars being normalized.
    public var source: Source

    @inlinable
    internal init(source: Source) {
      self.source = source
    }

    /// Creates an iterator that normalizes the scalars produced by a new
    /// iterator over `source`.
    @inlinable
    public func makeAsyncIterator() -> AsyncIterator {
      let upstream = source.makeAsyncIterator()
      return AsyncIterator(source: upstream)
    }

    /// Pulls scalars from the source iterator one at a time and feeds them
    /// to an NFD normalizer.
    @frozen
    public struct AsyncIterator: AsyncIteratorProtocol {

      public typealias Element = Unicode.Scalar
      public typealias Failure = Source.Failure

      /// The iterator supplying un-normalized scalars.
      public var source: Source.AsyncIterator

      // Stored-property order is kept as declared: the type is `@frozen`.
      @usableFromInline
      internal var normalizer = Unicode.NFDNormalizer()
      // A source scalar left unconsumed by the previous `resume` call.
      @usableFromInline
      internal var pending: Unicode.Scalar? = nil

      @inlinable
      internal init(source: Source.AsyncIterator) {
        self.source = source
      }

      /// Returns the next normalized scalar.
      ///
      /// Once the source iterator returns `nil`, the result comes from
      /// `normalizer.flush()`.
      ///
      /// - Parameter actor: The isolation forwarded to the source
      ///   iterator's `next(isolation:)`.
      /// - Throws: Any error thrown by the source iterator.
      @inlinable
      public mutating func next(
        isolation actor: isolated (any Actor)?
      ) async throws(Source.Failure) -> Unicode.Scalar? {

        while true {
          // Equivalent to: "pending.take() ?? try await source.next()",
          // spelled out because the right-hand side must be awaited.
          let input: Unicode.Scalar
          if let held = pending.take() {
            input = held
          } else if let fetched = try await source.next(isolation: actor) {
            input = fetched
          } else {
            break
          }

          var feed = CollectionOfOne(input).makeIterator()
          guard let output = normalizer.resume(consuming: &feed) else {
            continue
          }
          // Keep whatever `resume` left in the one-element iterator so it
          // is offered again on the next call.
          pending = feed.next()
          return output
        }
        return normalizer.flush()
      }
    }
  }
}
100+
101+
@available(SwiftStdlib 9999, *)
extension Unicode.NormalizedScalars
where Source: AsyncSequence, Source.Element == Unicode.Scalar {

  /// The contents of the source sequence, in Normalization Form C.
  ///
  /// Normalization to NFC preserves canonical equivalence.
  @inlinable
  public var nfc: AsyncNFC {
    return AsyncNFC(source: source)
  }

  /// An asynchronous sequence yielding the scalars of `source`
  /// in Normalization Form C.
  ///
  /// Normalization to NFC preserves canonical equivalence.
  @frozen
  public struct AsyncNFC: AsyncSequence {

    /// The sequence of scalars being normalized.
    public var source: Source

    @inlinable
    internal init(source: Source) {
      self.source = source
    }

    /// Creates an iterator that normalizes the scalars produced by a new
    /// iterator over `source`.
    @inlinable
    public func makeAsyncIterator() -> AsyncIterator {
      let upstream = source.makeAsyncIterator()
      return AsyncIterator(source: upstream)
    }

    /// Pulls scalars from the source iterator one at a time and feeds them
    /// to an NFC normalizer.
    @frozen
    public struct AsyncIterator: AsyncIteratorProtocol {

      public typealias Element = Unicode.Scalar
      public typealias Failure = Source.Failure

      /// The iterator supplying un-normalized scalars.
      public var source: Source.AsyncIterator

      // Stored-property order is kept as declared: the type is `@frozen`.
      @usableFromInline
      internal var normalizer = Unicode.NFCNormalizer()
      // A source scalar left unconsumed by the previous `resume` call.
      @usableFromInline
      internal var pending: Unicode.Scalar? = nil

      @inlinable
      internal init(source: Source.AsyncIterator) {
        self.source = source
      }

      /// Returns the next normalized scalar.
      ///
      /// Once the source iterator returns `nil`, the result comes from
      /// `normalizer.flush()`.
      ///
      /// - Parameter actor: The isolation forwarded to the source
      ///   iterator's `next(isolation:)`.
      /// - Throws: Any error thrown by the source iterator.
      @inlinable
      public mutating func next(
        isolation actor: isolated (any Actor)?
      ) async throws(Source.Failure) -> Unicode.Scalar? {

        while true {
          // Equivalent to: "pending.take() ?? try await source.next()",
          // spelled out because the right-hand side must be awaited.
          let input: Unicode.Scalar
          if let held = pending.take() {
            input = held
          } else if let fetched = try await source.next(isolation: actor) {
            input = fetched
          } else {
            break
          }

          var feed = CollectionOfOne(input).makeIterator()
          guard let output = normalizer.resume(consuming: &feed) else {
            continue
          }
          // Keep whatever `resume` left in the one-element iterator so it
          // is offered again on the next call.
          pending = feed.next()
          return output
        }
        return normalizer.flush()
      }
    }
  }
}
175+
176+
@available(SwiftStdlib 9999, *)
extension Unicode.NormalizedScalars
where Source: AsyncSequence, Source.Element == Unicode.Scalar {

  /// The contents of the source sequence, in Normalization Form KD.
  ///
  /// Normalization to NFKD does _not_ preserve canonical equivalence.
  @inlinable
  public var nfkd: AsyncNFKD {
    return AsyncNFKD(source: source)
  }

  /// An asynchronous sequence yielding the scalars of `source`
  /// in Normalization Form KD.
  ///
  /// Normalization to NFKD does _not_ preserve canonical equivalence.
  @frozen
  public struct AsyncNFKD: AsyncSequence {

    /// The sequence of scalars being normalized.
    public var source: Source

    @inlinable
    internal init(source: Source) {
      self.source = source
    }

    /// Creates an iterator that normalizes the scalars produced by a new
    /// iterator over `source`.
    @inlinable
    public func makeAsyncIterator() -> AsyncIterator {
      let upstream = source.makeAsyncIterator()
      return AsyncIterator(source: upstream)
    }

    /// Pulls scalars from the source iterator one at a time and feeds them
    /// to an NFKD normalizer.
    @frozen
    public struct AsyncIterator: AsyncIteratorProtocol {

      public typealias Element = Unicode.Scalar
      public typealias Failure = Source.Failure

      /// The iterator supplying un-normalized scalars.
      public var source: Source.AsyncIterator

      // Stored-property order is kept as declared: the type is `@frozen`.
      @usableFromInline
      internal var normalizer = Unicode.NFKDNormalizer()
      // A source scalar left unconsumed by the previous `resume` call.
      @usableFromInline
      internal var pending: Unicode.Scalar? = nil

      @inlinable
      internal init(source: Source.AsyncIterator) {
        self.source = source
      }

      /// Returns the next normalized scalar.
      ///
      /// Once the source iterator returns `nil`, the result comes from
      /// `normalizer.flush()`.
      ///
      /// - Parameter actor: The isolation forwarded to the source
      ///   iterator's `next(isolation:)`.
      /// - Throws: Any error thrown by the source iterator.
      @inlinable
      public mutating func next(
        isolation actor: isolated (any Actor)?
      ) async throws(Source.Failure) -> Unicode.Scalar? {

        while true {
          // Equivalent to: "pending.take() ?? try await source.next()",
          // spelled out because the right-hand side must be awaited.
          let input: Unicode.Scalar
          if let held = pending.take() {
            input = held
          } else if let fetched = try await source.next(isolation: actor) {
            input = fetched
          } else {
            break
          }

          var feed = CollectionOfOne(input).makeIterator()
          guard let output = normalizer.resume(consuming: &feed) else {
            continue
          }
          // Keep whatever `resume` left in the one-element iterator so it
          // is offered again on the next call.
          pending = feed.next()
          return output
        }
        return normalizer.flush()
      }
    }
  }
}
250+
251+
@available(SwiftStdlib 9999, *)
extension Unicode.NormalizedScalars
where Source: AsyncSequence, Source.Element == Unicode.Scalar {

  /// The contents of the source sequence, in Normalization Form KC.
  ///
  /// Normalization to NFKC does _not_ preserve canonical equivalence.
  @inlinable
  public var nfkc: AsyncNFKC {
    return AsyncNFKC(source: source)
  }

  /// An asynchronous sequence yielding the scalars of `source`
  /// in Normalization Form KC.
  ///
  /// Normalization to NFKC does _not_ preserve canonical equivalence.
  @frozen
  public struct AsyncNFKC: AsyncSequence {

    /// The sequence of scalars being normalized.
    public var source: Source

    @inlinable
    internal init(source: Source) {
      self.source = source
    }

    /// Creates an iterator that normalizes the scalars produced by a new
    /// iterator over `source`.
    @inlinable
    public func makeAsyncIterator() -> AsyncIterator {
      let upstream = source.makeAsyncIterator()
      return AsyncIterator(source: upstream)
    }

    /// Pulls scalars from the source iterator one at a time and feeds them
    /// to an NFKC normalizer.
    @frozen
    public struct AsyncIterator: AsyncIteratorProtocol {

      public typealias Element = Unicode.Scalar
      public typealias Failure = Source.Failure

      /// The iterator supplying un-normalized scalars.
      public var source: Source.AsyncIterator

      // Stored-property order is kept as declared: the type is `@frozen`.
      @usableFromInline
      internal var normalizer = Unicode.NFKCNormalizer()
      // A source scalar left unconsumed by the previous `resume` call.
      @usableFromInline
      internal var pending: Unicode.Scalar? = nil

      @inlinable
      internal init(source: Source.AsyncIterator) {
        self.source = source
      }

      /// Returns the next normalized scalar.
      ///
      /// Once the source iterator returns `nil`, the result comes from
      /// `normalizer.flush()`.
      ///
      /// - Parameter actor: The isolation forwarded to the source
      ///   iterator's `next(isolation:)`.
      /// - Throws: Any error thrown by the source iterator.
      @inlinable
      public mutating func next(
        isolation actor: isolated (any Actor)?
      ) async throws(Source.Failure) -> Unicode.Scalar? {

        while true {
          // Equivalent to: "pending.take() ?? try await source.next()",
          // spelled out because the right-hand side must be awaited.
          let input: Unicode.Scalar
          if let held = pending.take() {
            input = held
          } else if let fetched = try await source.next(isolation: actor) {
            input = fetched
          } else {
            break
          }

          var feed = CollectionOfOne(input).makeIterator()
          guard let output = normalizer.resume(consuming: &feed) else {
            continue
          }
          // Keep whatever `resume` left in the one-element iterator so it
          // is offered again on the next call.
          pending = feed.next()
          return output
        }
        return normalizer.flush()
      }
    }
  }
}

stdlib/public/SwiftShims/swift/shims/UnicodeData.h

+9
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,21 @@ __swift_intptr_t _swift_stdlib_getScalarBitArrayIdx(__swift_uint32_t scalar,
4444
SWIFT_RUNTIME_STDLIB_INTERNAL
4545
__swift_uint16_t _swift_stdlib_getNormData(__swift_uint32_t scalar);
4646

47+
SWIFT_RUNTIME_STDLIB_INTERNAL
48+
__swift_uint16_t _swift_stdlib_getCompatibilityNormData(__swift_uint32_t scalar);
49+
4750
SWIFT_RUNTIME_STDLIB_INTERNAL
4851
const __swift_uint8_t * const _swift_stdlib_nfd_decompositions;
4952

5053
SWIFT_RUNTIME_STDLIB_INTERNAL
5154
__swift_uint32_t _swift_stdlib_getDecompositionEntry(__swift_uint32_t scalar);
5255

56+
SWIFT_RUNTIME_STDLIB_INTERNAL
57+
const __swift_uint8_t * const _swift_stdlib_nfkd_decompositions;
58+
59+
SWIFT_RUNTIME_STDLIB_INTERNAL
60+
__swift_uint32_t _swift_stdlib_getCompatibilityDecompositionEntry(__swift_uint32_t scalar);
61+
5362
SWIFT_RUNTIME_STDLIB_INTERNAL
5463
__swift_uint32_t _swift_stdlib_getComposition(__swift_uint32_t x,
5564
__swift_uint32_t y);

stdlib/public/core/CMakeLists.txt

+4
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,8 @@ split_embedded_sources(
116116
NORMAL NewtypeWrapper.swift
117117
EMBEDDED NFC.swift
118118
EMBEDDED NFD.swift
119+
EMBEDDED NFKC.swift
120+
EMBEDDED NFKD.swift
119121
EMBEDDED ObjectIdentifier.swift
120122
EMBEDDED Optional.swift
121123
EMBEDDED OptionSet.swift
@@ -193,6 +195,8 @@ split_embedded_sources(
193195
NORMAL ThreadLocalStorage.swift
194196
EMBEDDED UIntBuffer.swift
195197
EMBEDDED UnavailableStringAPIs.swift
198+
EMBEDDED Unicode+NormalizedScalars.swift
199+
EMBEDDED UnicodeNormalizationCheck.swift
196200
EMBEDDED UnicodeData.swift
197201
EMBEDDED UnicodeEncoding.swift
198202
EMBEDDED UnicodeBreakProperty.swift

0 commit comments

Comments
 (0)