Skip to content

Commit b189084

Browse files
authored
[utils] Add a Unicode data generator util package (#39213)
* Add a Unicode data generator util package * Add a common GenUtils library * Use string methods for reading/writing files
1 parent 3bad9a0 commit b189084

File tree

9 files changed

+467
-0
lines changed

9 files changed

+467
-0
lines changed

utils/gen-unicode-data/.gitignore

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
.DS_Store
2+
/.build
3+
/Packages
4+
/*.xcodeproj
5+
xcuserdata/
6+
DerivedData/
7+
.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata
8+
Package.resolved

utils/gen-unicode-data/Package.swift

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// swift-tools-version:5.4
2+
3+
import PackageDescription
4+
5+
// Manifest for the Unicode data generator utilities. The package currently
// consists of a single target, `GenUtils`, which has no dependencies.
let package = Package(
  name: "GenUnicodeData",
  platforms: [
    .macOS(.v10_15),
  ],
  targets: [
    .target(name: "GenUtils", dependencies: []),
  ]
)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2021 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
// A fixed-size array of bits packed into 64-bit words.
//
// Bit `n` lives in word `n / 64` at bit position `n % 64`. Accesses are only
// checked against the bounds of `words`, not against `size`.
public struct BitArray {
  // Backing storage, 64 bits per word.
  public var words: [UInt64]

  // The number of bits this array was created with.
  public var size: UInt16

  // Creates a bit array able to hold `size` bits, all initially cleared.
  //
  // Traps if `size` does not fit in a `UInt16`.
  public init(size: Int) {
    let wordCount = (size + 63) / 64
    words = [UInt64](repeating: 0, count: wordCount)
    self.size = UInt16(size)
  }

  // Reads or writes the bit at position `bit`.
  public subscript(_ bit: Int) -> Bool {
    get {
      let (index, mask) = Self.locate(bit)
      return words[index] & mask != 0
    }

    set {
      let (index, mask) = Self.locate(bit)

      if newValue {
        words[index] |= mask
      } else {
        words[index] &= ~mask
      }
    }
  }

  // Sets the bit at position `bit` and returns `true` if it was previously
  // clear (i.e. this call actually changed the array), `false` otherwise.
  public mutating func insert(_ bit: Int) -> Bool {
    let (index, mask) = Self.locate(bit)
    let wasClear = words[index] & mask == 0

    words[index] |= mask

    return wasClear
  }

  // Maps a bit position to the index of the word holding it and the mask
  // selecting it within that word.
  private static func locate(_ bit: Int) -> (index: Int, mask: UInt64) {
    return (bit / 64, 1 << (bit % 64))
  }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2021 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
// Appends a `static const` C array definition for `collection` to `result`.
//
// The array is declared as `static const <type> <name>[<count>]`, its
// elements are rendered by `formatter` and laid out by `formatCollection`,
// and the definition is followed by a blank line.
public func emitCollection<C: Collection>(
  _ collection: C,
  name: String,
  type: String,
  into result: inout String,
  formatter: (C.Element) -> String
) {
  // Declaration line that opens the initializer list.
  let header = "static const \(type) \(name)[\(collection.count)] = {\n"
  result.append(header)

  formatCollection(collection, into: &result, using: formatter)

  // Close the initializer list and leave one empty line after it.
  result.append("\n};\n\n")
}
29+
30+
// Appends a `static const` C array definition for a collection of fixed-width
// integers to `result`.
//
// The C element type is derived from the bit width of the Swift element type
// (`__swift_uint<bitWidth>_t`) and every element is written as an uppercase
// hexadecimal literal.
public func emitCollection<C: Collection>(
  _ collection: C,
  name: String,
  into result: inout String
) where C.Element: FixedWidthInteger {
  let elementType = "__swift_uint\(C.Element.bitWidth)_t"
  result.append(
    "static const \(elementType) \(name)[\(collection.count)] = {\n"
  )

  formatCollection(collection, into: &result) { value in
    "0x" + String(value, radix: 16, uppercase: true)
  }

  result.append("\n};\n\n")
}
46+
47+
// Emits an abstract minimal perfect hash function into C arrays.
//
// Three groups of arrays are appended to `result`, in this order: the bit
// array sizes, the bit arrays themselves, and the ranks. Every emitted array
// name is prefixed with `name`.
public func emitMph(_ mph: Mph, name: String, into result: inout String) {
  // 1. `<name>_sizes`
  emitMphSizes(mph, name, into: &result)

  // 2. `<name>_keys<i>` followed by `<name>_keys`
  emitMphBitarrays(mph, name, into: &result)

  // 3. `<name>_ranks<i>` followed by `<name>_ranks`
  emitMphRanks(mph, name, into: &result)
}
53+
54+
// BitArray sizes
//
// Emits `<name>_sizes`, an array of `__swift_uint16_t` holding the `size` of
// each of the hash function's bit arrays as an uppercase hexadecimal literal.
func emitMphSizes(_ mph: Mph, _ name: String, into result: inout String) {
  emitCollection(
    mph.bitArrays,
    name: "\(name)_sizes",
    type: "__swift_uint16_t",
    into: &result,
    formatter: { bitArray in
      "0x" + String(bitArray.size, radix: 16, uppercase: true)
    }
  )
}
65+
66+
// Emits the words of every bit array as its own array named `<name>_keys<i>`,
// then a `<name>_keys` array of pointers referring to each of them by name.
func emitMphBitarrays(_ mph: Mph, _ name: String, into result: inout String) {
  // Individual bitarrays

  for (position, bitArray) in mph.bitArrays.enumerated() {
    let tableName = "\(name)_keys\(position)"
    emitCollection(bitArray.words, name: tableName, into: &result)
  }

  // Overall bitarrays

  emitCollection(
    mph.bitArrays.indices,
    name: "\(name)_keys",
    type: "__swift_uint64_t * const",
    into: &result,
    formatter: { index in "\(name)_keys\(index)" }
  )
}
84+
85+
// Emits every rank table as its own array named `<name>_ranks<i>`, then a
// `<name>_ranks` array of pointers referring to each of them by name.
func emitMphRanks(_ mph: Mph, _ name: String, into result: inout String) {
  // Individual ranks

  for (position, rankTable) in mph.ranks.enumerated() {
    let tableName = "\(name)_ranks\(position)"
    emitCollection(rankTable, name: tableName, into: &result)
  }

  // Overall ranks

  emitCollection(
    mph.ranks.indices,
    name: "\(name)_ranks",
    type: "__swift_uint16_t * const",
    into: &result,
    formatter: { index in "\(name)_ranks\(index)" }
  )
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2021 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
import Foundation
14+
15+
// Reads the whole file at `path` and returns its contents as a string.
//
// The file is decoded explicitly as UTF-8, the same encoding `write(_:to:)`
// produces, rather than leaving the encoding to be guessed. Any failure
// (missing file, unreadable file, invalid UTF-8) is fatal, and the message
// names the offending path.
public func readFile(_ path: String) -> String {
  do {
    return try String(contentsOfFile: path, encoding: .utf8)
  } catch {
    fatalError("Unable to read '\(path)': \(error.localizedDescription)")
  }
}
22+
23+
// Writes `data` to the file at `path` as UTF-8, non-atomically.
//
// Any failure to write is fatal and reports the error's localized
// description.
public func write(_ data: String, to path: String) {
  let outcome = Result {
    try data.write(toFile: path, atomically: false, encoding: .utf8)
  }

  if case .failure(let failure) = outcome {
    fatalError(failure.localizedDescription)
  }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2021 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
// Takes an unflattened array of scalar ranges and some Equatable property and
// attempts to merge ranges who share the same Equatable property. E.g:
//
//     0x0 ... 0xA  = .control
//     0xB ... 0xB  = .control
//     0xC ... 0x1F = .control
//
// into:
//
//     0x0 ... 0x1F = .control
//
// The input is first sorted by lower bound. A range is merged into the
// previous result only when it carries the same property and starts exactly
// one past the previous range's upper bound; overlapping or separated ranges
// are kept as individual entries.
public func flatten<T: Equatable>(
  _ unflattened: [(ClosedRange<UInt32>, T)]
) -> [(ClosedRange<UInt32>, T)] {
  var result: [(ClosedRange<UInt32>, T)] = []

  for elt in unflattened.sorted(by: { $0.0.lowerBound < $1.0.lowerBound }) {
    // Adjacency is tested as `lower - upper == 1` (guarded by `upper < lower`)
    // instead of `lower == upper + 1`, so a previous range ending at
    // `UInt32.max` cannot trap on overflow.
    guard let last = result.last,
          last.1 == elt.1,
          last.0.upperBound < elt.0.lowerBound,
          elt.0.lowerBound - last.0.upperBound == 1 else {
      result.append(elt)
      continue
    }

    result[result.count - 1].0 = last.0.lowerBound ... elt.0.upperBound
  }

  return result
}
43+
44+
// Takes an unflattened array of scalars and some Equatable property and
// attempts to merge scalars into ranges who share the same Equatable
// property. E.g:
//
//     0x9 = .control
//     0xA = .control
//     0xB = .control
//     0xC = .control
//
// into:
//
//     0x9 ... 0xC = .control
//
// The input is first sorted by scalar value. A scalar extends the previous
// range only when it carries the same property and is exactly one past that
// range's upper bound; otherwise (including for repeated scalars) it starts
// a new single-element range.
public func flatten<T: Equatable>(
  _ unflattened: [(UInt32, T)]
) -> [(ClosedRange<UInt32>, T)] {
  var result: [(ClosedRange<UInt32>, T)] = []

  for elt in unflattened.sorted(by: { $0.0 < $1.0 }) {
    // Adjacency is tested as `scalar - upper == 1` (guarded by
    // `upper < scalar`) instead of `scalar == upper + 1`, so a previous range
    // ending at `UInt32.max` cannot trap on overflow.
    guard let last = result.last,
          last.1 == elt.1,
          last.0.upperBound < elt.0,
          elt.0 - last.0.upperBound == 1 else {
      result.append((elt.0 ... elt.0, elt.1))
      continue
    }

    result[result.count - 1].0 = last.0.lowerBound ... elt.0
  }

  return result
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2021 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
// Given a collection, format it into a string within 80 columns and fitting as
// many elements in a row as possible.
//
// Every element is rendered with `handler` and followed by a comma. Elements
// on the same row are separated by a single space and every row starts with a
// two-column indentation. Nothing is appended for an empty collection, and no
// trailing newline is written after the last row.
public func formatCollection<C: Collection>(
  _ c: C,
  into result: inout String,
  using handler: (C.Element) -> String
) {
  let maxColumns = 80

  // The indentation is built from its width so that the text emitted can never
  // drift from the width the column accounting below assumes.
  let indent = String(repeating: " ", count: 2)

  // Our row length always starts at the width of the initial indentation.
  var rowLength = indent.count
  var isFirstElement = true

  for element in c {
    let string = handler(element)

    if isFirstElement {
      // The very first element only needs the indentation. It is never
      // wrapped, even if it is too wide on its own, because wrapping would
      // just leave a whitespace-only first row.
      result += indent
      isFirstElement = false
    } else if rowLength + string.count + 1 > maxColumns {
      // The element plus its trailing comma does not fit: start a new row.
      result += "\n" + indent
      rowLength = indent.count
    } else {
      result += " "
    }

    result += "\(string),"

    // string.count + , + space
    rowLength += string.count + 1 + 1
  }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2021 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
// Hashes `key` into the range `0 ..< n`.
//
// `n` is folded into the upper 32 bits of the key before the combined value
// is run through `murmur3` with `seed`; the 32-bit result is then reduced
// modulo `n`. Traps if `seed` does not fit in a `UInt32` or if `n` is zero.
func hash(_ key: UInt64, _ n: UInt64, seed: UInt64) -> UInt64 {
  let combinedKey = (n << 32) | key
  let digest = murmur3(combinedKey, seed: UInt32(seed))

  return UInt64(digest) % n
}
19+
20+
// Mixes a 32-bit block: wrapping multiply by 0xCC9E2D51, rotate left by 15
// bits, then wrapping multiply by 0x1B873593.
func scramble(_ key: UInt32) -> UInt32 {
  let premultiplied = key &* 0xCC9E2D51
  let rotated = (premultiplied << 15) | (premultiplied >> 17)

  return rotated &* 0x1B873593
}
27+
28+
// Computes a 32-bit murmur3 hash of the 64-bit `key`, starting from `seed`.
//
// The key is consumed as two 32-bit blocks, low half first. Each block is
// passed through `scramble` and mixed into the running state, after which
// the state is finalized.
func murmur3(_ key: UInt64, seed: UInt32) -> UInt32 {
  var state = seed

  // Shift 0 selects the low 32 bits of the key, shift 32 the high 32 bits.
  for shift in stride(from: 0, to: 64, by: 32) {
    let block = UInt32(truncatingIfNeeded: key >> shift)

    state ^= scramble(block)
    state = (state << 13) | (state >> 19)
    state = state &* 5 &+ 0xE6546B64
  }

  // Presumably the key length in bytes (8) -- confirm against the reference
  // algorithm.
  state ^= 8

  // Final mixing of the state bits.
  state ^= state >> 16
  state &*= 0x85EBCA6B
  state ^= state >> 13
  state &*= 0xC2B2AE35
  state ^= state >> 16

  return state
}

0 commit comments

Comments
 (0)