diff --git a/Sources/SwiftSyntax/RawSyntax.swift b/Sources/SwiftSyntax/RawSyntax.swift index bab3d5ba94b..182b91e91ed 100644 --- a/Sources/SwiftSyntax/RawSyntax.swift +++ b/Sources/SwiftSyntax/RawSyntax.swift @@ -85,7 +85,7 @@ fileprivate enum RawSyntaxData { /// Represents the raw tree structure underlying the syntax tree. These nodes /// have no notion of identity and only provide structure to the tree. They /// are immutable and can be freely shared between syntax nodes. -struct RawSyntax { +final class RawSyntax { fileprivate let data: RawSyntaxData let presence: SourcePresence @@ -124,6 +124,13 @@ struct RawSyntax { } }).value } + + /// Creates a copy of `other`. + init(_ other: RawSyntax) { + self.data = other.data + self.presence = other.presence + self.id = other.id + } init(kind: SyntaxKind, layout: [RawSyntax?], presence: SourcePresence, id: SyntaxNodeId? = nil) { @@ -336,7 +343,7 @@ extension RawSyntax { extension RawSyntax: Codable { /// Creates a RawSyntax from the provided Foundation Decoder. - init(from decoder: Decoder) throws { + convenience init(from decoder: Decoder) throws { let container = try decoder.container(keyedBy: CodingKeys.self) let id = try container.decodeIfPresent(SyntaxNodeId.self, forKey: .id) let omitted = try container.decodeIfPresent(Bool.self, forKey: .omitted) ?? false @@ -352,7 +359,7 @@ extension RawSyntax: Codable { guard let lookupNode = lookupFunc(id) else { throw IncrementalDecodingError.nodeLookupFailed(id) } - self = lookupNode + self.init(lookupNode) return } diff --git a/Sources/SwiftSyntax/SwiftSyntax.swift b/Sources/SwiftSyntax/SwiftSyntax.swift index df96532e98e..3551d52194b 100644 --- a/Sources/SwiftSyntax/SwiftSyntax.swift +++ b/Sources/SwiftSyntax/SwiftSyntax.swift @@ -46,17 +46,24 @@ public enum SerializationFormat { case byteTree } +// For WeakLookupTable. +extension RawSyntax : Identifiable {} + /// Deserializes the syntax tree from its serialized form to an object tree in /// Swift. To deserialize incrementally transferred syntax trees, the same /// instance of the deserializer must be used for all subsequent /// deserializations. public final class SyntaxTreeDeserializer { - // FIXME: This lookup table just accumulates nodes, we should invalidate nodes - // that are no longer used at some point and remove them from the table - /// Syntax nodes that have already been parsed and are able to be reused if /// they were omitted in an incremental syntax tree transfer - private var nodeLookupTable: [SyntaxNodeId: RawSyntax] = [:] + private var nodeLookupTable: WeakLookupTable = .init() + + /// Keep a strong reference to the syntax tree that contains the nodes in the + /// `nodeLookupTable`. Because `nodeLookupTable` only holds a weak reference + /// to the RawSyntax nodes, all retired `RawSyntax` nodes will be deallocated + /// once we set a new tree. The weak references in `nodeLookupTable` will then + /// become `nil` and the slot will be reused to refer another node. + private var nodeLookupTree: RawSyntax? = nil /// The IDs of the nodes that were reused as part of incremental syntax /// parsing during the last deserialization @@ -70,7 +77,9 @@ public final class SyntaxTreeDeserializer { let decoder = JSONDecoder() decoder.userInfo[.rawSyntaxDecodedCallback] = self.addToLookupTable decoder.userInfo[.omittedNodeLookupFunction] = self.lookupNode - return try decoder.decode(RawSyntax.self, from: data) + let tree = try decoder.decode(RawSyntax.self, from: data) + self.nodeLookupTree = tree + return tree } /// Deserialize the given data as a ByteTree encoded syntax tree @@ -78,11 +87,13 @@ public final class SyntaxTreeDeserializer { var userInfo: [ByteTreeUserInfoKey: Any] = [:] userInfo[.rawSyntaxDecodedCallback] = self.addToLookupTable userInfo[.omittedNodeLookupFunction] = self.lookupNode - return try ByteTreeReader.read(RawSyntax.self, from: data, + let tree = try ByteTreeReader.read(RawSyntax.self, from: data, userInfo: &userInfo) { (version: ByteTreeProtocolVersion) in return version.major == 1 } + self.nodeLookupTree = tree + return tree } /// Decode a serialized form of SourceFileSyntax to a syntax tree. @@ -116,7 +127,7 @@ public final class SyntaxTreeDeserializer { } private func addToLookupTable(_ node: RawSyntax) { - nodeLookupTable[node.id] = node + nodeLookupTable.insert(node) } } diff --git a/Sources/SwiftSyntax/WeakLookupTable.swift b/Sources/SwiftSyntax/WeakLookupTable.swift new file mode 100644 index 00000000000..0d1ea3dea74 --- /dev/null +++ b/Sources/SwiftSyntax/WeakLookupTable.swift @@ -0,0 +1,228 @@ +//===----------- WeakLookupTable.swift - Swift Syntax Library -------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2018 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// +// This file provides lookup table with weak reference. +//===----------------------------------------------------------------------===// + +/// Protocol for self-identifiable object +protocol Identifiable { + associatedtype Identifier : Hashable + var id: Identifier { get } +} + +private struct WeakReference: ExpressibleByNilLiteral { + weak var value: T? + init(_ value: T?) { + self.value = value + } + init(nilLiteral: ()) { + self.value = nil + } +} + +/// 'Set' like container providing lookup table for objects with identifier. +/// This doesn't take ownership of the objects. Instead, the objects are +/// held by weak references. +/// +/// References are stored in a hash table with simple open adressing. Because +/// of weak reference, unlike normal open addressing, erased bucket are simply +/// turned into 'nil'. +class WeakLookupTable { + + /// Storage for the hash table. + private var buckets: UnsafeMutablePointer> + private var bucketCount: Int + + /// Estimated count of inserted values. This is greater than or equal to + /// the number of actually occupied buckets. + /// i.e. estimatedCount >= _countOccupiedBuckets() + private var estimatedCount: Int + + init(capacity: Int = 0) { + bucketCount = WeakLookupTable._bucketCount(for: capacity) + buckets = .allocate(capacity: bucketCount) + buckets.initialize(repeating: nil, count: bucketCount) + estimatedCount = 0 + } + + deinit { + buckets.deinitialize(count: bucketCount) + buckets.deallocate() + } + + /// Constant max load factor for hash table. + private static var maxLoadFactor: Double { + @inline(__always) get { + return 0.75 + } + } + + /// Minimal number of bucket count enough to hold specified capacity of + /// objects with taking max load factor into account. + private static func _minimalBucketCount(for capacity: Int) -> Int { + return Int((Double(capacity) / maxLoadFactor).rounded(.up)) + } + + /// Number of bucket count to allocate to hold specified number of objects. + /// This is the next power of 2 greater than or equal to + /// '_minimalBucketCount(for: capacity)' + private static func _bucketCount(for capacity: Int, + from current: Int = 2) -> Int { + // Bucket count must always be power of 2. + precondition((current & (current - 1)) == 0) + // Minimum is 2 to guarantee at least 1 hole. + precondition(current >= 2) + + let minimalBucketCount = _minimalBucketCount(for: capacity) + + // Make sure it's representable. If 'minimalBucketCount' here is over + // 0x4000_..., the bucket count must be 0x8000_... thus overflows. + precondition(minimalBucketCount <= (Int.max >> 1) + 1) + + var bucketCount = current + while bucketCount < minimalBucketCount { + // '&*=' for performance. Guaranteed by above 'precondition()'. + bucketCount &*= 2 + } + return bucketCount + } + + private var _bucketMask: Int { + @inline(__always) get { + // '&-' for performance. We know 'bucketCount >= 2'. + return bucketCount &- 1 + } + } + + @inline(__always) + private func _idealBucket(for id: Element.Identifier) -> Int { + return id.hashValue & _bucketMask + } + + /// Finds the bucket where the object with the specified id should be stored + /// to. + private + func _findHole(_ id: Element.Identifier) -> (pos: Int, alreadyExists: Bool) { + var bucket = _idealBucket(for: id) + + // Starting from the ideal bucket for the id, search an available bucket, + // or the bucket holding the id. + while true { + guard let obj = buckets[bucket].value else { + return (bucket, false) + } + if obj.id == id { + return (bucket, true) + } + // '&+' for performance. 'bucketCount' is 0x4000_... or below. + bucket = (bucket &+ 1) & _bucketMask + } + } + + /// Reserves enough space to store the specified number of elements. Returns + /// true if resizing happened. + func reserveCapacity(_ requiredCapacity: Int) -> Bool { + let requiredBucketCount = WeakLookupTable + ._bucketCount(for: requiredCapacity, from: bucketCount) + if (bucketCount >= requiredBucketCount) { + return false + } + + // Slow path. Resizing. + let oldBuckets = buckets + let oldBucketRange = buckets ..< buckets.advanced(by: bucketCount) + + bucketCount = requiredBucketCount + buckets = .allocate(capacity: requiredBucketCount) + buckets.initialize(repeating: nil, count: requiredBucketCount) + + // Move all nodes from the old buffer. + for oldBucket in oldBucketRange { + if let id = oldBucket.pointee.value?.id { + let newBucket = buckets.advanced(by: _findHole(id).pos) + newBucket.moveAssign(from: oldBucket, count: 1) + } else { + oldBucket.deinitialize(count: 1) + } + } + + oldBuckets.deallocate() + + return true + } + + /// Count the actual number of occupied buckets. + @inline(__always) + private func _countOccupiedBuckets() -> Int { + var count = 0 + for i in 0 ..< bucketCount where buckets[i].value != nil { + // '&+=' for performance. 'bucketCount' is 0x4000_... or below. + count &+= 1 + } + return count + } + + /// Reserves enough space to store a single new object. Returns true if + /// resizing happened. + private func _ensurePlusOneCapacity() -> Bool { + // '&+' for performance. 'estimatedCount' is always less than 'bucketCount' + // which is 0x4000_... or below. + if bucketCount >= WeakLookupTable + ._minimalBucketCount(for: estimatedCount &+ 1) { + return false + } + + // Slow path. + estimatedCount = _countOccupiedBuckets() + // '&+' for performance. We know 'estimatedCount' derived by + // '_countOccupiedBuckets()' is equal to or less than previous + // 'estimatedCount'. + return reserveCapacity(estimatedCount &+ 1) + } + + /// Inserts the given object into the table. + @discardableResult + func insert(_ obj: Element) -> Bool { + var (pos, alreadyExists) = _findHole(obj.id) + if alreadyExists { + return false + } + + if /*resized=*/_ensurePlusOneCapacity() { + pos = _findHole(obj.id).pos + } + buckets[pos].value = obj + // '&+=' for performance. '_ensurePlusOneCapacity()' ensures it's safe. + estimatedCount &+= 1 + return true + } + + /// Get a object with specified id. Returns 'nil' if the object hasn't been + /// insert()-ed or it's already been freed. + subscript(id: Element.Identifier) -> Element? { + // Since we don't fill the bucket when the object is freed (because we don't + // know), we can't stop iteration at a hole. So in the worst case (i.e. if + // the object doesn't exist in the table), full linear search is needed. + // However, since we assume the object exists and hasn't been freed yet, + // we expect it's stored near the 'idealBucket' anyway. + let idealBucket = _idealBucket(for: id) + var bucket = idealBucket + repeat { + if let obj = buckets[bucket].value, obj.id == id { + return obj + } + // '&+' for performance. 'bucketCount' is 0x4000_... or below. + bucket = (bucket &+ 1) & _bucketMask + } while bucket != idealBucket + + return nil + } +} diff --git a/Tests/LinuxMain.swift b/Tests/LinuxMain.swift index 2385dc58522..eee4c1b2a4f 100644 --- a/Tests/LinuxMain.swift +++ b/Tests/LinuxMain.swift @@ -1,14 +1,20 @@ import XCTest import SwiftSyntaxTest -XCTMain([ - testCase(AbsolutePositionTestCase.allTests), - testCase(DecodeSyntaxTestCase.allTests), - testCase(DiagnosticTestCase.allTests), - testCase(LazyCachingTestCase.allTests), - testCase(ParseFileTestCase.allTests), - testCase(SyntaxChildrenAPITestCase.allTests), - testCase(SyntaxCollectionsAPITestCase.allTests), - testCase(SyntaxFactoryAPITestCase.allTests), - testCase(SyntaxVisitorTestCase.allTests), -]) +XCTMain({ () -> [XCTestCaseEntry] in + var testCases: [XCTestCaseEntry] = [ + testCase(AbsolutePositionTestCase.allTests), + testCase(DecodeSyntaxTestCase.allTests), + testCase(DiagnosticTestCase.allTests), + testCase(LazyCachingTestCase.allTests), + testCase(ParseFileTestCase.allTests), + testCase(SyntaxChildrenAPITestCase.allTests), + testCase(SyntaxCollectionsAPITestCase.allTests), + testCase(SyntaxFactoryAPITestCase.allTests), + testCase(SyntaxVisitorTestCase.allTests), + ] +#if DEBUG + testCases.append(testCase(WeakLookupTableTestCase.allTests)) +#endif + return testCases +}()) diff --git a/Tests/SwiftSyntaxTest/WeakLookupTableTest.swift b/Tests/SwiftSyntaxTest/WeakLookupTableTest.swift new file mode 100644 index 00000000000..1c8152f6d57 --- /dev/null +++ b/Tests/SwiftSyntaxTest/WeakLookupTableTest.swift @@ -0,0 +1,113 @@ +#if DEBUG // @testable import + +import XCTest +@testable import SwiftSyntax + +final class MinimalNode : Identifiable { + let id: String + let children: [MinimalNode?] + init(id: String, children: [MinimalNode?]) { + self.id = id + self.children = children + } +} + +func createNode( + _ table: WeakLookupTable, + _ id: String, + _ children: [MinimalNode?] = [] +) -> MinimalNode { + let node = MinimalNode(id: id, children: children) + XCTAssertTrue(table.insert(node)) + return node +} + +public class WeakLookupTableTestCase: XCTestCase { + + public static let allTests = [ + ("testBasic", testBasic), + ] + + func testBasic() { + let table = WeakLookupTable() + var tree: MinimalNode? = nil + + // ------------------------------------------------------------------------ + // Initial. + tree = createNode(table, "root1", [ + createNode(table, "1", [ + createNode(table, "1-1"), + createNode(table, "1-2"), + createNode(table, "1-3"), + ]), + createNode(table, "2", [ + createNode(table, "2-1"), + nil, + createNode(table, "2-2"), + ]), + createNode(table, "3"), + ]) + + XCTAssertNil(table["notexist"]) + + XCTAssertNotNil(table["root1"]) + XCTAssertNotNil(table["1"]) + XCTAssertNotNil(table["1-1"]) + XCTAssertNotNil(table["1-2"]) + XCTAssertNotNil(table["1-3"]) + XCTAssertNotNil(table["2"]) + XCTAssertNotNil(table["2-1"]) + XCTAssertNotNil(table["2-2"]) + XCTAssertNotNil(table["3"]) + + // ------------------------------------------------------------------------ + // Partially replaced. + tree = createNode(table, "root2", [ + table["1"], + createNode(table, "4", [ + table["2-1"], + nil, + createNode(table, "4-3") + ]), + table["3"], + ]) + + // Reused nodes. + XCTAssertNotNil(table["1"]) + XCTAssertNotNil(table["1-1"]) + XCTAssertNotNil(table["1-2"]) + XCTAssertNotNil(table["1-3"]) + XCTAssertNotNil(table["2-1"]) + XCTAssertNotNil(table["3"]) + + // Newly added nodes. + XCTAssertNotNil(table["root2"]) + XCTAssertNotNil(table["4"]) + XCTAssertNotNil(table["4-3"]) + + // Destroyed nodes. + XCTAssertNil(table["root1"]) + XCTAssertNil(table["2"]) + XCTAssertNil(table["2-2"]) + + // ------------------------------------------------------------------------ + // Completely replaced. + tree = createNode(table, "empty") + + XCTAssertNotNil(table["empty"]) + + XCTAssertNil(table["root2"]) + XCTAssertNil(table["1"]) + XCTAssertNil(table["1-1"]) + XCTAssertNil(table["1-2"]) + XCTAssertNil(table["1-3"]) + XCTAssertNil(table["2-1"]) + XCTAssertNil(table["3"]) + XCTAssertNil(table["4"]) + XCTAssertNil(table["4-3"]) + + _ = tree // Prevents 'never read' warning + } +} + +#endif // DEBUG