swiftlang · rintaro · Sep 22, 2018 · Aug 29, 2018 · Aug 29, 2018 · Sep 14, 2018
diff --git a/Sources/SwiftSyntax/RawSyntax.swift b/Sources/SwiftSyntax/RawSyntax.swift
@@ -85,7 +85,7 @@ fileprivate enum RawSyntaxData {
 /// Represents the raw tree structure underlying the syntax tree. These nodes
 /// have no notion of identity and only provide structure to the tree. They
 /// are immutable and can be freely shared between syntax nodes.
-struct RawSyntax {
+final class RawSyntax {
   fileprivate let data: RawSyntaxData
   let presence: SourcePresence
 
@@ -124,6 +124,13 @@ struct RawSyntax {
       }
     }).value
   }
+
+  /// Creates a copy of `other`.
+  init(_ other: RawSyntax) {
+    self.data = other.data
+    self.presence = other.presence
+    self.id = other.id
+  }
 
   init(kind: SyntaxKind, layout: [RawSyntax?], presence: SourcePresence,
        id: SyntaxNodeId? = nil) {
@@ -336,7 +343,7 @@ extension RawSyntax {
 
 extension RawSyntax: Codable {
   /// Creates a RawSyntax from the provided Foundation Decoder.
-  init(from decoder: Decoder) throws {
+  convenience init(from decoder: Decoder) throws {
     let container = try decoder.container(keyedBy: CodingKeys.self)
     let id = try container.decodeIfPresent(SyntaxNodeId.self, forKey: .id)
     let omitted = try container.decodeIfPresent(Bool.self, forKey: .omitted) ?? false
@@ -352,7 +359,7 @@ extension RawSyntax: Codable {
       guard let lookupNode = lookupFunc(id) else {
         throw IncrementalDecodingError.nodeLookupFailed(id)
       }
-      self = lookupNode
+      self.init(lookupNode)
       return
     }
 

diff --git a/Sources/SwiftSyntax/SwiftSyntax.swift b/Sources/SwiftSyntax/SwiftSyntax.swift
@@ -46,17 +46,24 @@ public enum SerializationFormat {
   case byteTree
 }
 
+// For WeakLookupTable.
+extension RawSyntax : Identifiable {}
+
 /// Deserializes the syntax tree from its serialized form to an object tree in
 /// Swift. To deserialize incrementally transferred syntax trees, the same
 /// instance of the deserializer must be used for all subsequent
 /// deserializations.
 public final class SyntaxTreeDeserializer {
-  // FIXME: This lookup table just accumulates nodes, we should invalidate nodes
-  // that are no longer used at some point and remove them from the table
-
   /// Syntax nodes that have already been parsed and are able to be reused if
   /// they were omitted in an incremental syntax tree transfer
-  private var nodeLookupTable: [SyntaxNodeId: RawSyntax] = [:]
+  private var nodeLookupTable: WeakLookupTable<RawSyntax> = .init()
+
+  /// Keep a strong reference to the syntax tree that contains the nodes in the
+  /// `nodeLookupTable`. Because `nodeLookupTable` only holds a weak reference 
+  /// to the RawSyntax nodes, all retired `RawSyntax` nodes will be deallocated
+  /// once we set a new tree. The weak references in `nodeLookupTable` will then
+  /// become `nil` and the slot will be reused to refer another node.
+  private var nodeLookupTree: RawSyntax? = nil
 
   /// The IDs of the nodes that were reused as part of incremental syntax
   /// parsing during the last deserialization
@@ -70,19 +77,23 @@ public final class SyntaxTreeDeserializer {
     let decoder = JSONDecoder()
     decoder.userInfo[.rawSyntaxDecodedCallback] = self.addToLookupTable
     decoder.userInfo[.omittedNodeLookupFunction] = self.lookupNode
-    return try decoder.decode(RawSyntax.self, from: data)
+    let tree = try decoder.decode(RawSyntax.self, from: data)
+    self.nodeLookupTree = tree
+    return tree
   }
 
   /// Deserialize the given data as a ByteTree encoded syntax tree
   private func deserializeByteTree(_ data: Data) throws -> RawSyntax {
     var userInfo: [ByteTreeUserInfoKey: Any] = [:]
     userInfo[.rawSyntaxDecodedCallback] = self.addToLookupTable
     userInfo[.omittedNodeLookupFunction] = self.lookupNode
-    return try ByteTreeReader.read(RawSyntax.self, from: data,
+    let tree = try ByteTreeReader.read(RawSyntax.self, from: data,
                                             userInfo: &userInfo) {
       (version: ByteTreeProtocolVersion) in
       return version.major == 1
     }
+    self.nodeLookupTree = tree
+    return tree
   }
 
   /// Decode a serialized form of SourceFileSyntax to a syntax tree.
@@ -116,7 +127,7 @@ public final class SyntaxTreeDeserializer {
   }
 
   private func addToLookupTable(_ node: RawSyntax) {
-    nodeLookupTable[node.id] = node
+    nodeLookupTable.insert(node)
   }
 }
 

diff --git a/Sources/SwiftSyntax/WeakLookupTable.swift b/Sources/SwiftSyntax/WeakLookupTable.swift
@@ -0,0 +1,228 @@
+//===----------- WeakLookupTable.swift - Swift Syntax Library -------------===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2018 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+// This file provides lookup table with weak reference.
+//===----------------------------------------------------------------------===//
+
+/// Protocol for self-identifiable object
+protocol Identifiable {
+  associatedtype Identifier : Hashable
+  var id: Identifier { get }
+}
+
+private struct WeakReference<T: AnyObject>: ExpressibleByNilLiteral {
+  weak var value: T?
+  init(_ value: T?) {
+    self.value = value
+  }
+  init(nilLiteral: ()) {
+    self.value = nil
+  }
+}
+
+/// 'Set' like container providing lookup table for objects with identifier.
+/// This doesn't take ownership of the objects. Instead, the objects are
+/// held by weak references.
+/// 
+/// References are stored in a hash table with simple open adressing. Because
+/// of weak reference, unlike normal open addressing, erased bucket are simply
+/// turned into 'nil'.
+class WeakLookupTable<Element: Identifiable & AnyObject> {
+
+  /// Storage for the hash table.
+  private var buckets: UnsafeMutablePointer<WeakReference<Element>>
+  private var bucketCount: Int
+
+  /// Estimated count of inserted values. This is greater than or equal to
+  /// the number of actually occupied buckets.
+  /// i.e. estimatedCount >= _countOccupiedBuckets()
+  private var estimatedCount: Int
+
+  init(capacity: Int = 0) {
+    bucketCount = WeakLookupTable<Element>._bucketCount(for: capacity)
+    buckets = .allocate(capacity: bucketCount)
+    buckets.initialize(repeating: nil, count: bucketCount)
+    estimatedCount = 0
+  }
+
+  deinit {
+    buckets.deinitialize(count: bucketCount)
+    buckets.deallocate()
+  }
+
+  /// Constant max load factor for hash table.
+  private static var maxLoadFactor: Double {
+    @inline(__always) get {
+      return 0.75
+    }
+  }
+
+  /// Minimal number of bucket count enough to hold specified capacity of
+  /// objects with taking max load factor into account.
+  private static func _minimalBucketCount(for capacity: Int) -> Int {
+    return Int((Double(capacity) / maxLoadFactor).rounded(.up))
+  }
+
+  /// Number of bucket count to allocate to hold specified number of objects.
+  /// This is the next power of 2 greater than or equal to
+  /// '_minimalBucketCount(for: capacity)'
+  private static func _bucketCount(for capacity: Int,
+                                   from current: Int = 2) -> Int {
+    // Bucket count must always be power of 2.
+    precondition((current & (current - 1)) == 0)
+    // Minimum is 2 to guarantee at least 1 hole.
+    precondition(current >= 2)
+
+    let minimalBucketCount = _minimalBucketCount(for: capacity)
+
+    // Make sure it's representable. If 'minimalBucketCount' here is over
+    // 0x4000_..., the bucket count must be 0x8000_... thus overflows.
+    precondition(minimalBucketCount <= (Int.max >> 1) + 1)
+
+    var bucketCount = current
+    while bucketCount < minimalBucketCount {
+      // '&*=' for performance. Guaranteed by above 'precondition()'.
+      bucketCount &*= 2
+    }
+    return bucketCount
+  }
+
+  private var _bucketMask: Int {
+    @inline(__always) get {
+      // '&-' for performance. We know 'bucketCount >= 2'.
+      return bucketCount &- 1
+    }
+  }
+
+  @inline(__always)
+  private func _idealBucket(for id: Element.Identifier) -> Int {
+    return id.hashValue & _bucketMask
+  }
+
+  /// Finds the bucket where the object with the specified id should be stored
+  /// to.
+  private
+  func _findHole(_ id: Element.Identifier) -> (pos: Int, alreadyExists: Bool) {
+    var bucket = _idealBucket(for: id)
+
+    // Starting from the ideal bucket for the id, search an available bucket,
+    // or the bucket holding the id.
+    while true {
+      guard let obj = buckets[bucket].value else {
+        return (bucket, false)
+      }
+      if obj.id == id {
+        return (bucket, true)
+      }
+      // '&+' for performance. 'bucketCount' is 0x4000_... or below.
+      bucket = (bucket &+ 1) & _bucketMask
+    }
+  }
+
+  /// Reserves enough space to store the specified number of elements. Returns
+  /// true if resizing happened.
+  func reserveCapacity(_ requiredCapacity: Int) -> Bool {
+    let requiredBucketCount = WeakLookupTable<Element>
+      ._bucketCount(for: requiredCapacity, from: bucketCount)
+    if (bucketCount >= requiredBucketCount) {
+      return false
+    }
+
+    // Slow path. Resizing.
+    let oldBuckets = buckets
+    let oldBucketRange = buckets ..< buckets.advanced(by: bucketCount)
+
+    bucketCount = requiredBucketCount
+    buckets = .allocate(capacity: requiredBucketCount)
+    buckets.initialize(repeating: nil, count: requiredBucketCount)
+
+    // Move all nodes from the old buffer.
+    for oldBucket in oldBucketRange {
+      if let id = oldBucket.pointee.value?.id {
+        let newBucket = buckets.advanced(by: _findHole(id).pos)
+        newBucket.moveAssign(from: oldBucket, count: 1)
+      } else {
+        oldBucket.deinitialize(count: 1)
+      }
+    }
+
+    oldBuckets.deallocate()
+
+    return true
+  }
+
+  /// Count the actual number of occupied buckets.
+  @inline(__always)
+  private func _countOccupiedBuckets() -> Int {
+    var count = 0
+    for i in 0 ..< bucketCount where buckets[i].value != nil {
+      // '&+=' for performance. 'bucketCount' is 0x4000_... or below.
+      count &+= 1
+    }
+    return count
+  }
+
+  /// Reserves enough space to store a single new object. Returns true if
+  /// resizing happened.
+  private func _ensurePlusOneCapacity() -> Bool {
+    // '&+' for performance. 'estimatedCount' is always less than 'bucketCount'
+    // which is 0x4000_... or below.
+    if bucketCount >= WeakLookupTable<Element>
+                        ._minimalBucketCount(for: estimatedCount &+ 1) {
+      return false
+    }
+
+    // Slow path.
+    estimatedCount = _countOccupiedBuckets()
+    // '&+' for performance. We know 'estimatedCount' derived by
+    // '_countOccupiedBuckets()' is equal to or less than previous
+    // 'estimatedCount'.
+    return reserveCapacity(estimatedCount &+ 1)
+  }
+
+  /// Inserts the given object into the table.
+  @discardableResult
+  func insert(_ obj: Element) -> Bool {
+    var (pos, alreadyExists) = _findHole(obj.id)
+    if alreadyExists {
+      return false
+    }
+
+    if /*resized=*/_ensurePlusOneCapacity() {
+      pos = _findHole(obj.id).pos
+    }
+    buckets[pos].value = obj
+    // '&+=' for performance. '_ensurePlusOneCapacity()' ensures it's safe.
+    estimatedCount &+= 1
+    return true
+  }
+
+  /// Get a object with specified id. Returns 'nil' if the object hasn't been
+  /// insert()-ed or it's already been freed.
+  subscript(id: Element.Identifier) -> Element? {
+    // Since we don't fill the bucket when the object is freed (because we don't
+    // know), we can't stop iteration at a hole. So in the worst case (i.e. if
+    // the object doesn't exist in the table), full linear search is needed.
+    // However, since we assume the object exists and hasn't been freed yet,
+    // we expect it's stored near the 'idealBucket' anyway.
+    let idealBucket = _idealBucket(for: id)
+    var bucket = idealBucket
+    repeat {
+      if let obj = buckets[bucket].value, obj.id == id {
+        return obj
+      }
+      // '&+' for performance. 'bucketCount' is 0x4000_... or below.
+      bucket = (bucket &+ 1) & _bucketMask
+    } while bucket != idealBucket
+
+    return nil
+  }
+}
diff --git a/Tests/LinuxMain.swift b/Tests/LinuxMain.swift
@@ -1,14 +1,20 @@
 import XCTest
 import SwiftSyntaxTest
 
-XCTMain([
-  testCase(AbsolutePositionTestCase.allTests),
-  testCase(DecodeSyntaxTestCase.allTests),
-  testCase(DiagnosticTestCase.allTests),
-  testCase(LazyCachingTestCase.allTests),
-  testCase(ParseFileTestCase.allTests),
-  testCase(SyntaxChildrenAPITestCase.allTests),
-  testCase(SyntaxCollectionsAPITestCase.allTests),
-  testCase(SyntaxFactoryAPITestCase.allTests),
-  testCase(SyntaxVisitorTestCase.allTests),
-])
+XCTMain({ () -> [XCTestCaseEntry] in
+  var testCases: [XCTestCaseEntry] = [
+    testCase(AbsolutePositionTestCase.allTests),
+    testCase(DecodeSyntaxTestCase.allTests),
+    testCase(DiagnosticTestCase.allTests),
+    testCase(LazyCachingTestCase.allTests),
+    testCase(ParseFileTestCase.allTests),
+    testCase(SyntaxChildrenAPITestCase.allTests),
+    testCase(SyntaxCollectionsAPITestCase.allTests),
+    testCase(SyntaxFactoryAPITestCase.allTests),
+    testCase(SyntaxVisitorTestCase.allTests),
+  ]
+#if DEBUG
+  testCases.append(testCase(WeakLookupTableTestCase.allTests))
+#endif
+  return testCases
+}())