Skip to content

Commit 788a750

Browse files
committed
Improve performance of SyntacticTestIndex in large projects by scanning files in batches
1 parent 2a6fb59 commit 788a750

File tree

1 file changed

+83
-47
lines changed

1 file changed

+83
-47
lines changed

Sources/SourceKitLSP/Swift/SyntacticTestIndex.swift

Lines changed: 83 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ import SKSupport
1818
/// Task metadata for `SyntacticTestIndexer.indexingQueue`
1919
fileprivate enum TaskMetadata: DependencyTracker, Equatable {
2020
case read
21-
case index(DocumentURI)
21+
case index(Set<DocumentURI>)
2222

2323
/// Reads can be concurrent and files can be indexed concurrently. But we need to wait for all files to finish
2424
/// indexing before reading the index.
@@ -38,12 +38,12 @@ fileprivate enum TaskMetadata: DependencyTracker, Equatable {
3838
// This ensures that the index has been updated at least to the state of file at which the read was scheduled.
3939
// Adding the dependency also elevates the index task's priorities.
4040
return true
41-
case (.index(let lhsUri), .index(let rhsUri)):
42-
// Technically, we should be able to allow simultaneous indexing of the same file. But conceptually the code
43-
// becomes simpler if we don't need to think racing indexing tasks for the same file and it shouldn't make a
44-
// performance impact because if the same file state is indexed twice, the second one will realize that the mtime
45-
// hasn't changed and thus be a no-op.
46-
return lhsUri == rhsUri
41+
case (.index(let lhsUris), .index(let rhsUris)):
42+
// Technically, we should be able to allow simultaneous indexing of the same file. When a file gets re-scheduled
43+
// for indexing, all previous index invocations should get cancelled. But conceptually the code becomes simpler
44+
// if we don't need to think racing indexing tasks for the same file and it shouldn't make a performance impact
45+
// in practice because of the cancellation described before.
46+
return !lhsUris.intersection(rhsUris).isEmpty
4747
}
4848
}
4949
}
@@ -112,14 +112,11 @@ actor SyntacticTestIndex {
112112
/// Called when the list of files that may contain tests is updated.
113113
///
114114
/// All files that are not in the new list of test files will be removed from the index.
115-
func listOfTestFilesDidChange(_ testFiles: Set<DocumentURI>) {
116-
let testFiles = Set(testFiles)
117-
let removedFiles = Set(self.indexedTests.keys.filter { !testFiles.contains($0) })
115+
func listOfTestFilesDidChange(_ testFiles: [DocumentURI]) {
116+
let removedFiles = Set(self.indexedTests.keys).subtracting(testFiles)
118117
removeFilesFromIndex(removedFiles)
119118

120-
for testFile in testFiles {
121-
rescanFile(testFile)
122-
}
119+
rescanFiles(testFiles)
123120
}
124121

125122
func filesDidChange(_ events: [FileEvent]) {
@@ -130,7 +127,7 @@ actor SyntacticTestIndex {
130127
// `listOfTestFilesDidChange`
131128
break
132129
case .changed:
133-
rescanFile(fileEvent.uri)
130+
rescanFiles([fileEvent.uri])
134131
case .deleted:
135132
removeFilesFromIndex([fileEvent.uri])
136133
default:
@@ -139,42 +136,57 @@ actor SyntacticTestIndex {
139136
}
140137
}
141138

142-
/// Called when a single file was updated. Just re-scans that file.
143-
private func rescanFile(_ uri: DocumentURI) {
144-
self.indexingQueue.async(priority: .low, metadata: .index(uri)) {
145-
guard let url = uri.fileURL else {
146-
logger.log("Not indexing \(uri.forLogging) for swift-testing tests because it is not a file URL")
147-
return
148-
}
149-
if Task.isCancelled {
150-
return
151-
}
152-
guard
153-
let fileModificationDate = try? FileManager.default.attributesOfItem(atPath: url.path)[.modificationDate]
154-
as? Date
155-
else {
156-
logger.fault("Not indexing \(uri.forLogging) for tests because the modification date could not be determined")
157-
return
158-
}
159-
if let indexModificationDate = self.indexedTests[uri]?.sourceFileModificationDate,
160-
indexModificationDate >= fileModificationDate
161-
{
162-
// Index already up to date.
163-
return
164-
}
165-
if Task.isCancelled {
166-
return
139+
/// Called when a list of files was updated. Re-scans those files
140+
private func rescanFiles(_ uris: [DocumentURI]) {
141+
// Divide the files into multiple batches. This is more efficient than spawning a new task for every file, mostly
142+
// because it keeps the number of pending items in `indexingQueue` low and adding a new task to `indexingQueue` is
143+
// in O(number of pending tasks), since we need to scan for dependency edges to add, which would make scanning files
144+
// be O(number of files).
145+
// Over-subscribe the processor count in case one batch finishes more quickly than another.
146+
let batches = uris.partition(intoNumberOfBatches: ProcessInfo.processInfo.processorCount * 4)
147+
for batch in batches {
148+
self.indexingQueue.async(priority: .low, metadata: .index(Set(batch))) {
149+
for uri in batch {
150+
await self.rescanFileAssumingOnQueue(uri)
151+
}
167152
}
168-
let testItems = await testItems(in: url)
153+
}
154+
}
169155

170-
if Task.isCancelled {
171-
// This `isCancelled` check is essential for correctness. When `testFilesDidChange` is called, it cancels all
172-
// indexing tasks for files that have been removed. If we didn't have this check, an index task that was already
173-
// started might add the file back into `indexedTests`.
174-
return
175-
}
176-
self.indexedTests[uri] = IndexedTests(tests: testItems, sourceFileModificationDate: fileModificationDate)
156+
/// Re-scans a single file. This method must be called in a task that is executing on `indexingQueue`.
157+
private func rescanFileAssumingOnQueue(_ uri: DocumentURI) async {
158+
guard let url = uri.fileURL else {
159+
logger.log("Not indexing \(uri.forLogging) for swift-testing tests because it is not a file URL")
160+
return
161+
}
162+
if Task.isCancelled {
163+
return
164+
}
165+
guard
166+
let fileModificationDate = try? FileManager.default.attributesOfItem(atPath: url.path)[.modificationDate]
167+
as? Date
168+
else {
169+
logger.fault("Not indexing \(uri.forLogging) for tests because the modification date could not be determined")
170+
return
177171
}
172+
if let indexModificationDate = self.indexedTests[uri]?.sourceFileModificationDate,
173+
indexModificationDate >= fileModificationDate
174+
{
175+
// Index already up to date.
176+
return
177+
}
178+
if Task.isCancelled {
179+
return
180+
}
181+
let testItems = await testItems(in: url)
182+
183+
if Task.isCancelled {
184+
// This `isCancelled` check is essential for correctness. When `testFilesDidChange` is called, it cancels all
185+
// indexing tasks for files that have been removed. If we didn't have this check, an index task that was already
186+
// started might add the file back into `indexedTests`.
187+
return
188+
}
189+
self.indexedTests[uri] = IndexedTests(tests: testItems, sourceFileModificationDate: fileModificationDate)
178190
}
179191

180192
/// Gets all the tests in the syntactic index.
@@ -187,3 +199,27 @@ actor SyntacticTestIndex {
187199
return await readTask.value
188200
}
189201
}
202+
203+
fileprivate extension Collection {
204+
/// Partition the elements of the collection into `numberOfBatches` roughly equally sized batches.
205+
///
206+
/// Elements are assigned to the batches round-robin. This ensures that elements that are close to each other in the
207+
/// original collection end up in different batches. This is important because eg. test files will live close to each
208+
/// other in the file system and test scanning wants to scan them in different batches so we don't end up with one
209+
/// batch only containing source files and the other only containing test files.
210+
func partition(intoNumberOfBatches numberOfBatches: Int) -> [[Element]] {
211+
var batches: [[Element]] = Array(
212+
repeating: {
213+
var batch: [Element] = []
214+
batch.reserveCapacity(self.count / numberOfBatches)
215+
return batch
216+
}(),
217+
count: numberOfBatches
218+
)
219+
220+
for (index, element) in self.enumerated() {
221+
batches[index % numberOfBatches].append(element)
222+
}
223+
return batches.filter { !$0.isEmpty }
224+
}
225+
}

0 commit comments

Comments
 (0)