Skip to content

[Explicit Module Builds] Add support for creating a reproducer when clang process crashes. #455

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions Sources/SWBCSupport/CLibclang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -859,6 +859,12 @@ extern "C" {
typedef struct CXOpaqueDependencyScannerServiceOptions
*CXDependencyScannerServiceOptions;

enum CXErrorCode
(*clang_experimental_DependencyScanner_generateReproducer)(
int argc, const char *const *argv, const char *WorkingDirectory,
const char *ReproducerLocation, bool UseUniqueReproducerName,
CXString *messageOut);

/**
* Creates a default set of service options.
* Must be disposed with \c
Expand Down Expand Up @@ -1423,6 +1429,7 @@ struct LibclangWrapper {
LOOKUP_OPTIONAL(clang_experimental_cas_replayCompilation);
LOOKUP_OPTIONAL(clang_experimental_cas_ReplayResult_dispose);
LOOKUP_OPTIONAL(clang_experimental_cas_ReplayResult_getStderr);
LOOKUP_OPTIONAL(clang_experimental_DependencyScanner_generateReproducer);
LOOKUP_OPTIONAL(clang_experimental_DependencyScannerServiceOptions_create);
LOOKUP_OPTIONAL(clang_experimental_DependencyScannerServiceOptions_dispose);
LOOKUP_OPTIONAL(clang_experimental_DependencyScannerServiceOptions_setDependencyMode);
Expand Down Expand Up @@ -1775,6 +1782,10 @@ extern "C" {
lib->wrapper->fns.clang_experimental_DependencyScannerServiceOptions_setCWDOptimization;
}

bool libclang_has_reproducer_feature(libclang_t lib) {
return lib->wrapper->fns.clang_experimental_DependencyScanner_generateReproducer;
}

libclang_casoptions_t libclang_casoptions_create(libclang_t lib) {
auto opts = lib->wrapper->fns.clang_experimental_cas_Options_create();
return new libclang_casoptions_t_{{lib->wrapper, opts}};
Expand Down Expand Up @@ -2159,6 +2170,22 @@ extern "C" {
return depGraph != nullptr;
}

bool libclang_scanner_generate_reproducer(libclang_scanner_t scanner,
int argc, char *const *argv,
const char *workingDirectory,
const char **message) {
auto lib = scanner->scanner->lib;
LibclangFunctions::CXString messageString;
auto result = lib->fns.clang_experimental_DependencyScanner_generateReproducer(
argc, const_cast<const char**>(argv), workingDirectory,
/*ReproducerLocation=*/NULL, /*UseUniqueReproducerName=*/true, &messageString);
if (message) {
*message = strdup(lib->fns.clang_getCString(messageString));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is libclang guaranteed to return a non null string in messageString here?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For success case I think we can make it mandatory for messageString to be present (you need to tell where to find the generated reproducer). Error cases might not populate messageString. Even though they should provide more details, it is fairly easy to return InvalidArgument error without any extra message.

But we can guarantee messageString isn't corrupted and in the worst case it would be an empty string.

I'll think about these cases and would document API contract on libclang side.

}
lib->fns.clang_disposeString(messageString);
return result == LibclangFunctions::CXError_Success;
}

bool libclang_driver_get_actions(libclang_t wrapped_lib,
int argc,
char* const* argv,
Expand Down
6 changes: 6 additions & 0 deletions Sources/SWBCSupport/CLibclang.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ bool libclang_has_cas_up_to_date_checks_feature(libclang_t lib);
/// Whether the libclang has current working directory optimization support.
bool libclang_has_current_working_directory_optimization(libclang_t lib);

bool libclang_has_reproducer_feature(libclang_t lib);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please rebase and tag this with CSUPPORT_EXPORT, and do the same for libclang_scanner_generate_reproducer so it gets the __declspec(dllexport) modifier on windows


/// Create the CAS options object.
libclang_casoptions_t libclang_casoptions_create(libclang_t lib);

Expand Down Expand Up @@ -201,6 +203,10 @@ bool libclang_scanner_scan_dependencies(
void (^diagnostics_callback)(const libclang_diagnostic_set_t),
void (^error_callback)(const char *));

bool libclang_scanner_generate_reproducer(
libclang_scanner_t scanner, int argc, char *const *argv, const char *workingDirectory,
const char **message);

/// Get the list of commands invoked by the given Clang driver command line.
///
/// \param argc - The number of arguments.
Expand Down
20 changes: 20 additions & 0 deletions Sources/SWBCore/LibclangVendored/Libclang.swift
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ public final class Libclang {
public var supportsCurrentWorkingDirectoryOptimization: Bool {
libclang_has_current_working_directory_optimization(lib)
}

public var supportsReproducerGeneration: Bool {
libclang_has_reproducer_feature(lib)
}
}

enum DependencyScanningError: Error {
Expand Down Expand Up @@ -269,6 +273,22 @@ public final class DependencyScanner {
}
return fileDeps
}

public func generateReproducer(
commandLine: [String],
workingDirectory: String
) throws -> String {
let args = CStringArray(commandLine)
var messageUnsafe: UnsafePointer<Int8>!
defer { messageUnsafe?.deallocate() }
// The count is `- 1` here, because CStringArray appends a trailing nullptr.
let success = libclang_scanner_generate_reproducer(scanner, CInt(args.cArray.count - 1), args.cArray, workingDirectory, &messageUnsafe);
let message = String(cString: messageUnsafe)
guard success else {
throw Error.dependencyScanErrorString(message)
}
return message
}
}

fileprivate struct ClangDiagnosticSet {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ package final class ClangModuleDependencyGraph {
/// for example, when using `-save-temps`.
package let commands: [CompileCommand]

package let scanningCommandLine: [String]

package let transitiveIncludeTreeIDs: OrderedSet<String>
package let transitiveCompileCommandCacheKeys: OrderedSet<String>

Expand All @@ -121,6 +123,7 @@ package final class ClangModuleDependencyGraph {
moduleDependencies: OrderedSet<Path>,
workingDirectory: Path,
commands: [CompileCommand],
scanningCommandLine: [String],
transitiveIncludeTreeIDs: OrderedSet<String>,
transitiveCompileCommandCacheKeys: OrderedSet<String>,
usesSerializedDiagnostics: Bool
Expand All @@ -131,6 +134,7 @@ package final class ClangModuleDependencyGraph {
self.modules = moduleDependencies
self.workingDirectory = workingDirectory
self.commands = commands
self.scanningCommandLine = scanningCommandLine
self.transitiveIncludeTreeIDs = transitiveIncludeTreeIDs
self.transitiveCompileCommandCacheKeys = transitiveCompileCommandCacheKeys
self.usesSerializedDiagnostics = usesSerializedDiagnostics
Expand All @@ -143,6 +147,7 @@ package final class ClangModuleDependencyGraph {
moduleDependencies: OrderedSet<Path>,
workingDirectory: Path,
command: CompileCommand,
scanningCommandLine: [String],
transitiveIncludeTreeIDs: OrderedSet<String>,
transitiveCompileCommandCacheKeys: OrderedSet<String>,
usesSerializedDiagnostics: Bool
Expand All @@ -153,33 +158,36 @@ package final class ClangModuleDependencyGraph {
self.modules = moduleDependencies
self.workingDirectory = workingDirectory
self.commands = [command]
self.scanningCommandLine = scanningCommandLine
self.transitiveIncludeTreeIDs = transitiveIncludeTreeIDs
self.transitiveCompileCommandCacheKeys = transitiveCompileCommandCacheKeys
self.usesSerializedDiagnostics = usesSerializedDiagnostics
}

package func serialize<T>(to serializer: T) where T : Serializer {
serializer.serializeAggregate(9) {
serializer.serializeAggregate(10) {
serializer.serialize(kind)
serializer.serialize(files)
serializer.serialize(includeTreeID)
serializer.serialize(modules)
serializer.serialize(workingDirectory)
serializer.serialize(commands)
serializer.serialize(scanningCommandLine)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should try to measure the impact of serializing an extra command line here, it should be shorter than the cc1 command but we do want to keep this data small...

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the impact in the consumed storage space? Just to know what to measure.

serializer.serialize(transitiveIncludeTreeIDs)
serializer.serialize(transitiveCompileCommandCacheKeys)
serializer.serialize(usesSerializedDiagnostics)
}
}

package init(from deserializer: any Deserializer) throws {
try deserializer.beginAggregate(9)
try deserializer.beginAggregate(10)
self.kind = try deserializer.deserialize()
self.files = try deserializer.deserialize()
self.includeTreeID = try deserializer.deserialize()
self.modules = try deserializer.deserialize()
self.workingDirectory = try deserializer.deserialize()
self.commands = try deserializer.deserialize()
self.scanningCommandLine = try deserializer.deserialize()
self.transitiveIncludeTreeIDs = try deserializer.deserialize()
self.transitiveCompileCommandCacheKeys = try deserializer.deserialize()
self.usesSerializedDiagnostics = try deserializer.deserialize()
Expand Down Expand Up @@ -334,12 +342,13 @@ package final class ClangModuleDependencyGraph {
var moduleTransitiveCacheKeys: [String: OrderedSet<String>] = [:]

let fileDeps: DependencyScanner.FileDependencies
let scanningCommandLine = [compiler] + originalFileArgs
let modulesCallbackErrors = LockedValue<[any Error]>([])
let dependencyPaths = LockedValue<Set<Path>>([])
let requiredTargetDependencies = LockedValue<Set<ScanResult.RequiredDependency>>([])
do {
fileDeps = try clangWithScanner.scanner.scanDependencies(
commandLine: [compiler] + originalFileArgs,
commandLine: scanningCommandLine,
workingDirectory: workingDirectory.str,
lookupOutput: { name, contextHash, kind in
let moduleOutputPath = outputPathForModule(name, contextHash)
Expand Down Expand Up @@ -432,6 +441,7 @@ package final class ClangModuleDependencyGraph {
// Cached builds do not rely on the process working directory, and different scanner working directories should not inhibit task deduplication. The same is true if the scanner reports the working directory can be ignored.
workingDirectory: module.cache_key != nil || module.is_cwd_ignored ? Path.root : workingDirectory,
command: DependencyInfo.CompileCommand(cacheKey: module.cache_key, arguments: commandLine),
scanningCommandLine: scanningCommandLine,
transitiveIncludeTreeIDs: transitiveIncludeTreeIDs,
transitiveCompileCommandCacheKeys: transitiveCommandCacheKeys,
usesSerializedDiagnostics: usesSerializedDiagnostics)
Expand Down Expand Up @@ -513,6 +523,7 @@ package final class ClangModuleDependencyGraph {
// Cached builds do not rely on the process working directory, and different scanner working directories should not inhibit task deduplication
workingDirectory: fileDeps.commands.allSatisfy { $0.cache_key != nil } ? Path.root : workingDirectory,
commands: commands,
scanningCommandLine: scanningCommandLine,
transitiveIncludeTreeIDs: transitiveIncludeTreeIDs,
transitiveCompileCommandCacheKeys: transitiveCommandCacheKeys,
usesSerializedDiagnostics: usesSerializedDiagnostics)
Expand Down Expand Up @@ -549,6 +560,21 @@ package final class ClangModuleDependencyGraph {
return clangWithScanner.casDBs
}

package func generateReproducer(forFailedDependency dependency: DependencyInfo,
libclangPath: Path, casOptions: CASOptions?) throws -> String? {
let clangWithScanner = try libclangWithScanner(
forPath: libclangPath,
casOptions: casOptions,
cacheFallbackIfNotAvailable: false,
core: core
)
guard clangWithScanner.libclang.supportsReproducerGeneration else {
return nil
}
return try clangWithScanner.scanner.generateReproducer(
commandLine: dependency.scanningCommandLine, workingDirectory: dependency.workingDirectory.str)
}

package var isEmpty: Bool {
recordedDependencyInfoRegistry.isEmpty
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,19 @@ public final class ClangCompileTaskAction: TaskAction, BuildValueValidatingTaskA
outputDelegate.emitOutput("Failed frontend command:\n")
outputDelegate.emitOutput(ByteString(encodingAsUTF8: commandString) + "\n")
}

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Related to the question about serialization overhead, we should consider whether this should be opt-in or opt-out. I'm not sure I have a strong opinion right now

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We also have plans to add reproducer-on-error. That one should be opt-in. But crashes seem to be rare enough that generating a reproducer can be don unconditionally. Honestly, the crashes seem to be rare enough that this entire feature in its current form isn't particularly useful. The value should come from iterating on the feature.

if case .some(.exit(.uncaughtSignal, _)) = outputDelegate.result {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We treat some signals as cancellation; should we avoid creating reproducers in that case?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we have a cancellation, does TaskProcessDelegate.commandResult == .cancelled? We can generate a reproducer only when lastResult == .failed to avoid useless reproducers. I think in my testing I've encountered signals to some compilations after the build encountered crashes for a different file.

do {
if let reproducerMessage = try clangModuleDependencyGraph.generateReproducer(
forFailedDependency: dependencyInfo,
libclangPath: explicitModulesPayload.libclangPath,
casOptions: explicitModulesPayload.casOptions) {
outputDelegate.emitOutput(ByteString(encodingAsUTF8: reproducerMessage) + "\n")
}
} catch {
outputDelegate.error(error.localizedDescription)
}
}
return lastResult ?? .failed
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,16 +215,30 @@ final public class PrecompileClangModuleTaskAction: TaskAction, BuildValueValida
enableStrictCASErrors: key.casOptions!.enableStrictCASErrors
)
}
} else if result == .failed && !executionDelegate.userPreferences.enableDebugActivityLogs && !executionDelegate.emitFrontendCommandLines {
let commandString = UNIXShellCommandCodec(
encodingStrategy: .backslashes,
encodingBehavior: .fullCommandLine
).encode(commandLine)

// <rdar://59354519> We need to find a way to use the generic infrastructure for displaying the command line in
// the build log.
outputDelegate.emitOutput("Failed frontend command:\n")
outputDelegate.emitOutput(ByteString(encodingAsUTF8: commandString) + "\n")
} else if result == .failed {
if !executionDelegate.userPreferences.enableDebugActivityLogs && !executionDelegate.emitFrontendCommandLines {
let commandString = UNIXShellCommandCodec(
encodingStrategy: .backslashes,
encodingBehavior: .fullCommandLine
).encode(commandLine)

// <rdar://59354519> We need to find a way to use the generic infrastructure for displaying the command line in
// the build log.
outputDelegate.emitOutput("Failed frontend command:\n")
outputDelegate.emitOutput(ByteString(encodingAsUTF8: commandString) + "\n")
}
if case .some(.exit(.uncaughtSignal, _)) = outputDelegate.result {
do {
if let reproducerMessage = try clangModuleDependencyGraph.generateReproducer(
forFailedDependency: dependencyInfo,
libclangPath: key.libclangPath,
casOptions: key.casOptions) {
outputDelegate.emitOutput(ByteString(encodingAsUTF8: reproducerMessage) + "\n")
}
} catch {
outputDelegate.error(error.localizedDescription)
}
}
}
return result
} catch {
Expand Down