Skip to content

Commit 57267bd

Browse files
author
Samuel Groß
committed
Refactor ProgramBuilder.build() algorithm
The old algorithm had two small flaws: 1. It may have generated an arbitrarily large amount of code in the worst case as it could, in theory, keep invoking recursive code generators even if it was already over budget. 2. It would generally produce somewhat "unbalanced" code: blocks generated early on would generally be larger than blocks generated later as the recursive budget was dependent on the _remaining_ outer budget. The new algorithm fixes both of these issues: it now has an explicit constant (really, a range between two constants) that determines the budget for recursively generated blocks relative to their parent's _initial_ budget. This ensures that blocks are more balanced. Further, if the remaining budget becomes "too small", no more recursive code generators are called to avoid overshooting the budget by a lot. See the comment in ProgramBuilder.swift for more details.
1 parent 1d81c09 commit 57267bd

15 files changed

+320
-157
lines changed

Sources/Fuzzilli/Base/ProgramBuilder.swift

Lines changed: 132 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -70,23 +70,6 @@ public class ProgramBuilder {
7070
/// Type inference for JavaScript variables.
7171
private var jsTyper: JSTyper
7272

73-
/// During code building, contains the number of instructions that should still be produced.
74-
/// Code building may overshoot this number, but will never produce fewer instructions than this.
75-
private var currentBuildingBudget = 0
76-
77-
/// Possible building modes. These are used as argument for build() and determine how the new code is produced.
78-
public enum BuildingMode {
79-
// Run random code generators.
80-
case runningGenerators
81-
// Splice code from other random programs in the corpus.
82-
case splicing
83-
// Do all of the above.
84-
case runningGeneratorsAndSplicing
85-
}
86-
87-
/// The current code building mode.
88-
private var currentBuildingMode = BuildingMode.runningGeneratorsAndSplicing
89-
9073
/// How many variables are currently in scope.
9174
public var numVisibleVariables: Int {
9275
return scopeAnalyzer.visibleVariables.count
@@ -120,8 +103,6 @@ public class ProgramBuilder {
120103
scopeAnalyzer = ScopeAnalyzer()
121104
contextAnalyzer = ContextAnalyzer()
122105
jsTyper.reset()
123-
currentBuildingBudget = 0
124-
currentBuildingMode = .runningGeneratorsAndSplicing
125106
}
126107

127108
/// Finalizes and returns the constructed program, then resets this builder so it can be reused for building another program.
@@ -975,61 +956,155 @@ public class ProgramBuilder {
975956
return true
976957
}
977958

959+
// Code Building Algorithm:
960+
//
961+
// In theory, the basic building algorithm is simply:
962+
//
963+
// var remainingBudget = initialBudget
964+
// while remainingBudget > 0 {
965+
// if probability(0.5) {
966+
// remainingBudget -= runRandomCodeGenerator()
967+
// } else {
968+
// remainingBudget -= performSplicing()
969+
// }
970+
// }
971+
//
972+
// In practice, things become a little more complicated because code generators can be recursive: a function
973+
// generator will emit the function start and end and recursively call into the code building machinery to fill the
974+
// body of the function. The size of the recursively generated blocks is determined as a fraction of the parent's
975+
// *initial budget*. This ensures that the sizes of recursively generated blocks roughly follow the same
976+
// distribution. However, it also means that the initial budget can be overshot by quite a bit: we may end up
977+
// invoking a recursive generator near the end of our budget, which may then for example generate another 0.5x
978+
// initialBudget instructions. However, the benefit of this approach is that there are really only two "knobs" that
979+
// determine the "shape" of the generated code: the factor that determines the recursive budget relative to the
980+
// parent budget and the (absolute) threshold for recursive code generation.
981+
//
982+
983+
/// The first "knob": this mainly determines the shape of generated code as it determines how large block bodies are relative to their surrounding code.
984+
/// This also influences the nesting depth of the generated code, as recursive code generators are only invoked if enough "budget" is still available.
985+
/// These are writable so they can be reconfigured in tests.
986+
var minRecursiveBudgetRelativeToParentBudget = 0.05
987+
var maxRecursiveBudgetRelativeToParentBudget = 0.50
988+
989+
/// The second "knob": the minimum budget required to be able to invoke recursive code generators.
990+
public static let minBudgetForRecursiveCodeGeneration = 5
991+
992+
/// Possible building modes. These are used as argument for build() and determine how the new code is produced.
993+
public enum BuildingMode {
994+
// Run random code generators.
995+
case runningGenerators
996+
// Splice code from other random programs in the corpus.
997+
case splicing
998+
// Do all of the above.
999+
case runningGeneratorsAndSplicing
1000+
}
1001+
9781002
private var openFunctions = [Variable]()
9791003
private func callLikelyRecurses(function: Variable) -> Bool {
9801004
return openFunctions.contains(function)
9811005
}
9821006

1007+
// Keeps track of the state of one buildInternal() invocation. These are tracked in a stack, one entry for each recursive call.
1008+
// This is a class so that updating the currently active state is possible without push/pop.
1009+
private class BuildingState {
1010+
let initialBudget: Int
1011+
let mode: BuildingMode
1012+
var recursiveBuildingAllowed = true
1013+
var nextRecursiveBlockOfCurrentGenerator = 1
1014+
var totalRecursiveBlocksOfCurrentGenerator: Int? = nil
1015+
1016+
init(initialBudget: Int, mode: BuildingMode) {
1017+
assert(initialBudget > 0)
1018+
self.initialBudget = initialBudget
1019+
self.mode = mode
1020+
}
1021+
}
1022+
private var buildStack = [BuildingState]()
1023+
9831024
/// Build random code at the current position in the program.
1025+
///
1026+
/// The first parameter controls the number of emitted instructions: as soon as at least that number of instructions have been emitted, building stops.
1027+
/// This parameter is only a rough estimate as recursive code generators may lead to significantly more code being generated.
1028+
/// Typically, the actual number of generated instructions will be somewhere between n and 2x n.
9841029
public func build(n: Int = 1, by mode: BuildingMode = .runningGeneratorsAndSplicing) {
985-
currentBuildingBudget = n
986-
currentBuildingMode = mode
987-
buildInternal()
1030+
assert(buildStack.isEmpty)
1031+
buildInternal(initialBuildingBudget: n, mode: mode)
1032+
assert(buildStack.isEmpty)
9881033
}
9891034

9901035
/// Recursive code building. Used by CodeGenerators for example to fill the bodies of generated blocks.
991-
public func buildRecursive() {
992-
assert(currentBuildingMode != .splicing)
1036+
public func buildRecursive(block: Int = 1, of numBlocks: Int = 1, n optionalBudget: Int? = nil) {
1037+
assert(!buildStack.isEmpty)
1038+
let parentState = buildStack.last!
9931039

994-
// Generate at least one instruction, even if already below budget.
995-
if currentBuildingBudget <= 0 {
996-
currentBuildingBudget = 1
997-
}
1040+
assert(parentState.mode != .splicing)
1041+
assert(parentState.recursiveBuildingAllowed) // If this fails, a recursive CodeGenerator is probably not marked as recursive.
1042+
assert(numBlocks >= 1)
1043+
assert(block >= 1 && block <= numBlocks)
1044+
assert(parentState.nextRecursiveBlockOfCurrentGenerator == block)
1045+
assert((parentState.totalRecursiveBlocksOfCurrentGenerator ?? numBlocks) == numBlocks)
1046+
1047+
parentState.nextRecursiveBlockOfCurrentGenerator = block + 1
1048+
parentState.totalRecursiveBlocksOfCurrentGenerator = numBlocks
1049+
1050+
// Determine the budget for this recursive call as a fraction of the parent's initial budget.
1051+
let factor = Double.random(in: minRecursiveBudgetRelativeToParentBudget...maxRecursiveBudgetRelativeToParentBudget)
1052+
assert(factor > 0.0 && factor < 1.0)
1053+
let parentBudget = parentState.initialBudget
1054+
var recursiveBudget = Double(parentBudget) * factor
9981055

999-
// Limit recursive building (i.e. bodies of generated blocks) to 25% - 50% of the original budget.
1000-
let remainingOuterBuildingBudget = Int(Double(currentBuildingBudget) * Double.random(in: 0.50...0.75))
1001-
currentBuildingBudget -= remainingOuterBuildingBudget
1056+
// Now split the budget between all sibling blocks.
1057+
recursiveBudget /= Double(numBlocks)
1058+
recursiveBudget.round(.up)
1059+
assert(recursiveBudget >= 1.0)
10021060

1003-
buildInternal()
1061+
// Finally, if a custom budget was requested, choose the smaller of the two values.
1062+
if let requestedBudget = optionalBudget {
1063+
assert(requestedBudget > 0)
1064+
recursiveBudget = min(recursiveBudget, Double(requestedBudget))
1065+
}
10041066

1005-
// Restore the original budget.
1006-
currentBuildingBudget = remainingOuterBuildingBudget
1067+
buildInternal(initialBuildingBudget: Int(recursiveBudget), mode: parentState.mode)
10071068
}
10081069

1009-
private func buildInternal() {
1010-
assert(currentBuildingBudget > 0)
1070+
private func buildInternal(initialBuildingBudget: Int, mode: BuildingMode) {
1071+
assert(initialBuildingBudget > 0)
10111072

1012-
// Splicing or code generation may fail. This counts consecutive failures to avoid infinite looping below.
1073+
// Both splicing and code generation can sometimes fail, for example if no other program with the necessary features exists.
1074+
// To avoid infinite loops, we bail out after a certain number of consecutive failures.
10131075
var consecutiveFailures = 0
10141076

1077+
let state = BuildingState(initialBudget: initialBuildingBudget, mode: mode)
1078+
buildStack.append(state)
1079+
defer { buildStack.removeLast() }
1080+
10151081
// Unless we are only splicing, find all generators that have the required context. We must always have at least one suitable code generator.
10161082
let origContext = context
10171083
var availableGenerators = WeightedList<CodeGenerator>()
1018-
if currentBuildingMode != .splicing {
1084+
if state.mode != .splicing {
10191085
availableGenerators = fuzzer.codeGenerators.filter({ $0.requiredContext.isSubset(of: origContext) })
10201086
assert(!availableGenerators.isEmpty)
10211087
}
10221088

1023-
while currentBuildingBudget > 0 && consecutiveFailures < 10 {
1089+
var remainingBudget = initialBuildingBudget
1090+
while remainingBudget > 0 {
10241091
assert(context == origContext, "Code generation or splicing must not change the current context")
10251092

1026-
var mode = currentBuildingMode
1093+
if state.recursiveBuildingAllowed &&
1094+
remainingBudget < ProgramBuilder.minBudgetForRecursiveCodeGeneration &&
1095+
availableGenerators.contains(where: { !$0.isRecursive }) {
1096+
// No more recursion at this point as we don't have enough budget left.
1097+
state.recursiveBuildingAllowed = false
1098+
availableGenerators = availableGenerators.filter({ !$0.isRecursive })
1099+
assert(state.mode == .splicing || !availableGenerators.isEmpty)
1100+
}
1101+
1102+
var mode = state.mode
10271103
if mode == .runningGeneratorsAndSplicing {
10281104
mode = chooseUniform(from: [.runningGenerators, .splicing])
10291105
}
10301106

1031-
let previousBudget = currentBuildingBudget
1032-
1107+
let codeSizeBefore = code.count
10331108
switch mode {
10341109
case .runningGenerators:
10351110
if !hasVisibleVariables {
@@ -1041,6 +1116,10 @@ public class ProgramBuilder {
10411116
assert(hasVisibleVariables)
10421117
}
10431118

1119+
// Reset the code generator specific part of the state.
1120+
state.nextRecursiveBlockOfCurrentGenerator = 1
1121+
state.totalRecursiveBlocksOfCurrentGenerator = nil
1122+
10441123
// Select a random generator and run it.
10451124
let generator = availableGenerators.randomElement()
10461125
run(generator)
@@ -1052,13 +1131,19 @@ public class ProgramBuilder {
10521131
default:
10531132
fatalError("Unknown ProgramBuildingMode \(mode)")
10541133
}
1134+
let codeSizeAfter = code.count
10551135

1056-
// Both splicing and code generation can sometimes fail, for example if no other program with the necessary features exists.
1057-
// To avoid infinite loops, we bail out after a certain number of failures.
1058-
if currentBuildingBudget == previousBudget {
1059-
consecutiveFailures += 1
1060-
} else {
1136+
let emittedInstructions = codeSizeAfter - codeSizeBefore
1137+
remainingBudget -= emittedInstructions
1138+
if emittedInstructions > 0 {
10611139
consecutiveFailures = 0
1140+
} else {
1141+
consecutiveFailures += 1
1142+
guard consecutiveFailures < 10 else {
1143+
// This should happen very rarely, for example if we're splicing into a restricted context and don't find
1144+
// another sample with instructions that can be copied over, or if we get very unlucky with the code generators.
1145+
return
1146+
}
10621147
}
10631148
}
10641149
}
@@ -1849,8 +1934,6 @@ public class ProgramBuilder {
18491934
// The returned instruction will also contain its index in the program. Use that so the analyzers have access to the index.
18501935
let instr = code.append(instr)
18511936

1852-
currentBuildingBudget -= 1
1853-
18541937
// Update our analyses
18551938
scopeAnalyzer.analyze(instr)
18561939
contextAnalyzer.analyze(instr)

Sources/Fuzzilli/CodeGen/CodeGenerator.swift

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@ public class CodeGenerator: Contributor {
4444
/// The name of this code generator
4545
public let name: String
4646

47+
/// Whether this code generator is recursive, i.e. will generate further code for example to generate the body of a block.
48+
/// This is used to determine whether to run a certain code generator. For example, if only a few more instructions should
49+
/// be generated during program building, calling a recursive code generator will likely result in too many instructions.
50+
public let isRecursive: Bool
51+
4752
/// Types of input variables that are required for
4853
/// this code generator to run.
4954
public let inputTypes: [JSType]
@@ -55,8 +60,9 @@ public class CodeGenerator: Contributor {
5560
/// Wrapper around the actual generator function called.
5661
private let adapter: GeneratorAdapter
5762

58-
private init(name: String, inputTypes: [JSType], context: Context = .javascript, adapter: GeneratorAdapter) {
63+
fileprivate init(name: String, isRecursive: Bool, inputTypes: [JSType], context: Context = .javascript, adapter: GeneratorAdapter) {
5964
self.name = name
65+
self.isRecursive = isRecursive
6066
self.inputTypes = inputTypes
6167
self.requiredContext = context
6268
self.adapter = adapter
@@ -73,14 +79,26 @@ public class CodeGenerator: Contributor {
7379
}
7480

7581
public convenience init(_ name: String, inContext context: Context = .javascript, _ f: @escaping GeneratorFuncNoArgs) {
76-
self.init(name: name, inputTypes: [], context: context, adapter: GeneratorAdapterNoArgs(f: f))
82+
self.init(name: name, isRecursive: false, inputTypes: [], context: context, adapter: GeneratorAdapterNoArgs(f: f))
7783
}
7884

7985
public convenience init(_ name: String, inContext context: Context = .javascript, input type: JSType, _ f: @escaping GeneratorFunc1Arg) {
80-
self.init(name: name, inputTypes: [type], context: context, adapter: GeneratorAdapter1Arg(f: f))
86+
self.init(name: name, isRecursive: false, inputTypes: [type], context: context, adapter: GeneratorAdapter1Arg(f: f))
8187
}
8288

8389
public convenience init(_ name: String, inContext context: Context = .javascript, inputs types: (JSType, JSType), _ f: @escaping GeneratorFunc2Args) {
84-
self.init(name: name, inputTypes: [types.0, types.1], context: context, adapter: GeneratorAdapter2Args(f: f))
90+
self.init(name: name, isRecursive: false, inputTypes: [types.0, types.1], context: context, adapter: GeneratorAdapter2Args(f: f))
8591
}
8692
}
93+
94+
// Constructors for recursive CodeGenerators.
95+
public func RecursiveCodeGenerator(_ name: String, inContext context: Context = .javascript, _ f: @escaping GeneratorFuncNoArgs) -> CodeGenerator {
96+
return CodeGenerator(name: name, isRecursive: true, inputTypes: [], context: context, adapter: GeneratorAdapterNoArgs(f: f))
97+
}
98+
public func RecursiveCodeGenerator(_ name: String, inContext context: Context = .javascript, input type: JSType, _ f: @escaping GeneratorFunc1Arg) -> CodeGenerator {
99+
return CodeGenerator(name: name, isRecursive: true, inputTypes: [type], context: context, adapter: GeneratorAdapter1Arg(f: f))
100+
}
101+
public func RecursiveCodeGenerator(_ name: String, inContext context: Context = .javascript, inputs types: (JSType, JSType), _ f: @escaping GeneratorFunc2Args) -> CodeGenerator {
102+
return CodeGenerator(name: name, isRecursive: true, inputTypes: [types.0, types.1], context: context, adapter: GeneratorAdapter2Args(f: f))
103+
}
104+

0 commit comments

Comments
 (0)