Skip to content

Commit e6c87d3

Browse files
authored
perf: improve performance of new unresolved-reference rule (#1530)
We've had a few heavy rules in the past, but nothing like this one. That's not really due to the implementation of the rule, but simply the fact that it aggregates every ref found everywhere, which will be a lot no matter what. Using the standard "regal linting itself" benchmark on main before the rule was merged and after shows a 10%+ increase in memory allocated as well as wall clock time: ``` 1111071791 ns/op 3001557280 B/op 58184205 allocs/op 1289814875 ns/op 3363166528 B/op 64788506 allocs/op - unresolved-reference merged ``` There are some design issues to address with aggregate rules that likely would mean bigger wins across the board, but the focus of this PR is solely to optimize the code for the rule itself. While this remains an expensive rule, it's now closer to our other more costly rules. In total, this saves about 2.7 million allocations of the 6.7 million allocs increased with its addition. The steps in which this was done are roughly as follows: ``` 1249527041 ns/op 3337275400 B/op 64261924 allocs/op - exclude input refs 1247609750 ns/op 3325728208 B/op 64061112 allocs/op - get rid of _import_aliases 1264200584 ns/op 3324900160 B/op 64037543 allocs/op - simplify _all_full_path_refs 1182358875 ns/op 3322581056 B/op 64013278 allocs/op - inline parts of _is_resolved_ref 1189268416 ns/op 3317109520 B/op 63942894 allocs/op - skip if name in ast.rule_and_function_names 1182895291 ns/op 3305238432 B/op 63739044 allocs/op - only aggregate data used in report 1179427375 ns/op 3273346464 B/op 63188538 allocs/op - custom result.location alternative 1161155834 ns/op 3210160768 B/op 62267845 allocs/op - defer to_location_object to report 1130275167 ns/op 3204268280 B/op 62150928 allocs/op - a few small tweaks ``` I have a few more ideas for improvements, but as they'd touch on more than just this rule, this is probably a good place to stop for now. Signed-off-by: Anders Eknert <anders@styra.com>
1 parent abe5fff commit e6c87d3

4 files changed

Lines changed: 80 additions & 72 deletions

File tree

bundle/regal/ast/ast.rego

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -145,15 +145,15 @@ function_arg_names(rule) := [arg.value | some arg in rule.head.args]
145145

146146
# METADATA
147147
# description: all the rule and function names in the input AST
148-
rule_and_function_names contains ref_to_string(rule.head.ref) if some rule in input.rules
148+
rule_and_function_names contains ref_static_to_string(rule.head.ref) if some rule in input.rules
149149

150150
# METADATA
151151
# description: all identifiers in the input AST (rule and function names, plus imported names)
152152
identifiers := rule_and_function_names | imported_identifiers
153153

154154
# METADATA
155155
# description: all rule names in the input AST (excluding functions)
156-
rule_names contains ref_to_string(rule.head.ref) if some rule in rules
156+
rule_names contains ref_static_to_string(rule.head.ref) if some rule in rules
157157

158158
# METADATA
159159
# description: |
@@ -273,10 +273,7 @@ _format_part(part) := sprintf(".%s", [part.value]) if {
273273
# non-static (i.e. variable) value, if any:
274274
# foo.bar -> foo.bar
275275
# foo.bar[baz] -> foo.bar
276-
ref_static_to_string(ref) := str if {
277-
rs := ref_to_string(ref)
278-
str := _trim_from_var(rs, regex.find_n(`\[[^"]`, rs, 1))
279-
}
276+
ref_static_to_string(ref) := _trim_from_var(rs, regex.find_n(`\[[^"]`, rs, 1)) if rs := ref_to_string(ref)
280277

281278
_trim_from_var(ref_str, vars) := ref_str if {
282279
count(vars) == 0

bundle/regal/rules/imports/unresolved-reference/unresolved_reference.rego

Lines changed: 71 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -9,112 +9,119 @@ import data.regal.util
99

1010
# METADATA
1111
# description: collects exported and full of used refs from each module
12-
aggregate contains result.aggregate(rego.metadata.chain(), {
13-
"exported_rules": object.keys(ast.rule_head_locations),
14-
"expanded_refs": _all_full_path_refs,
15-
"prefix_tree": _prefix_tree,
16-
})
17-
18-
_import_aliases[alias_key] := string_value if {
19-
some alias_key, value in ast.resolved_imports
20-
string_value := concat(".", value)
12+
aggregate contains entry if {
13+
exported := object.keys(ast.rule_head_locations)
14+
15+
entry := result.aggregate(rego.metadata.chain(), {
16+
"exported_rules": exported,
17+
"expanded_refs": _all_full_path_refs,
18+
"prefix_tree": {prefix_path |
19+
some rule_name in exported
20+
rule_path := split(rule_name, ".")
21+
some i in numbers.range(1, count(rule_path))
22+
prefix_path := array.slice(rule_path, 0, i)
23+
},
24+
})
2125
}
2226

2327
# an import is shadowed if it shares name with a rule
2428
_shadowed_imports contains rule_name if {
2529
some rule_name in ast.rule_names
26-
_import_aliases[rule_name]
30+
ast.resolved_imports[rule_name]
2731
}
2832

2933
# an import is shadowed if it shares name with a variable (or function argument)
3034
_shadowed_imports contains var_name if {
3135
var_name := ast.found.vars[_][_][_].value
32-
_import_aliases[var_name]
36+
ast.resolved_imports[var_name]
3337
}
3438

3539
_refs contains ref if {
36-
term := ast.found.refs[_][_].value
37-
name := ast.ref_static_to_string(term)
40+
terms := ast.found.refs[_][_].value
41+
terms[0].value != "input"
3842

39-
not name in ast.builtin_names
43+
name := ast.ref_static_to_string(terms)
4044

41-
path := split(name, ".")
42-
not path[0] in _shadowed_imports
45+
not name in ast.builtin_names
46+
not name in ast.rule_and_function_names
47+
not terms[0].value in _shadowed_imports
4348

44-
ref := object.union(result.location(term), {
49+
# util.to_location_row inlined for some extra performance
50+
row := to_number(regex.replace(terms[0].location, `^(\d+):.*`, "$1"))
51+
ref := {
4552
"name": name,
46-
"path": path,
47-
})
53+
"text": input.regal.file.lines[row - 1],
54+
"location": terms[0].location,
55+
}
4856
}
4957

50-
_all_full_path_refs[ref.name] contains ref if {
58+
_all_full_path_refs[ref.name] contains [ref.location, ref.text] if {
5159
some ref in _refs
52-
ref.path[0] == "data"
60+
startswith(ref.name, "data.")
5361
}
5462

55-
_all_full_path_refs[expanded_ref] contains ref if {
63+
_all_full_path_refs[expanded_ref] contains [ref.location, ref.text] if {
5664
some ref in _refs
57-
full_source_prefix := _import_aliases[ref.path[0]]
58-
59-
full_path_array := array.concat([full_source_prefix], util.rest(ref.path))
60-
61-
expanded_ref := concat(".", full_path_array)
65+
path := split(ref.name, ".")
66+
expanded_ref := concat(".", array.concat(ast.resolved_imports[path[0]], util.rest(path)))
6267
}
6368

6469
# METADATA
6570
# schemas:
6671
# - input: schema.regal.aggregate
6772
aggregate_report contains violation if {
68-
all_exports_in_bundle := {export |
69-
some entry in input.aggregate
70-
some export in entry.aggregate_data.exported_rules
71-
}
72-
73-
prefix_tree := {prefix |
74-
some entry in input.aggregate
75-
some prefix in entry.aggregate_data.prefix_tree
76-
}
73+
all_exports := {export | export := input.aggregate[_].aggregate_data.exported_rules[_]}
74+
prefix_tree := {prefix | prefix := input.aggregate[_].aggregate_data.prefix_tree[_]}
7775

7876
some entry in input.aggregate
79-
some ref_full_name, ref_locations in entry.aggregate_data.expanded_refs
77+
some name, refs in entry.aggregate_data.expanded_refs
8078

81-
# Ignore everything after the first "[" in the ref name. E.g. foo.bar[0].baz becomes foo.bar
82-
simplified_ref_name := split(ref_full_name, "[")[0]
79+
# ignore everything after the first "[" in the ref name. E.g. foo.bar[0].baz becomes foo.bar
80+
ref_name := split(name, "[")[0]
81+
ref_path := split(ref_name, ".")
8382

84-
not _is_resolved_ref(simplified_ref_name, prefix_tree, all_exports_in_bundle)
83+
# a reference is considered resolved with respect to a rule if
84+
# 1: it is the prefix of a rule
85+
# 2: it indexes into a rule - we do not consider the possible data
86+
# 3: the reference is ignored in the config
87+
not ref_path in prefix_tree
88+
not _is_resolved_ref(ref_path, all_exports)
89+
not _is_excepted(ref_name)
8590

86-
some ref in ref_locations
87-
violation := result.fail(rego.metadata.chain(), result.location(ref))
91+
some ref in refs
92+
93+
violation := result.fail(rego.metadata.chain(), _to_location_object(ref[0], ref[1], entry.aggregate_source.file))
8894
}
8995

90-
# METADATA
91-
# description: a reference is valid with respect to a rule if
92-
# custom:
93-
# 1: it is the prefix of a rule
94-
# 2: it indexes into a rule - we do not consider the possible data
95-
# 3: the reference is ignored in the config
96-
_is_resolved_ref(ref_full_name, _, _) if {
96+
_is_excepted(ref_full_name) if {
9797
some exception in config.rules.imports["unresolved-reference"]["except-paths"]
9898
glob.match(exception, [], ref_full_name)
9999
}
100100

101-
_is_resolved_ref(ref_full_name, prefix_tree, _) if {
102-
ref_full_path := split(ref_full_name, ".")
103-
ref_full_path in prefix_tree
104-
}
105-
106-
_is_resolved_ref(ref_full_name, _, all_exports_in_bundle) if {
107-
ref_full_path := split(ref_full_name, ".")
108-
109-
some i in numbers.range(0, count(ref_full_path))
101+
_is_resolved_ref(ref_full_path, all_exports) if {
102+
some i in numbers.range(1, count(ref_full_path))
110103
path_prefix := concat(".", array.slice(ref_full_path, 0, i))
111104

112-
path_prefix in all_exports_in_bundle
105+
path_prefix in all_exports
113106
}
114107

115-
_prefix_tree contains prefix_path if {
116-
some rule_name, _ in ast.rule_head_locations
117-
rule_path := split(rule_name, ".")
118-
some i in numbers.range(0, count(rule_path))
119-
prefix_path := array.slice(rule_path, 0, i)
108+
# like util.to_location_object, but with text and file passed in
109+
# as we don't have access to the usual input.regal.file attributes
110+
# in the context of reporting aggregated data
111+
_to_location_object(loc, text, file) := {"location": {
112+
"file": file,
113+
"row": row,
114+
"col": col,
115+
"text": text,
116+
"end": {
117+
"row": end_row,
118+
"col": end_col,
119+
},
120+
}} if {
121+
[r, c, er, ec] := split(loc, ":")
122+
123+
row := to_number(r)
124+
col := to_number(c)
125+
end_row := to_number(er)
126+
end_col := to_number(ec)
120127
}

bundle/regal/util/util.rego

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ to_location_object(loc) := {
6767

6868
to_location_object(loc) := loc if is_object(loc)
6969

70+
# METADATA
71+
# description: efficiently extracts row number from location string
72+
to_location_row(loc) := to_number(regex.replace(loc, `^(\d+):.*`, "$1"))
73+
7074
_location_to_text(row, col, end_row, end_col) := substring(
7175
input.regal.file.lines[row - 1],
7276
col - 1,

pkg/linter/linter_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -728,7 +728,7 @@ import data.unresolved`,
728728
}
729729
}
730730

731-
// 1111071791 ns/op 3001557280 B/op 58184205 allocs/op
731+
// 1130275167 ns/op 3204268280 B/op 62150928 allocs/op - a few small tweaks
732732
// ...
733733
func BenchmarkRegalLintingItself(b *testing.B) {
734734
linter := NewLinter().
@@ -755,7 +755,7 @@ func BenchmarkRegalLintingItself(b *testing.B) {
755755
}
756756
}
757757

758-
// 1011992167 ns/op 2949576920 B/op 57079296 allocs/op
758+
// 1082386000 ns/op 3147603112 B/op 61016216 allocs/op
759759
// ...
760760
func BenchmarkRegalLintingItselfPrepareOnce(b *testing.B) {
761761
ctx := context.Background()

0 commit comments

Comments
 (0)