Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
170 changes: 27 additions & 143 deletions src/rules/go_taint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@
//! `TaintSpec`.

use super::common::AliasTable;
use super::taint_engine::{node_text, sanitizer_fingerprints_eq, TaintState};
use super::taint_engine::{
build_batched_taint_groups, cross_file_taint_finding, match_call_sink, node_text,
push_attributed_findings, taint_finding_for_node, TaintState,
};
pub use super::taint_engine::{
BatchedRule, NodeMatcher, ReturnSummary, RuleFilter, TaintFinding, TaintSpec,
};
Expand Down Expand Up @@ -185,75 +188,13 @@ pub fn analyze_tree_batched<'a>(
return Vec::new();
}

// Group rules that share the same sanitizer matchers. Sanitizers
// change taint-state semantics (they clear taint) so rules with
// different sanitizer profiles must NOT be merged into the same
// spec. Sinks and sources, by contrast, don't affect state during
// a walk — only findings — so we can safely union them within a
// group.
let mut groups: Vec<Vec<usize>> = Vec::new();
for (i, r) in rules.iter().enumerate() {
let mut placed = false;
for g in groups.iter_mut() {
// First rule in each group is representative.
let rep = rules[g[0]].spec;
if sanitizer_fingerprints_eq(&rep.sanitizers, &r.spec.sanitizers) {
g.push(i);
placed = true;
break;
}
}
if !placed {
groups.push(vec![i]);
}
}

let mut out: Vec<(String, TaintFinding)> = Vec::new();
for group in &groups {
// Union all sources and sinks from every rule in the group.
// Using dedup-by-description keeps the matcher list tight
// without sacrificing correctness (duplicate descriptions would
// only cause duplicate findings).
let mut merged_sources: Vec<NodeMatcher> = Vec::new();
let mut merged_sinks: Vec<NodeMatcher> = Vec::new();
let mut seen_source_descs: HashSet<String> = HashSet::new();
let mut seen_sink_descs: HashSet<String> = HashSet::new();
let mut sink_to_rule: HashMap<String, String> = HashMap::new();
let mut allowed_rule_ids: HashSet<String> = HashSet::new();

for &idx in group {
let r = &rules[idx];
allowed_rule_ids.insert(r.rule_id.to_string());
for src in &r.spec.sources {
if seen_source_descs.insert(src.description().to_string()) {
merged_sources.push(src.clone());
}
}
for sink in &r.spec.sinks {
// If two rules declare the same sink description, keep
// the first rule's attribution. In the default ruleset
// sink descriptions are unique per rule.
sink_to_rule
.entry(sink.description().to_string())
.or_insert_with(|| r.rule_id.to_string());
if seen_sink_descs.insert(sink.description().to_string()) {
merged_sinks.push(sink.clone());
}
}
}

let sanitizers = rules[group[0]].spec.sanitizers.clone();
let merged_spec = TaintSpec {
sources: merged_sources,
sinks: merged_sinks,
sanitizers,
};

for group in build_batched_taint_groups(rules) {
// Pass 1: compute summaries once for the entire group.
let empty_summary = ReturnSummary::new();
let pass1_ctx = AnalysisContext {
source,
spec: &merged_spec,
spec: &group.spec,
aliases,
summaries: &empty_summary,
cross_file: None,
Expand All @@ -273,36 +214,22 @@ pub fn analyze_tree_batched<'a>(
let cross_file_for_group = cross_file.map(|cf| CrossFileInfo {
same_package_paths: cf.same_package_paths,
summaries: cf.summaries,
rule_filter: RuleFilter::Any(&allowed_rule_ids),
rule_filter: RuleFilter::Any(&group.allowed_rule_ids),
});
let ctx = AnalysisContext {
source,
spec: &merged_spec,
spec: &group.spec,
aliases,
summaries: &summaries,
cross_file: cross_file_for_group.as_ref(),
sink_to_rule: Some(&sink_to_rule),
sink_to_rule: Some(&group.sink_to_rule),
};
let mut group_findings: Vec<TaintFinding> = Vec::new();
collect_function_defs(root, &mut |func_node| {
analyze_function(func_node, &ctx, &mut group_findings);
});

// Attribute each finding back to the rule id. Intra-file
// findings carry a `rule_id_hint` set by `handle_call`.
// Cross-file findings carry one set by `handle_cross_file_call`.
// For safety, fall back to the sink-description lookup when the
// hint is missing.
for mut f in group_findings {
let rule_id = f
.rule_id_hint
.clone()
.or_else(|| sink_to_rule.get(f.sink_description.as_str()).cloned());
if let Some(rid) = rule_id {
f.rule_id_hint = Some(rid.clone());
out.push((rid, f));
}
}
push_attributed_findings(&mut out, group_findings, &group.sink_to_rule);
}

out
Expand Down Expand Up @@ -950,55 +877,22 @@ fn handle_call(
Some(a) => a.resolve(callee_raw.as_ref()),
None => Cow::Borrowed(callee_raw.as_ref()),
};
// The final segment of the callee; used by `MethodName` sink
// matching. For `db.Query` this is `"Query"`; for a bare `exec`
// it's `"exec"`.
let final_segment = resolved.rsplit('.').next().unwrap_or(resolved.as_ref());

let sink_desc = ctx.spec.sinks.iter().find_map(|m| match m {
NodeMatcher::Call {
canonical,
description,
} if canonical.as_str() == resolved.as_ref() => Some(description.clone()),
NodeMatcher::MethodName {
method,
description,
} if method == final_segment => Some(description.clone()),
_ => None,
});
// When the engine is running in batched mode, map the matched sink
// description back to the rule it came from so the caller can
// dispatch the finding correctly. `None` in single-rule mode.
let sink_desc = sink_desc.map(|d| {
let rule = ctx
.sink_to_rule
.and_then(|m| m.get(d.as_str()))
.map(|s| s.to_string());
(d, rule)
});

if let Some((sink_desc, sink_rule_id)) = sink_desc {
if let Some(sink) = match_call_sink(ctx.spec, resolved.as_ref(), ctx.sink_to_rule) {
let Some(args) = node.child_by_field_name("arguments") else {
return;
};
let mut cursor = args.walk();
for arg in args.named_children(&mut cursor) {
if let Some((source_desc, src_line)) = expression_taint(arg, ctx, state) {
let start = node.start_position();
let end = node.end_position();
findings.push(TaintFinding {
sink_start_byte: node.start_byte(),
sink_end_byte: node.end_byte(),
sink_line: start.row + 1,
sink_column: start.column + 1,
sink_end_line: end.row + 1,
sink_end_column: end.column + 1,
source_description: source_desc,
sink_description: sink_desc.clone(),
source_line: src_line,
rule_id_hint: sink_rule_id.clone(),
hops: 1,
});
findings.push(taint_finding_for_node(
node,
source_desc,
sink.description.clone(),
src_line,
sink.rule_id_hint.clone(),
1,
));
break;
}
}
Expand Down Expand Up @@ -1069,24 +963,14 @@ fn handle_cross_file_call(
}
let arg = arg_nodes[flow.param_index];
if let Some((source_desc, src_line)) = expression_taint(arg, ctx, state) {
let start = node.start_position();
let end = node.end_position();
findings.push(TaintFinding {
sink_start_byte: node.start_byte(),
sink_end_byte: node.end_byte(),
sink_line: start.row + 1,
sink_column: start.column + 1,
sink_end_line: end.row + 1,
sink_end_column: end.column + 1,
source_description: source_desc,
sink_description: format!(
"{} (via cross-file call to {})",
flow.sink_description, func_name
),
source_line: src_line,
rule_id_hint: Some(flow.sink_rule_id.clone()),
hops: 2,
});
findings.push(cross_file_taint_finding(
node,
source_desc,
src_line,
&flow.sink_description,
func_name,
&flow.sink_rule_id,
));
// One finding per cross-file call is enough.
return;
}
Expand Down
Loading