Skip to content

Commit 095344c

Browse files
pierre-loup-tristant-sonarsource, nils-werner-sonarsource and claude
authored and committed
SONARPY-4278 Narrow S5852 to exponential backtracking and create S8786 (#1190)
Co-authored-by: Nils Werner <64034005+nils-werner-sonarsource@users.noreply.github.com> Co-authored-by: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com> GitOrigin-RevId: 1bd378324f03e3ab6c032c0f4951629e9959ccc5
1 parent 7cb3550 commit 095344c

12 files changed

Lines changed: 446 additions & 139 deletions

File tree

python-checks/src/main/java/org/sonar/python/checks/OpenSourceCheckList.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@
8787
import org.sonar.python.checks.regex.SingleCharacterAlternationCheck;
8888
import org.sonar.python.checks.regex.StringReplaceCheck;
8989
import org.sonar.python.checks.regex.SuperfluousCurlyBraceCheck;
90+
import org.sonar.python.checks.regex.SuperLinearRegexCheck;
9091
import org.sonar.python.checks.regex.UnquantifiedNonCapturingGroupCheck;
9192
import org.sonar.python.checks.regex.UnusedGroupNamesCheck;
9293
import org.sonar.python.checks.regex.VerboseRegexCheck;
@@ -424,6 +425,7 @@ public Stream<Class<?>> getChecks() {
424425
SklearnPipelineSpecifyMemoryArgumentCheck.class,
425426
SklearnPipelineParameterAreCorrectCheck.class,
426427
SuperfluousCurlyBraceCheck.class,
428+
SuperLinearRegexCheck.class,
427429
SynchronousFileOperationsInAsyncCheck.class,
428430
SynchronousHttpOperationsInAsyncCheck.class,
429431
SynchronousSubprocessOperationsInAsyncCheck.class,
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) SonarSource Sàrl
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* You can redistribute and/or modify this program under the terms of
7+
* the Sonar Source-Available License Version 1, as published by SonarSource Sàrl.
8+
*
9+
* This program is distributed in the hope that it will be useful,
10+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12+
* See the Sonar Source-Available License for more details.
13+
*
14+
* You should have received a copy of the Sonar Source-Available License
15+
* along with this program; if not, see https://sonarsource.com/license/ssal/
16+
*/
17+
package org.sonar.python.checks.regex;
18+
19+
import java.util.Optional;
20+
import org.sonar.plugins.python.api.tree.CallExpression;
21+
import org.sonarsource.analyzer.commons.regex.MatchType;
22+
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
23+
import org.sonarsource.analyzer.commons.regex.finders.RedosFinder;
24+
25+
/**
* Shared base for regex checks that report backtracking problems found by {@link RedosFinder}.
*
* <p>This class runs the finder on every regex handed to {@link #checkRegex}; subclasses only decide,
* through {@link #buildMessage}, which backtracking types they report and with what message.
*/
public abstract class AbstractRedosCheck extends AbstractRegexCheck {
26+
27+
/**
* Resolves how the regex is matched from the called regex function, then runs the finder,
* reporting through {@code this::addIssue}.
*
* @param regexParseResult the parsed regular expression to analyse
* @param regexFunctionCall the call expression in which the regex is used
*/
@Override
28+
public void checkRegex(RegexParseResult regexParseResult, CallExpression regexFunctionCall) {
29+
MatchType matchType = RedosMatchTypeHelper.getMatchTypeFromCalledMethod(regexFunctionCall);
30+
new PythonRedosFinder().checkRegex(regexParseResult, matchType, this::addIssue);
31+
}
32+
33+
/**
* Builds the issue message for the given kind of backtracking.
*
* @param backtrackingType the backtracking category passed on from the finder
* @param regexContainsBackReference flag passed on from the finder unchanged
* @return the message to report, or an empty {@code Optional}
*         (NOTE(review): presumably an empty value means no issue is raised - confirm against RedosFinder)
*/
protected abstract Optional<String> buildMessage(RedosFinder.BacktrackingType backtrackingType, boolean regexContainsBackReference);
34+
35+
// Deliberately a non-static inner class: message() delegates to the enclosing check's buildMessage().
private class PythonRedosFinder extends RedosFinder {
36+
@Override
37+
protected Optional<String> message(RedosFinder.BacktrackingType backtrackingType, boolean regexContainsBackReference) {
38+
return buildMessage(backtrackingType, regexContainsBackReference);
39+
}
40+
}
41+
}

python-checks/src/main/java/org/sonar/python/checks/regex/RedosCheck.java

Lines changed: 7 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -18,111 +18,23 @@
1818

1919

2020
import java.util.Optional;
21-
import java.util.Set;
22-
import java.util.function.Predicate;
2321
import org.sonar.check.Rule;
24-
import org.sonar.plugins.python.api.symbols.Symbol;
25-
import org.sonar.plugins.python.api.symbols.Usage;
26-
import org.sonar.plugins.python.api.tree.AssignmentStatement;
27-
import org.sonar.plugins.python.api.tree.CallExpression;
28-
import org.sonar.plugins.python.api.tree.HasSymbol;
29-
import org.sonar.plugins.python.api.tree.Name;
30-
import org.sonar.plugins.python.api.tree.Tree;
31-
import org.sonar.python.tree.TreeUtils;
32-
import org.sonarsource.analyzer.commons.regex.MatchType;
33-
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
3422
import org.sonarsource.analyzer.commons.regex.finders.RedosFinder;
3523

36-
3724
@Rule(key = "S5852")
38-
public class RedosCheck extends AbstractRegexCheck {
25+
public class RedosCheck extends AbstractRedosCheck {
3926

4027
private static final String MESSAGE = """
4128
Make sure the regex used here, which is vulnerable to %s runtime due to backtracking,\
4229
cannot lead to denial of service.""";
4330
private static final String EXP = "exponential";
44-
private static final String POLY = "polynomial";
45-
private static final Set<String> FULL_MATCH_METHODS = Set.of("fullmatch");
46-
private static final Set<String> PARTIAL_MATCH_METHODS = Set.of("findall", "search", "split", "sub", "subn");
47-
private static final String COMPILE_METHOD = "compile";
4831

4932
@Override
50-
public void checkRegex(RegexParseResult regexParseResult, CallExpression regexFunctionCall) {
51-
MatchType matchType = getMatchTypeFromCalledMethod(regexFunctionCall);
52-
new PythonRedosFinder().checkRegex(regexParseResult, matchType, this::addIssue);
53-
}
54-
55-
private static MatchType getMatchTypeFromCalledMethod(CallExpression regexFunctionCall) {
56-
Symbol symbol = regexFunctionCall.calleeSymbol();
57-
if (symbol == null) {
58-
// Defensive, callee symbol should have been checked prior to calling "checkRegex"
59-
return MatchType.UNKNOWN;
60-
}
61-
if (FULL_MATCH_METHODS.contains(symbol.name())) {
62-
return MatchType.FULL;
63-
}
64-
if (PARTIAL_MATCH_METHODS.contains(symbol.name())) {
65-
return MatchType.PARTIAL;
66-
}
67-
if (COMPILE_METHOD.equals(symbol.name())) {
68-
return matchTypeOfCompiledPattern(regexFunctionCall);
69-
}
70-
return MatchType.UNKNOWN;
71-
}
72-
73-
private static MatchType matchTypeOfCompiledPattern(CallExpression regexFunctionCall) {
74-
return Optional.ofNullable(TreeUtils.firstAncestorOfKind(regexFunctionCall, Tree.Kind.ASSIGNMENT_STMT))
75-
.map(AssignmentStatement.class::cast)
76-
.map(a -> a.lhsExpressions().get(0).expressions().get(0))
77-
.filter(lhs -> lhs.is(Tree.Kind.NAME))
78-
.map(n -> (Name) n)
79-
.map(HasSymbol::symbol)
80-
.map(RedosCheck::getMatchTypeFromSymbolUsages)
81-
.orElse(MatchType.UNKNOWN);
82-
}
83-
84-
private static MatchType getMatchTypeFromSymbolUsages(Symbol s) {
85-
boolean isUsedForFullMatch = s.usages().stream().map(Usage::tree).anyMatch(t -> isUsedInMethod(t, FULL_MATCH_METHODS));
86-
boolean isUsedForPartialMatch = s.usages().stream().map(Usage::tree).anyMatch(t -> isUsedInMethod(t, PARTIAL_MATCH_METHODS));
87-
return getMatchType(isUsedForFullMatch, isUsedForPartialMatch);
88-
}
89-
90-
private static MatchType getMatchType(boolean isUsedForFullMatch, boolean isUsedForPartialMatch) {
91-
if (isUsedForFullMatch && isUsedForPartialMatch) {
92-
return MatchType.BOTH;
93-
}
94-
if (isUsedForFullMatch) {
95-
return MatchType.FULL;
96-
}
97-
if (isUsedForPartialMatch) {
98-
return MatchType.PARTIAL;
99-
}
100-
return MatchType.UNKNOWN;
101-
}
102-
103-
private static boolean isUsedInMethod(Tree tree, Set<String> methodNames) {
104-
return TreeUtils.firstAncestor(tree, isCallToMethod(methodNames)) != null;
105-
}
106-
107-
private static Predicate<Tree> isCallToMethod(Set<String> methodNames) {
108-
return tree -> Optional.ofNullable(tree)
109-
.filter(t -> t.is(Tree.Kind.CALL_EXPR))
110-
.map(CallExpression.class::cast)
111-
.map(CallExpression::calleeSymbol)
112-
.map(Symbol::name)
113-
.filter(methodNames::contains)
114-
.isPresent();
115-
}
116-
117-
static class PythonRedosFinder extends RedosFinder {
118-
119-
@Override
120-
protected Optional<String> message(RedosFinder.BacktrackingType backtrackingType, boolean regexContainsBackReference) {
121-
return switch (backtrackingType) {
122-
case ALWAYS_EXPONENTIAL, QUADRATIC_WHEN_OPTIMIZED, LINEAR_WHEN_OPTIMIZED -> Optional.of(String.format(MESSAGE, EXP));
123-
case ALWAYS_QUADRATIC -> Optional.of(String.format(MESSAGE, POLY));
124-
default -> Optional.empty();
125-
};
126-
}
33+
protected Optional<String> buildMessage(RedosFinder.BacktrackingType backtrackingType, boolean regexContainsBackReference) {
34+
return switch (backtrackingType) {
35+
// Python has no JIT-style optimisation for QUADRATIC_WHEN_OPTIMIZED or LINEAR_WHEN_OPTIMIZED, so it stays in S5852
36+
case ALWAYS_EXPONENTIAL, QUADRATIC_WHEN_OPTIMIZED, LINEAR_WHEN_OPTIMIZED -> Optional.of(String.format(MESSAGE, EXP));
37+
default -> Optional.empty();
38+
};
12739
}
12840
}
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) SonarSource Sàrl
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* You can redistribute and/or modify this program under the terms of
7+
* the Sonar Source-Available License Version 1, as published by SonarSource Sàrl.
8+
*
9+
* This program is distributed in the hope that it will be useful,
10+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12+
* See the Sonar Source-Available License for more details.
13+
*
14+
* You should have received a copy of the Sonar Source-Available License
15+
* along with this program; if not, see https://sonarsource.com/license/ssal/
16+
*/
17+
package org.sonar.python.checks.regex;
18+
19+
import java.util.Optional;
20+
import java.util.Set;
21+
import java.util.function.Predicate;
22+
import org.sonar.plugins.python.api.symbols.Symbol;
23+
import org.sonar.plugins.python.api.symbols.Usage;
24+
import org.sonar.plugins.python.api.tree.AssignmentStatement;
25+
import org.sonar.plugins.python.api.tree.CallExpression;
26+
import org.sonar.plugins.python.api.tree.HasSymbol;
27+
import org.sonar.plugins.python.api.tree.Name;
28+
import org.sonar.plugins.python.api.tree.Tree;
29+
import org.sonar.python.tree.TreeUtils;
30+
import org.sonarsource.analyzer.commons.regex.MatchType;
31+
32+
/**
* Static helper that derives the {@link MatchType} of a regex from the function it is passed to.
*
* <p>Direct calls are classified by the callee name; for {@code compile} calls the usages of the
* variable the compiled pattern is assigned to are inspected instead.
*/
final class RedosMatchTypeHelper {
33+
34+
// Callee names that are classified as a full match.
private static final Set<String> FULL_MATCH_METHODS = Set.of("fullmatch");
35+
// Callee names that are classified as a partial match.
private static final Set<String> PARTIAL_MATCH_METHODS = Set.of("findall", "search", "split", "sub", "subn");
36+
// Callee name that triggers the usage-based classification of the assigned variable.
private static final String COMPILE_METHOD = "compile";
37+
38+
// Utility class with static members only: not meant to be instantiated.
private RedosMatchTypeHelper() {
39+
}
40+
41+
/**
* Classifies the match type of the regex used in the given call.
*
* @param regexFunctionCall the call expression receiving the regex
* @return {@code FULL} or {@code PARTIAL} when the callee name is in the corresponding set, the
*         usage-based result when the callee is {@code compile}, and {@code UNKNOWN} when the callee
*         symbol is missing or its name is none of the above
*/
static MatchType getMatchTypeFromCalledMethod(CallExpression regexFunctionCall) {
42+
Symbol symbol = regexFunctionCall.calleeSymbol();
43+
if (symbol == null) {
44+
// Defensive: callee symbol should have been checked prior to calling "checkRegex"
45+
return MatchType.UNKNOWN;
46+
}
47+
if (FULL_MATCH_METHODS.contains(symbol.name())) {
48+
return MatchType.FULL;
49+
}
50+
if (PARTIAL_MATCH_METHODS.contains(symbol.name())) {
51+
return MatchType.PARTIAL;
52+
}
53+
if (COMPILE_METHOD.equals(symbol.name())) {
54+
return matchTypeOfCompiledPattern(regexFunctionCall);
55+
}
56+
return MatchType.UNKNOWN;
57+
}
58+
59+
/**
* Classifies a {@code compile} call by looking at how its assignment target is used.
*
* <p>Only the first expression of the first left-hand side of the enclosing assignment is
* considered, and only when it is a plain name with a symbol; every other shape (no enclosing
* assignment, non-name target, missing symbol) yields {@code UNKNOWN}.
*/
private static MatchType matchTypeOfCompiledPattern(CallExpression regexFunctionCall) {
60+
return Optional.ofNullable(TreeUtils.firstAncestorOfKind(regexFunctionCall, Tree.Kind.ASSIGNMENT_STMT))
61+
.map(AssignmentStatement.class::cast)
62+
.map(a -> a.lhsExpressions().get(0).expressions().get(0))
63+
.filter(lhs -> lhs.is(Tree.Kind.NAME))
64+
.map(n -> (Name) n)
65+
// Optional.map turns a null symbol into an empty Optional, which falls through to UNKNOWN.
.map(HasSymbol::symbol)
66+
.map(RedosMatchTypeHelper::getMatchTypeFromSymbolUsages)
67+
.orElse(MatchType.UNKNOWN);
68+
}
69+
70+
/**
* Derives the match type from all usages of the symbol: each usage is tested for being nested in a
* call to a full-match method and, separately, in a call to a partial-match method.
*/
private static MatchType getMatchTypeFromSymbolUsages(Symbol s) {
71+
boolean isUsedForFullMatch = s.usages().stream().map(Usage::tree).anyMatch(t -> isUsedInMethod(t, FULL_MATCH_METHODS));
72+
boolean isUsedForPartialMatch = s.usages().stream().map(Usage::tree).anyMatch(t -> isUsedInMethod(t, PARTIAL_MATCH_METHODS));
73+
return getMatchType(isUsedForFullMatch, isUsedForPartialMatch);
74+
}
75+
76+
/**
* Maps the two usage flags to a match type: both set gives {@code BOTH}, one set gives
* {@code FULL} or {@code PARTIAL} respectively, none set gives {@code UNKNOWN}.
*/
private static MatchType getMatchType(boolean isUsedForFullMatch, boolean isUsedForPartialMatch) {
77+
if (isUsedForFullMatch && isUsedForPartialMatch) {
78+
return MatchType.BOTH;
79+
}
80+
if (isUsedForFullMatch) {
81+
return MatchType.FULL;
82+
}
83+
if (isUsedForPartialMatch) {
84+
return MatchType.PARTIAL;
85+
}
86+
return MatchType.UNKNOWN;
87+
}
88+
89+
// True when TreeUtils.firstAncestor finds a node matching a call to one of the given methods.
// NOTE(review): presumably firstAncestor walks the parents of the tree - confirm in TreeUtils.
private static boolean isUsedInMethod(Tree tree, Set<String> methodNames) {
90+
return TreeUtils.firstAncestor(tree, isCallToMethod(methodNames)) != null;
91+
}
92+
93+
/**
* Builds a predicate that accepts a non-null call expression whose callee symbol is resolved and
* whose name is contained in {@code methodNames}; anything else is rejected.
*/
private static Predicate<Tree> isCallToMethod(Set<String> methodNames) {
94+
return tree -> Optional.ofNullable(tree)
95+
.filter(t -> t.is(Tree.Kind.CALL_EXPR))
96+
.map(CallExpression.class::cast)
97+
.map(CallExpression::calleeSymbol)
98+
.map(Symbol::name)
99+
.filter(methodNames::contains)
100+
.isPresent();
101+
}
102+
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* SonarQube Python Plugin
3+
* Copyright (C) SonarSource Sàrl
4+
* mailto:info AT sonarsource DOT com
5+
*
6+
* You can redistribute and/or modify this program under the terms of
7+
* the Sonar Source-Available License Version 1, as published by SonarSource Sàrl.
8+
*
9+
* This program is distributed in the hope that it will be useful,
10+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12+
* See the Sonar Source-Available License for more details.
13+
*
14+
* You should have received a copy of the Sonar Source-Available License
15+
* along with this program; if not, see https://sonarsource.com/license/ssal/
16+
*/
17+
package org.sonar.python.checks.regex;
18+
19+
20+
import java.util.Optional;
21+
import org.sonar.check.Rule;
22+
import org.sonarsource.analyzer.commons.regex.finders.RedosFinder;
23+
24+
/**
* Rule S8786: provides a message only for regular expressions whose backtracking type is
* {@code ALWAYS_QUADRATIC}. The regex analysis itself is inherited from {@link AbstractRedosCheck}.
*/
@Rule(key = "S8786")
25+
public class SuperLinearRegexCheck extends AbstractRedosCheck {
26+
27+
private static final String MESSAGE = "Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.";
28+
29+
/**
* Returns the fixed message for {@code ALWAYS_QUADRATIC} backtracking and an empty
* {@code Optional} for every other backtracking type.
*
* @param backtrackingType the backtracking category to build a message for
* @param regexContainsBackReference not used by this rule
* @return the message, or empty when this rule provides none for the given type
*/
@Override
30+
protected Optional<String> buildMessage(RedosFinder.BacktrackingType backtrackingType, boolean regexContainsBackReference) {
31+
return switch (backtrackingType) {
32+
case ALWAYS_QUADRATIC -> Optional.of(MESSAGE);
33+
default -> Optional.empty();
34+
};
35+
}
36+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
<p>Regular expression engines rely on backtracking to evaluate patterns against input. Certain regex patterns lead to non-linear backtracking, where
2+
the evaluation time grows polynomially with input size.</p>
3+
<h2>Why is this an issue?</h2>
4+
<p>Regular expression engines use backtracking to try all possible execution paths when evaluating a pattern against an input. In some cases, this
5+
leads to non-linear backtracking where the worst-case evaluation time grows polynomially (e.g., O(n²) or O(n³)) with the input size. While not as
6+
severe as catastrophic backtracking, such patterns can significantly degrade application performance when processing large or untrusted inputs.</p>
7+
<p>This rule reports regular expressions that exhibit non-linear backtracking behavior.</p>
8+
<h2>How to fix it</h2>
9+
<p>To fix a regular expression with non-linear backtracking, consider the following strategies:</p>
10+
<ul>
11+
<li>Replace <code>.</code> with negated character classes to exclude separators where applicable (e.g., <code>[^,]*</code>
12+
instead of <code>.*</code> before <code>,</code>).</li>
13+
<li>Use bounded quantifiers such as <code>{1,5}</code> to limit repetitions.</li>
14+
<li>Restructure alternations and quantifiers to eliminate ambiguity — avoid patterns where multiple alternatives can match the same character.</li>
15+
<li>Use possessive quantifiers and atomic grouping (available since Python 3.11) to prevent the regex engine from keeping backtracking
16+
positions.</li>
17+
</ul>
18+
<h3>Code examples</h3>
19+
<p>The following regular expression has polynomial backtracking: without a start anchor, the engine retries the pattern at every position, leading to
20+
quadratic evaluation time when there is no match.</p>
21+
<h4>Noncompliant code example</h4>
22+
<pre data-diff-id="1" data-diff-type="noncompliant">
23+
import re
24+
25+
re.search(r"a+b", input) # Noncompliant - polynomial backtracking when the pattern does not match
26+
</pre>
27+
<h4>Compliant solution</h4>
28+
<p>Adding a start anchor prevents the engine from retrying at every position:</p>
29+
<pre data-diff-id="1" data-diff-type="compliant">
30+
import re
31+
32+
re.search(r"^a+b", input) # Compliant - anchor eliminates redundant backtracking positions
33+
</pre>
34+
<h2>Resources</h2>
35+
<h3>Articles &amp; blog posts</h3>
36+
<ul>
37+
<li><a href="https://owasp.org/www-community/attacks/Regular_expression_Denial_of_Service_-_ReDoS">owasp.org</a> - Regular expression Denial of
38+
Service - ReDoS</li>
39+
<li><a href="https://www.regular-expressions.info/catastrophic.html">regular-expressions.info</a> - Runaway Regular Expressions: Catastrophic
40+
Backtracking</li>
41+
</ul>
42+
<h3>Related rules</h3>
43+
<ul>
44+
<li>{rule:python:S5852} - Regular expressions should not cause catastrophic backtracking</li>
45+
</ul>
46+
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
{
2+
"title": "Regular expressions should not cause non-linear backtracking",
3+
"type": "CODE_SMELL",
4+
"code": {
5+
"impacts": {
6+
"RELIABILITY": "MEDIUM"
7+
},
8+
"attribute": "EFFICIENT"
9+
},
10+
"status": "ready",
11+
"quickfix": "unknown",
12+
"remediation": {
13+
"func": "Constant\/Issue",
14+
"constantCost": "20min"
15+
},
16+
"tags": [
17+
"regex",
18+
"performance"
19+
],
20+
"defaultSeverity": "Major",
21+
"ruleSpecification": "RSPEC-8786",
22+
"sqKey": "S8786",
23+
"scope": "All"
24+
}

python-checks/src/main/resources/org/sonar/l10n/py/rules/python/Sonar_agentic_AI_profile.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,7 @@
305305
"S8411",
306306
"S8413",
307307
"S8414",
308-
"S8415"
308+
"S8415",
309+
"S8786"
309310
]
310311
}

0 commit comments

Comments
 (0)