Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions its/ruling/src/test/resources/expected/python-S5856.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
'project:biopython/Bio/motifs/pfm.py':[
338,
],
'project:buildbot-slave-0.8.6p1/buildslave/runprocess.py':[
300,
],
'project:django-2.2.3/django/utils/translation/trans_real.py':[
36,
],
'project:mypy-0.782/test-data/stdlib-samples/3.2/glob.py':[
76,
77,
],
'project:numpy-1.16.4/numpy/distutils/mingw32ccompiler.py':[
53,
],
'project:tensorflow/python/distribute/cluster_resolver/slurm_cluster_resolver.py':[
75,
],
'project:tensorflow/python/keras/callbacks.py':[
1373,
],
'project:tensorflow/tools/docs/py_guide_parser.py':[
65,
],
'project:tornado-2.3/demos/appengine/markdown.py':[
826,
],
'project:tornado-2.3/demos/blog/markdown.py':[
826,
],
'project:twisted-12.1.0/twisted/words/protocols/jabber/sasl.py':[
84,
],
}
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@
<mockito.version>3.9.0</mockito.version>
<sonar.version>8.9.0.43852</sonar.version>
<sonar.orchestrator.version>3.35.1.2719</sonar.orchestrator.version>
<sonar-analyzer-commons.version>1.21.0.807</sonar-analyzer-commons.version>
<sonar-analyzer-commons.version>1.21.0.809</sonar-analyzer-commons.version>
<sonarlint-core.version>6.0.0.32513</sonarlint-core.version>
<sslr.version>1.23</sslr.version>
<protobuf.version>3.17.3</protobuf.version>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
import org.sonar.python.checks.regex.EmptyStringRepetitionCheck;
import org.sonar.python.checks.regex.ImpossibleBoundariesCheck;
import org.sonar.python.checks.regex.GraphemeClustersInClassesCheck;
import org.sonar.python.checks.regex.InvalidRegexCheck;
import org.sonar.python.checks.regex.RegexComplexityCheck;
import org.sonar.python.checks.regex.SingleCharacterAlternationCheck;
import org.sonar.python.checks.regex.RedundantRegexAlternativesCheck;
Expand Down Expand Up @@ -158,6 +159,7 @@ public static Iterable<Class> getChecks() {
InstanceAndClassMethodsAtLeastOnePositionalCheck.class,
InstanceMethodSelfAsFirstCheck.class,
InvalidOpenModeCheck.class,
InvalidRegexCheck.class,
InvariantReturnCheck.class,
ItemOperationsTypeCheck.class,
IterationOnNonIterableCheck.class,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* SonarQube Python Plugin
* Copyright (C) 2011-2021 SonarSource SA
* mailto:info AT sonarsource DOT com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonar.python.checks.regex;

import java.util.List;
import java.util.stream.Collectors;
import org.sonar.check.Rule;
import org.sonar.plugins.python.api.tree.CallExpression;
import org.sonarsource.analyzer.commons.regex.RegexIssueLocation;
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
import org.sonarsource.analyzer.commons.regex.SyntaxError;
import org.sonarsource.analyzer.commons.regex.ast.RegexSyntaxElement;

@Rule(key = "S5856")
public class InvalidRegexCheck extends AbstractRegexCheck {

private static final String MESSAGE_FORMAT = "Fix the syntax error%s inside this regex.";

@Override
public void checkRegex(RegexParseResult regexParseResult, CallExpression regexFunctionCall) {
List<SyntaxError> syntaxErrors = regexParseResult.getSyntaxErrors();
if (!syntaxErrors.isEmpty()) {
reportSyntaxErrors(syntaxErrors);
}
}

private void reportSyntaxErrors(List<SyntaxError> syntaxErrors) {
// report on the first issue
RegexSyntaxElement tree = syntaxErrors.get(0).getOffendingSyntaxElement();
List<RegexIssueLocation> secondaries = syntaxErrors.stream()
.map(error -> new RegexIssueLocation(error.getOffendingSyntaxElement(), error.getMessage()))
.collect(Collectors.toList());

String msg = String.format(MESSAGE_FORMAT, secondaries.size() > 1 ? "s" : "");
addIssue(tree, msg, null, secondaries);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<p>Regular expressions have their own syntax that is understood by regular expression engines. Those engines will throw an exception at runtime if
they are given a regular expression that does not conform to that syntax.</p>
<p>To avoid syntax errors, special characters should be escaped with backslashes when they are intended to be matched literally and references to
capturing groups should use the correctly spelled name or number of the group.</p>
<p>To match a literal string, rather than a regular expression, either all special characters should be escaped or methods that don’t use regular
expressions should be used.</p>
<h2>Noncompliant Code Example</h2>
<pre>
re.compile(r"([")
re.sub(r"([", "{", input)
re.compile(r"(\w+-(\d+)")
</pre>
<h2>Compliant Solution</h2>
<pre>
re.compile(r"\(\[")
input.replace("([", "{")
re.compile(r"(\w+)-(\d+)")
</pre>

Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"title": "Regular expressions should be syntactically valid",
"type": "BUG",
"status": "ready",
"remediation": {
"func": "Constant\/Issue",
"constantCost": "15min"
},
"tags": [
"regex"
],
"defaultSeverity": "Critical",
"ruleSpecification": "RSPEC-5856",
"sqKey": "S5856",
"scope": "Main",
"quickfix": "unknown"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* SonarQube Python Plugin
* Copyright (C) 2011-2021 SonarSource SA
* mailto:info AT sonarsource DOT com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonar.python.checks.regex;

import org.junit.Test;
import org.sonar.python.checks.utils.PythonCheckVerifier;

/**
 * Runs {@link InvalidRegexCheck} against its Python fixture file through
 * {@link PythonCheckVerifier}.
 */
public class InvalidRegexCheckTest {

  // Fixture whose comments describe the issues the check is expected to raise.
  private static final String FIXTURE_PATH = "src/test/resources/checks/regex/invalidRegexCheck.py";

  @Test
  public void test() {
    InvalidRegexCheck check = new InvalidRegexCheck();
    PythonCheckVerifier.verify(FIXTURE_PATH, check);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ def compliant(input):
re.match(r"[[a-z&&b-e]c]", input) # FN because we don't support intersections
re.match(r"(?i)[A-_d-{]", input) # FN because we ignore case insensitivity unless both ends of the ranges are letters
re.match(r"(?i)[A-z_]", input) # FN because A-z gets misinterpreted as A-Za-z due to the way we handle case insensitivity
re.match(r"[\p{Armenian}x]", input) # FN because we don't support \p at the moment
re.match(r"[\abc]", input)
re.match(r'[\s\'"\:\{\}\[\],&\*\#\?]', input)
re.match(r"[0-9\\d]", input) # Compliant
Expand Down
40 changes: 40 additions & 0 deletions python-checks/src/test/resources/checks/regex/invalidRegexCheck.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import re


# Patterns with syntax errors (unclosed group, unclosed character class, malformed quantifier).
# The trailing comments on each call are expectations read by the check verifier: the issue
# message in {{...}}, the primary location marker, and one secondary location per syntax error.
# Do not insert lines between a call and its marker comments: the "@-N" offsets are relative.
def non_compliant(input):
re.match(r'(', input) # Noncompliant {{Fix the syntax error inside this regex.}}
# ^
# ^@-1< {{Expected ')', but found the end of the regex}}
re.match(r'x{1,2,3}|(', input) # Noncompliant {{Fix the syntax errors inside this regex.}}
# ^
# ^@-1< {{Expected '}', but found ','}}
# ^@-2< {{Expected ')', but found the end of the regex}}
re.match(r'$[a-z^', input) # Noncompliant {{Fix the syntax error inside this regex.}}
# ^
# ^@-1< {{Expected ']', but found the end of the regex}}
re.match(r'(\w+-(\d+)', input) # Noncompliant {{Fix the syntax error inside this regex.}}
# ^
# ^@-1< {{Expected ')', but found the end of the regex}}


# A pattern whose special characters are escaped; no issue is expected on it.
def compliant(input):
re.match(r'\(\[', input)


# Constructs on which an issue is expected: \p{...}, (?>...) and the "*+" quantifier.
# NOTE(review): going by the function name these are treated as unsupported regex features
# rather than plain typos - confirm against the targeted Python versions.
def unsupported_feature(input):
re.match(r'\p{Lower}', input) # Noncompliant
re.match(r'(?>x)', input) # Noncompliant
re.match(r'x*+', input) # Noncompliant


# Patterns on which the check currently raises an issue.
# NOTE(review): going by the function name these are known false positives, i.e. patterns that
# are presumably valid at runtime - confirm before relying on that.
# The "@+3" marker below is a relative line offset; keep the line layout of that call intact.
def false_positives():
re.compile(r"\s*([ACGT])\s*[[]*[|]*\s*([0-9.\s]+)\s*[]]*\s*") # Noncompliant
# Noncompliant@+3
re.compile(r'''
([A-Za-z]{1,8}(?:-[A-Za-z0-9]{1,8})*|\*) # "en", "en-au", "x-y-z", "es-419", "*"
(?:\s*;\s*q=(0(?:\.\d{,3})?|1(?:\.0{,3})?))? # Optional "q=1.00", "q=0.8"
(?:\s*,\s*|$) # Multiple accepts per header.
''', re.VERBOSE)
re.compile(r'^\s+\[([\s*[0-9]*)\] ([a-zA-Z0-9_]*)') # Noncompliant
re.compile(r'([^,[\]]*)(\[([^\]]+)\])?$') # Noncompliant
re.compile(r'{.*}') # Noncompliant
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ def non_compliant(input):
re.match(r"\".*?\"", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^\"]*+".}}
re.match(r".*?\w", input) # Noncompliant {{Replace this use of a reluctant quantifier with "\W*+".}}
re.match(r".*?\W", input) # Noncompliant {{Replace this use of a reluctant quantifier with "\w*+".}}
re.match(r".*?\p{L}", input) # Noncompliant {{Replace this use of a reluctant quantifier with "\P{L}*+".}}
re.match(r".*?\P{L}", input) # Noncompliant {{Replace this use of a reluctant quantifier with "\p{L}*+".}}
re.match(r"\[.*?\]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^\]]*+".}}
re.match(r".+?[abc]", input) # Noncompliant {{Replace this use of a reluctant quantifier with "[^abc]++".}}
re.match(r"(?-U:\s)*?\S", input)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@ public class PythonAnalyzerRegexSource extends PythonRegexSource {
private final boolean isRawString;

public PythonAnalyzerRegexSource(StringElement s) {
// TODO: Do we need the quote? If yes, don't hardcode
super(s.trimmedQuotesValue(), '"');
super(s.trimmedQuotesValue());
String prefix = s.prefix();
Token firstToken = s.firstToken();
sourceLine = firstToken.line();
Expand Down