Skip to content

fix [invalid_regex] not recognizing new syntax introduced after regex-1.8.0 #10682

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion clippy_lints/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ if_chain = "1.0"
itertools = "0.10.1"
pulldown-cmark = { version = "0.9", default-features = false }
quine-mc_cluskey = "0.2"
regex-syntax = "0.6"
regex-syntax = "0.7"
serde = { version = "1.0", features = ["derive"] }
serde_json = { version = "1.0", optional = true }
tempfile = { version = "3.3.0", optional = true }
Expand Down
23 changes: 11 additions & 12 deletions clippy_lints/src/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,30 +129,32 @@ fn const_str<'tcx>(cx: &LateContext<'tcx>, e: &'tcx Expr<'_>) -> Option<String>
}

fn is_trivial_regex(s: &regex_syntax::hir::Hir) -> Option<&'static str> {
use regex_syntax::hir::Anchor::{EndText, StartText};
use regex_syntax::hir::HirKind::{Alternation, Anchor, Concat, Empty, Literal};
use regex_syntax::hir::HirKind::{Alternation, Concat, Empty, Literal, Look};
use regex_syntax::hir::Look as HirLook;

let is_literal = |e: &[regex_syntax::hir::Hir]| e.iter().all(|e| matches!(*e.kind(), Literal(_)));

match *s.kind() {
Empty | Anchor(_) => Some("the regex is unlikely to be useful as it is"),
Empty | Look(_) => Some("the regex is unlikely to be useful as it is"),
Literal(_) => Some("consider using `str::contains`"),
Alternation(ref exprs) => {
if exprs.iter().all(|e| e.kind().is_empty()) {
if exprs.iter().all(|e| matches!(e.kind(), Empty)) {
Some("the regex is unlikely to be useful as it is")
} else {
None
}
},
Concat(ref exprs) => match (exprs[0].kind(), exprs[exprs.len() - 1].kind()) {
(&Anchor(StartText), &Anchor(EndText)) if exprs[1..(exprs.len() - 1)].is_empty() => {
(&Look(HirLook::Start), &Look(HirLook::End)) if exprs[1..(exprs.len() - 1)].is_empty() => {
Some("consider using `str::is_empty`")
},
(&Anchor(StartText), &Anchor(EndText)) if is_literal(&exprs[1..(exprs.len() - 1)]) => {
(&Look(HirLook::Start), &Look(HirLook::End)) if is_literal(&exprs[1..(exprs.len() - 1)]) => {
Some("consider using `==` on `str`s")
},
(&Anchor(StartText), &Literal(_)) if is_literal(&exprs[1..]) => Some("consider using `str::starts_with`"),
(&Literal(_), &Anchor(EndText)) if is_literal(&exprs[1..(exprs.len() - 1)]) => {
(&Look(HirLook::Start), &Literal(_)) if is_literal(&exprs[1..]) => {
Some("consider using `str::starts_with`")
},
(&Literal(_), &Look(HirLook::End)) if is_literal(&exprs[1..(exprs.len() - 1)]) => {
Some("consider using `str::ends_with`")
},
_ if is_literal(exprs) => Some("consider using `str::contains`"),
Expand All @@ -175,10 +177,7 @@ fn check_set<'tcx>(cx: &LateContext<'tcx>, expr: &'tcx Expr<'_>, utf8: bool) {
}

fn check_regex<'tcx>(cx: &LateContext<'tcx>, expr: &'tcx Expr<'_>, utf8: bool) {
let mut parser = regex_syntax::ParserBuilder::new()
.unicode(true)
.allow_invalid_utf8(!utf8)
.build();
let mut parser = regex_syntax::ParserBuilder::new().unicode(true).utf8(!utf8).build();

if let ExprKind::Lit(ref lit) = expr.kind {
if let LitKind::Str(ref r, style) = lit.node {
Expand Down
2 changes: 2 additions & 0 deletions tests/ui/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,10 @@ fn syntax_error() {
let set_error = RegexSet::new(&[OPENING_PAREN, r"[a-z]+\.(com|org|net)"]);
let bset_error = BRegexSet::new(&[OPENING_PAREN, r"[a-z]+\.(com|org|net)"]);

// These following three cases are considering valid since regex-1.8.0
let raw_string_error = Regex::new(r"[...\/...]");
let raw_string_error = Regex::new(r#"[...\/...]"#);
let _ = Regex::new(r"(?<hi>hi)").unwrap();

let escaped_string_span = Regex::new("\\b\\c");

Expand Down
40 changes: 14 additions & 26 deletions tests/ui/regex.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -82,122 +82,110 @@ error: regex parse error:
LL | let bset_error = BRegexSet::new(&[OPENING_PAREN, r"[a-z]+/.(com|org|net)"]);
| ^^^^^^^^^^^^^

error: regex syntax error: unrecognized escape sequence
--> $DIR/regex.rs:37:45
|
LL | let raw_string_error = Regex::new(r"[...//...]");
| ^^

error: regex syntax error: unrecognized escape sequence
--> $DIR/regex.rs:38:46
|
LL | let raw_string_error = Regex::new(r#"[...//...]"#);
| ^^

error: regex parse error:
/b/c
^^
error: unrecognized escape sequence
--> $DIR/regex.rs:40:42
--> $DIR/regex.rs:42:42
|
LL | let escaped_string_span = Regex::new("/b/c");
| ^^^^^^^^
|
= help: consider using a raw string literal: `r".."`

error: regex syntax error: duplicate flag
--> $DIR/regex.rs:42:34
--> $DIR/regex.rs:44:34
|
LL | let aux_span = Regex::new("(?ixi)");
| ^ ^

error: trivial regex
--> $DIR/regex.rs:46:33
--> $DIR/regex.rs:48:33
|
LL | let trivial_eq = Regex::new("^foobar$");
| ^^^^^^^^^^
|
= help: consider using `==` on `str`s

error: trivial regex
--> $DIR/regex.rs:48:48
--> $DIR/regex.rs:50:48
|
LL | let trivial_eq_builder = RegexBuilder::new("^foobar$");
| ^^^^^^^^^^
|
= help: consider using `==` on `str`s

error: trivial regex
--> $DIR/regex.rs:50:42
--> $DIR/regex.rs:52:42
|
LL | let trivial_starts_with = Regex::new("^foobar");
| ^^^^^^^^^
|
= help: consider using `str::starts_with`

error: trivial regex
--> $DIR/regex.rs:52:40
--> $DIR/regex.rs:54:40
|
LL | let trivial_ends_with = Regex::new("foobar$");
| ^^^^^^^^^
|
= help: consider using `str::ends_with`

error: trivial regex
--> $DIR/regex.rs:54:39
--> $DIR/regex.rs:56:39
|
LL | let trivial_contains = Regex::new("foobar");
| ^^^^^^^^
|
= help: consider using `str::contains`

error: trivial regex
--> $DIR/regex.rs:56:39
--> $DIR/regex.rs:58:39
|
LL | let trivial_contains = Regex::new(NOT_A_REAL_REGEX);
| ^^^^^^^^^^^^^^^^
|
= help: consider using `str::contains`

error: trivial regex
--> $DIR/regex.rs:58:40
--> $DIR/regex.rs:60:40
|
LL | let trivial_backslash = Regex::new("a/.b");
| ^^^^^^^
|
= help: consider using `str::contains`

error: trivial regex
--> $DIR/regex.rs:61:36
--> $DIR/regex.rs:63:36
|
LL | let trivial_empty = Regex::new("");
| ^^
|
= help: the regex is unlikely to be useful as it is

error: trivial regex
--> $DIR/regex.rs:63:36
--> $DIR/regex.rs:65:36
|
LL | let trivial_empty = Regex::new("^");
| ^^^
|
= help: the regex is unlikely to be useful as it is

error: trivial regex
--> $DIR/regex.rs:65:36
--> $DIR/regex.rs:67:36
|
LL | let trivial_empty = Regex::new("^$");
| ^^^^
|
= help: consider using `str::is_empty`

error: trivial regex
--> $DIR/regex.rs:67:44
--> $DIR/regex.rs:69:44
|
LL | let binary_trivial_empty = BRegex::new("^$");
| ^^^^
|
= help: consider using `str::is_empty`

error: aborting due to 25 previous errors
error: aborting due to 23 previous errors