Skip to content

Commit 05e0a1a

Browse files
committed
Implementation for RUF035 split-of-static-string
1 parent 60a2dc5 commit 05e0a1a

8 files changed

Lines changed: 586 additions & 0 deletions

File tree

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# setup
2+
sep = ","
3+
no_sep = None
4+
5+
# positives
6+
"""
7+
itemA
8+
itemB
9+
itemC
10+
""".split()
11+
12+
"a,b,c,d".split(",")
13+
"a,b,c,d".split(None)
14+
"a,b,c,d".split(",", 1)
15+
"a,b,c,d".split(None, 1)
16+
"a,b,c,d".split(sep=",")
17+
"a,b,c,d".split(sep=None)
18+
"a,b,c,d".split(sep=",", maxsplit=1)
19+
"a,b,c,d".split(sep=None, maxsplit=1)
20+
"a,b,c,d".split(maxsplit=1, sep=",")
21+
"a,b,c,d".split(maxsplit=1, sep=None)
22+
"a,b,c,d".split(",", maxsplit=1)
23+
"a,b,c,d".split(None, maxsplit=1)
24+
"a,b,c,d".split(maxsplit=1)
25+
"a,b,c,d".split(maxsplit=1.0)
26+
"a,b,c,d".split(maxsplit=1)
27+
"a,b,c,d".split(maxsplit=0)
28+
"VERB AUX PRON ADP DET".split(" ")
29+
' 1 2 3 '.split()
30+
'1<>2<>3<4'.split('<>')
31+
32+
# negatives
33+
34+
# test
35+
"a,b,c,d".split(maxsplit="hello")
36+
37+
# variable names not implemented
38+
"a,b,c,d".split(sep)
39+
"a,b,c,d".split(no_sep)
40+
for n in range(3):
41+
"a,b,c,d".split(",", maxsplit=n)
42+
43+
# f-strings not yet implemented
44+
world = "world"
45+
_ = f"{world}_hello_world".split("_")
46+
47+
hello = "hello_world"
48+
_ = f"{hello}_world".split("_")
49+
50+
# split on bytes not yet implemented, much less frequent
51+
b"TesT.WwW.ExamplE.CoM".split(b".")
52+
53+
# str.splitlines not yet implemented
54+
"hello\nworld".splitlines()
55+
"hello\nworld".splitlines(keepends=True)
56+
"hello\nworld".splitlines(keepends=False)

crates/ruff_linter/src/checkers/ast/analyze/expression.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,8 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
384384
Rule::StaticJoinToFString,
385385
// refurb
386386
Rule::HashlibDigestHex,
387+
// ruff
388+
Rule::SplitOfStaticString,
387389
]) {
388390
if let Expr::Attribute(ast::ExprAttribute { value, attr, .. }) = func.as_ref() {
389391
let attr = attr.as_str();
@@ -401,6 +403,16 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
401403
string_value.to_str(),
402404
);
403405
}
406+
} else if attr == "split" || attr == "rsplit" {
407+
// "...".split(...) call
408+
if checker.enabled(Rule::SplitOfStaticString) {
409+
ruff::rules::split_of_static_string(
410+
checker,
411+
attr,
412+
call,
413+
string_value.to_str(),
414+
);
415+
}
404416
} else if attr == "format" {
405417
// "...".format(...) call
406418
let location = expr.range();

crates/ruff_linter/src/codes.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -963,6 +963,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
963963
(Ruff, "032") => (RuleGroup::Preview, rules::ruff::rules::DecimalFromFloatLiteral),
964964
(Ruff, "033") => (RuleGroup::Preview, rules::ruff::rules::PostInitDefault),
965965
(Ruff, "034") => (RuleGroup::Preview, rules::ruff::rules::UselessIfElse),
966+
(Ruff, "035") => (RuleGroup::Preview, rules::ruff::rules::SplitOfStaticString),
966967
(Ruff, "100") => (RuleGroup::Stable, rules::ruff::rules::UnusedNOQA),
967968
(Ruff, "101") => (RuleGroup::Stable, rules::ruff::rules::RedirectedNOQA),
968969

crates/ruff_linter/src/rules/ruff/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ mod tests {
6161
#[test_case(Rule::UselessIfElse, Path::new("RUF034.py"))]
6262
#[test_case(Rule::RedirectedNOQA, Path::new("RUF101.py"))]
6363
#[test_case(Rule::PostInitDefault, Path::new("RUF033.py"))]
64+
#[test_case(Rule::SplitOfStaticString, Path::new("RUF035.py"))]
6465
fn rules(rule_code: Rule, path: &Path) -> Result<()> {
6566
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
6667
let diagnostics = test_path(

crates/ruff_linter/src/rules/ruff/rules/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,3 +79,6 @@ pub(crate) enum Context {
7979
pub(crate) use post_init_default::*;
8080

8181
mod post_init_default;
82+
pub(crate) use split_of_static_string::*;
83+
84+
mod split_of_static_string;
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation};
2+
use ruff_macros::{derive_message_formats, violation};
3+
use ruff_python_ast::{
4+
Expr, ExprCall, ExprContext, ExprList, ExprStringLiteral, StringLiteral, StringLiteralFlags,
5+
StringLiteralValue,
6+
};
7+
use ruff_text_size::{Ranged, TextRange};
8+
9+
use crate::checkers::ast::Checker;
10+
11+
/// ## What it does
12+
/// Checks for str.split calls that can be replaced with a `list` literal.
13+
///
14+
/// ## Why is this bad?
15+
/// List literals are more readable and do not require the overhead of calling `str.split`.
16+
///
17+
/// ## Example
18+
/// ```python
19+
/// "a,b,c,d".split(",")
20+
/// ```
21+
///
22+
/// Use instead:
23+
/// ```python
24+
/// ["a", "b", "c", "d"]
25+
/// ```
26+
///
27+
/// ## References
28+
///
29+
/// - [Python documentation: `str.split`](https://docs.python.org/3/library/stdtypes.html#str.split)
30+
///
31+
/// ```
32+
#[violation]
33+
pub struct SplitOfStaticString;
34+
35+
impl Violation for SplitOfStaticString {
36+
const FIX_AVAILABILITY: FixAvailability = FixAvailability::Sometimes;
37+
38+
#[derive_message_formats]
39+
fn message(&self) -> String {
40+
format!("Consider using a list instead of string split")
41+
}
42+
43+
fn fix_title(&self) -> Option<String> {
44+
Some(format!("Replace string split with list literal"))
45+
}
46+
}
47+
48+
fn construct_replacement(list_items: &[&str]) -> Expr {
49+
Expr::List(ExprList {
50+
elts: list_items
51+
.iter()
52+
.map(|list_item| {
53+
Expr::StringLiteral(ExprStringLiteral {
54+
value: StringLiteralValue::single(StringLiteral {
55+
value: (*list_item).to_string().into_boxed_str(),
56+
range: TextRange::default(),
57+
flags: StringLiteralFlags::default(),
58+
}),
59+
range: TextRange::default(),
60+
})
61+
})
62+
.collect(),
63+
ctx: ExprContext::Load,
64+
range: TextRange::default(),
65+
})
66+
}
67+
68+
fn split_default(str_value: &str, max_split: usize) -> Option<Expr> {
69+
// From the Python documentation:
70+
// > If sep is not specified or is None, a different splitting algorithm is applied: runs of
71+
// > consecutive whitespace are regarded as a single separator, and the result will contain
72+
// > no empty strings at the start or end if the string has leading or trailing whitespace.
73+
// > Consequently, splitting an empty string or a string consisting of just whitespace with
74+
// > a None separator returns [].
75+
// https://docs.python.org/3/library/stdtypes.html#str.split
76+
if max_split == 0 {
77+
let list_items: Vec<&str> = str_value.split_whitespace().collect();
78+
Some(construct_replacement(&list_items))
79+
} else {
80+
// Autofix for maxsplit without separator not yet implemented
81+
None
82+
}
83+
}
84+
85+
fn split_sep(str_value: &str, sep_value: &str, max_split: usize, direction_left: bool) -> Expr {
86+
let list_items: Vec<&str> = if direction_left && max_split > 0 {
87+
str_value.splitn(max_split + 1, sep_value).collect()
88+
} else if !direction_left && max_split > 0 {
89+
str_value.rsplitn(max_split + 1, sep_value).collect()
90+
} else if direction_left && max_split == 0 {
91+
str_value.split(sep_value).collect()
92+
} else {
93+
str_value.rsplit(sep_value).collect()
94+
};
95+
construct_replacement(&list_items)
96+
}
97+
98+
/// RUF035
99+
pub(crate) fn split_of_static_string(
100+
checker: &mut Checker,
101+
attr: &str,
102+
call: &ExprCall,
103+
str_value: &str,
104+
) {
105+
let ExprCall { arguments, .. } = call;
106+
107+
let sep_arg = arguments.find_argument("sep", 0);
108+
let maxsplit_arg = arguments.find_argument("maxsplit", 1);
109+
110+
// `split` vs `rsplit`
111+
let direction_left = attr == "split";
112+
113+
let maxsplit_value = if let Some(maxsplit) = maxsplit_arg {
114+
match maxsplit {
115+
Expr::NumberLiteral(maxsplit_val) => {
116+
if let Some(int_value) = maxsplit_val.value.as_int() {
117+
if let Some(usize_value) = int_value.as_usize() {
118+
usize_value
119+
} else {
120+
return;
121+
}
122+
} else {
123+
return;
124+
}
125+
}
126+
// Ignore when `maxsplit` is not a numeric value
127+
_ => {
128+
return;
129+
}
130+
}
131+
} else {
132+
0
133+
};
134+
135+
let split_replacement = if let Some(sep) = sep_arg {
136+
match sep {
137+
Expr::NoneLiteral(_) => split_default(str_value, maxsplit_value),
138+
Expr::StringLiteral(sep_value) => {
139+
let sep_value_str = sep_value.value.to_str();
140+
Some(split_sep(
141+
str_value,
142+
sep_value_str,
143+
maxsplit_value,
144+
direction_left,
145+
))
146+
}
147+
// Ignore names until type inference is available
148+
_ => {
149+
return;
150+
}
151+
}
152+
} else {
153+
split_default(str_value, maxsplit_value)
154+
};
155+
156+
let mut diagnostic = Diagnostic::new(SplitOfStaticString, call.range());
157+
if let Some(ref replacement_expr) = split_replacement {
158+
// Construct replacement list
159+
let replacement = checker.generator().expr(replacement_expr);
160+
diagnostic.set_fix(Fix::unsafe_edit(Edit::range_replacement(
161+
replacement,
162+
call.range(),
163+
)));
164+
}
165+
checker.diagnostics.push(diagnostic);
166+
}

0 commit comments

Comments
 (0)