Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# setup
sep = ","
no_sep = None

# positives
"""
itemA
itemB
itemC
""".split()

"a,b,c,d".split(",")
"a,b,c,d".split(None)
"a,b,c,d".split(",", 1)
"a,b,c,d".split(None, 1)
"a,b,c,d".split(sep=",")
"a,b,c,d".split(sep=None)
"a,b,c,d".split(sep=",", maxsplit=1)
"a,b,c,d".split(sep=None, maxsplit=1)
"a,b,c,d".split(maxsplit=1, sep=",")
"a,b,c,d".split(maxsplit=1, sep=None)
"a,b,c,d".split(",", maxsplit=1)
"a,b,c,d".split(None, maxsplit=1)
"a,b,c,d".split(maxsplit=1)
"a,b,c,d".split(maxsplit=1.0)
"a,b,c,d".split(maxsplit=1)
"a,b,c,d".split(maxsplit=0)
"VERB AUX PRON ADP DET".split(" ")
' 1 2 3 '.split()
'1<>2<>3<4'.split('<>')

" a*a a*a a ".split("*", -1) # [' a', 'a a', 'a a ']
"".split() # []
"""
""".split() # []
" ".split() # []
"/abc/".split() # ['/abc/']
("a,b,c"
# comment
.split()
) # ['a,b,c']
("a,b,c"
# comment
.split(",")
) # ['a', 'b', 'c']
("a,"
# comment
"b,"
"c"
.split(",")
) # ['a', 'b', 'c']

"hello "\
"world".split()
# ['hello', 'world']

# prefixes and isc
u"a b".split() # ['a', 'b']
r"a \n b".split() # ['a', '\\n', 'b']
("a " "b").split() # ['a', 'b']
"a " "b".split() # ['a', 'b']
u"a " "b".split() # ['a', 'b']
"a " u"b".split() # ['a', 'b']
u"a " r"\n".split() # ['a', '\\n']
r"\n " u"\n".split() # ['\\n']
r"\n " "\n".split() # ['\\n']
"a " r"\n".split() # ['a', '\\n']

"a,b,c".split(',', maxsplit=0) # ['a,b,c']
"a,b,c".split(',', maxsplit=-1) # ['a', 'b', 'c']
"a,b,c".split(',', maxsplit=-2) # ['a', 'b', 'c']
"a,b,c".split(',', maxsplit=-0) # ['a,b,c']

# negatives

# invalid values should not cause panic
"a,b,c,d".split(maxsplit="hello")
"a,b,c,d".split(maxsplit=-"hello")

# variable names not implemented
"a,b,c,d".split(sep)
"a,b,c,d".split(no_sep)
for n in range(3):
"a,b,c,d".split(",", maxsplit=n)

# f-strings not yet implemented
world = "world"
_ = f"{world}_hello_world".split("_")

hello = "hello_world"
_ = f"{hello}_world".split("_")

# split on bytes not yet implemented, much less frequent
b"TesT.WwW.ExamplE.CoM".split(b".")

# str.splitlines not yet implemented
"hello\nworld".splitlines()
"hello\nworld".splitlines(keepends=True)
"hello\nworld".splitlines(keepends=False)
12 changes: 12 additions & 0 deletions crates/ruff_linter/src/checkers/ast/analyze/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,8 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
Rule::StaticJoinToFString,
// refurb
Rule::HashlibDigestHex,
// flake8-simplify
Rule::SplitStaticString,
]) {
if let Expr::Attribute(ast::ExprAttribute { value, attr, .. }) = func.as_ref() {
let attr = attr.as_str();
Expand All @@ -401,6 +403,16 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
string_value.to_str(),
);
}
} else if matches!(attr, "split" | "rsplit") {
// "...".split(...) call
if checker.enabled(Rule::SplitStaticString) {
flake8_simplify::rules::split_static_string(
checker,
attr,
call,
string_value.to_str(),
);
}
} else if attr == "format" {
// "...".format(...) call
let location = expr.range();
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Flake8Simplify, "223") => (RuleGroup::Stable, rules::flake8_simplify::rules::ExprAndFalse),
(Flake8Simplify, "300") => (RuleGroup::Stable, rules::flake8_simplify::rules::YodaConditions),
(Flake8Simplify, "401") => (RuleGroup::Stable, rules::flake8_simplify::rules::IfElseBlockInsteadOfDictGet),
(Flake8Simplify, "905") => (RuleGroup::Preview, rules::flake8_simplify::rules::SplitStaticString),
(Flake8Simplify, "910") => (RuleGroup::Stable, rules::flake8_simplify::rules::DictGetWithNoneDefault),
(Flake8Simplify, "911") => (RuleGroup::Stable, rules::flake8_simplify::rules::ZipDictKeysAndValues),

Expand Down
5 changes: 3 additions & 2 deletions crates/ruff_linter/src/rules/flake8_simplify/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ mod tests {
#[test_case(Rule::ReimplementedBuiltin, Path::new("SIM111.py"))]
#[test_case(Rule::UncapitalizedEnvironmentVariables, Path::new("SIM112.py"))]
#[test_case(Rule::EnumerateForLoop, Path::new("SIM113.py"))]
#[test_case(Rule::IfWithSameArms, Path::new("SIM114.py"))]
#[test_case(Rule::OpenFileWithContextHandler, Path::new("SIM115.py"))]
#[test_case(Rule::IfElseBlockInsteadOfDictLookup, Path::new("SIM116.py"))]
#[test_case(Rule::MultipleWithStatements, Path::new("SIM117.py"))]
#[test_case(Rule::InDictKeys, Path::new("SIM118.py"))]
#[test_case(Rule::NegateEqualOp, Path::new("SIM201.py"))]
Expand All @@ -44,9 +46,8 @@ mod tests {
#[test_case(Rule::ExprAndFalse, Path::new("SIM223.py"))]
#[test_case(Rule::YodaConditions, Path::new("SIM300.py"))]
#[test_case(Rule::IfElseBlockInsteadOfDictGet, Path::new("SIM401.py"))]
#[test_case(Rule::SplitStaticString, Path::new("SIM905.py"))]
#[test_case(Rule::DictGetWithNoneDefault, Path::new("SIM910.py"))]
#[test_case(Rule::IfElseBlockInsteadOfDictLookup, Path::new("SIM116.py"))]
#[test_case(Rule::IfWithSameArms, Path::new("SIM114.py"))]
fn rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
let diagnostics = test_path(
Expand Down
2 changes: 2 additions & 0 deletions crates/ruff_linter/src/rules/flake8_simplify/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ pub(crate) use needless_bool::*;
pub(crate) use open_file_with_context_handler::*;
pub(crate) use reimplemented_builtin::*;
pub(crate) use return_in_try_except_finally::*;
pub(crate) use split_static_string::*;
pub(crate) use suppressible_exception::*;
pub(crate) use yoda_conditions::*;
pub(crate) use zip_dict_keys_and_values::*;
Expand All @@ -35,6 +36,7 @@ mod needless_bool;
mod open_file_with_context_handler;
mod reimplemented_builtin;
mod return_in_try_except_finally;
mod split_static_string;
mod suppressible_exception;
mod yoda_conditions;
mod zip_dict_keys_and_values;
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
use std::cmp::Ordering;

use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::{
Expr, ExprCall, ExprContext, ExprList, ExprStringLiteral, ExprUnaryOp, StringLiteral,
StringLiteralFlags, StringLiteralValue, UnaryOp,
};
use ruff_text_size::{Ranged, TextRange};

use crate::checkers::ast::Checker;

/// ## What it does
/// Checks for `str.split` calls that can be replaced with a list literal.
///
/// ## Why is this bad?
/// List literals are more readable and do not require the overhead of calling `str.split`.
///
/// ## Example
/// ```python
/// "a,b,c,d".split(",")
/// ```
///
/// Use instead:
/// ```python
/// ["a", "b", "c", "d"]
/// ```
///
/// ## References
///
/// - [Python documentation: `str.split`](https://docs.python.org/3/library/stdtypes.html#str.split)
///
/// ```
#[violation]
pub struct SplitStaticString;

impl Violation for SplitStaticString {
const FIX_AVAILABILITY: FixAvailability = FixAvailability::Sometimes;

#[derive_message_formats]
fn message(&self) -> String {
format!("Consider using a list instead of `str.split`")
}

fn fix_title(&self) -> Option<String> {
Some(format!("Replace `str.split` with list literal"))
}
}

fn construct_replacement(list_items: &[&str]) -> Expr {
Expr::List(ExprList {
elts: list_items
.iter()
.map(|list_item| {
Expr::StringLiteral(ExprStringLiteral {
value: StringLiteralValue::single(StringLiteral {
value: (*list_item).to_string().into_boxed_str(),
range: TextRange::default(),
flags: StringLiteralFlags::default(),
}),
range: TextRange::default(),
})
})
.collect(),
ctx: ExprContext::Load,
range: TextRange::default(),
})
}

fn split_default(str_value: &str, max_split: i32) -> Option<Expr> {
// From the Python documentation:
// > If sep is not specified or is None, a different splitting algorithm is applied: runs of
// > consecutive whitespace are regarded as a single separator, and the result will contain
// > no empty strings at the start or end if the string has leading or trailing whitespace.
// > Consequently, splitting an empty string or a string consisting of just whitespace with
// > a None separator returns [].
// https://docs.python.org/3/library/stdtypes.html#str.split
match max_split.cmp(&0) {
Ordering::Greater => {
// Autofix for maxsplit without separator not yet implemented
// split_whitespace().remainder() is still experimental:
// https://doc.rust-lang.org/std/str/struct.SplitWhitespace.html#method.remainder
None
}
Ordering::Equal => {
let list_items: Vec<&str> = vec![str_value];
Some(construct_replacement(&list_items))
}
Ordering::Less => {
let list_items: Vec<&str> = str_value.split_whitespace().collect();
Some(construct_replacement(&list_items))
}
}
}

fn split_sep(str_value: &str, sep_value: &str, max_split: i32, direction_left: bool) -> Expr {
let list_items: Vec<&str> = if let Ok(split_n) = usize::try_from(max_split) {
if direction_left {
str_value.splitn(split_n + 1, sep_value).collect()
} else {
str_value.rsplitn(split_n + 1, sep_value).collect()
}
} else {
if direction_left {
str_value.split(sep_value).collect()
} else {
str_value.rsplit(sep_value).collect()
}
};

construct_replacement(&list_items)
}

fn get_maxsplit_value(maxsplit_arg: Option<&Expr>) -> Option<i32> {
let maxsplit_value = if let Some(maxsplit) = maxsplit_arg {
match maxsplit {
// Negative number
Expr::UnaryOp(ExprUnaryOp {
op: UnaryOp::USub,
operand,
..
}) => {
match &**operand {
Expr::NumberLiteral(maxsplit_val) => maxsplit_val
.value
.as_int()
.and_then(ruff_python_ast::Int::as_i32)
.map(|f| -f),
// Ignore when `maxsplit` is not a numeric value
_ => None,
}
}
// Positive number
Expr::NumberLiteral(maxsplit_val) => maxsplit_val
.value
.as_int()
.and_then(ruff_python_ast::Int::as_i32),
// Ignore when `maxsplit` is not a numeric value
_ => None,
}
} else {
// Default value is -1 (no splits)
Some(-1)
};
maxsplit_value
}

/// SIM905
pub(crate) fn split_static_string(
checker: &mut Checker,
attr: &str,
call: &ExprCall,
str_value: &str,
) {
let ExprCall { arguments, .. } = call;

let maxsplit_arg = arguments.find_argument("maxsplit", 1);
let Some(maxsplit_value) = get_maxsplit_value(maxsplit_arg) else {
return;
};

// `split` vs `rsplit`
let direction_left = attr == "split";

let sep_arg = arguments.find_argument("sep", 0);
let split_replacement = if let Some(sep) = sep_arg {
match sep {
Expr::NoneLiteral(_) => split_default(str_value, maxsplit_value),
Expr::StringLiteral(sep_value) => {
let sep_value_str = sep_value.value.to_str();
Some(split_sep(
str_value,
sep_value_str,
maxsplit_value,
direction_left,
))
}
// Ignore names until type inference is available
_ => {
return;
}
}
} else {
split_default(str_value, maxsplit_value)
};

let mut diagnostic = Diagnostic::new(SplitStaticString, call.range());
if let Some(ref replacement_expr) = split_replacement {
// Construct replacement list
let replacement = checker.generator().expr(replacement_expr);
// Unsafe because the fix does not preserve comments within implicit string concatenation
diagnostic.set_fix(Fix::unsafe_edit(Edit::range_replacement(
replacement,
call.range(),
)));
}
checker.diagnostics.push(diagnostic);
}
Loading