diff --git a/DESCRIPTION b/DESCRIPTION index fa887dabba..3ea3c25d29 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -81,6 +81,7 @@ Collate: 'condition_message_linter.R' 'conjunct_test_linter.R' 'consecutive_assertion_linter.R' + 'consecutive_mutate_linter.R' 'cyclocomp_linter.R' 'declared_functions.R' 'deprecated.R' diff --git a/NAMESPACE b/NAMESPACE index 5efa125a59..e14f0b3e03 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -40,6 +40,7 @@ export(comparison_negation_linter) export(condition_message_linter) export(conjunct_test_linter) export(consecutive_assertion_linter) +export(consecutive_mutate_linter) export(consecutive_stopifnot_linter) export(cyclocomp_linter) export(default_linters) diff --git a/NEWS.md b/NEWS.md index a84b596df0..da97623782 100644 --- a/NEWS.md +++ b/NEWS.md @@ -33,6 +33,7 @@ * `which_grepl_linter()` for discouraging `which(grepl(ptn, x))` in favor of directly using `grep(ptn, x)` (part of #884, @MichaelChirico). * `list_comparison_linter()` for discouraging comparisons on the output of `lapply()`, e.g. `lapply(x, sum) > 10` (part of #884, @MichaelChirico). * `print_linter()` for discouraging usage of `print()` on string literals like `print("Reached here")` or `print(paste("Found", nrow(DF), "rows."))` (#1894, @MichaelChirico). +* `consecutive_mutate_linter()` for encouraging consecutive calls to `dplyr::mutate()` to be combined (part of #884, @MichaelChirico). * `if_switch_linter()` for encouraging `switch()` over repeated `if`/`else` tests (part of #884, @MichaelChirico). * `nested_pipe_linter()` for discouraging pipes within pipes, e.g. `df1 %>% inner_join(df2 %>% select(a, b))` (part of #884, @MichaelChirico). * `nrow_subset_linter()` for discouraging usage like `nrow(subset(x, conditions))` in favor of something like `with(x, sum(conditions))` which doesn't require a full subset of `x` (part of #884, @MichaelChirico). diff --git a/R/consecutive_mutate_linter.R b/R/consecutive_mutate_linter.R new file mode 100644 index 0000000000..6d1032fc81 --- /dev/null +++ b/R/consecutive_mutate_linter.R @@ -0,0 +1,101 @@ +#' Require consecutive calls to mutate() to be combined when possible +#' +#' `dplyr::mutate()` accepts any number of columns, so sequences like +#' `DF %>% dplyr::mutate(..1) %>% dplyr::mutate(..2)` are redundant -- +#' they can always be expressed with a single call to `dplyr::mutate()`. +#' +#' An exception is for some SQL back-ends, where the translation logic may not be +#' as sophisticated as that in the default `dplyr`, for example in +#' `DF %>% mutate(a = a + 1) %>% mutate(b = a - 2)`. +#' +#' @param invalid_backends Character vector of packages providing dplyr backends +#' which may not be compatible with combining `mutate()` calls in all cases. +#' Defaults to `"dbplyr"` since not all SQL backends can handle re-using +#' a variable defined in the same `mutate()` expression. +#' +#' @examples +#' # will produce lints +#' lint( +#' text = "x %>% mutate(a = 1) %>% mutate(b = 2)", +#' linters = consecutive_mutate_linter() +#' ) +#' +#' # okay +#' lint( +#' text = "x %>% mutate(a = 1, b = 2)", +#' linters = consecutive_mutate_linter() +#' ) +#' +#' code <- "library(dbplyr)\nx %>% mutate(a = 1) %>% mutate(a = a + 1)" +#' writeLines(code) +#' lint( +#' text = code, +#' linters = consecutive_mutate_linter() +#' ) +#' +#' @evalRd rd_tags("consecutive_mutate_linter") +#' @seealso [linters] for a complete list of linters available in lintr. +#' @export +consecutive_mutate_linter <- function(invalid_backends = "dbplyr") { + attach_pkg_xpath <- glue(" + //SYMBOL_FUNCTION_CALL[text() = 'library' or text() = 'require'] + /parent::expr + /following-sibling::expr + /*[self::SYMBOL or self::STR_CONST] + ") + + namespace_xpath <- glue(" + //SYMBOL_PACKAGE[{ xp_text_in_table(invalid_backends) }] + | + //COMMENT[ + contains(text(), '@import') + and ( + {xp_or(sprintf(\"contains(text(), '%s')\", invalid_backends))} + ) + ] + ") + + # match on the expr, not the SYMBOL_FUNCTION_CALL, to ensure + # namespace-qualified calls only match if the namespaces do. + # expr[2] needed in expr[1][expr[2]] to skip matches on pipelines + # starting like mutate(DF, ...) %>% foo() %>% mutate(). + # similarly, expr[1][expr[call='mutate']] covers pipelines + # starting like mutate(DF, ...) %>% mutate(...) + mutate_cond <- xp_and( + "expr/SYMBOL_FUNCTION_CALL[text() = 'mutate']", + "not(SYMBOL_SUB[text() = '.keep' or text() = '.by'])" + ) + xpath <- glue(" + (//PIPE | //SPECIAL[{ xp_text_in_table(magrittr_pipes) }]) + /preceding-sibling::expr[expr[2][{ mutate_cond }] or ({ mutate_cond })] + /following-sibling::expr[{ mutate_cond }] + ") + + Linter(function(source_expression) { + # need the full file to also catch usages at the top level + if (!is_lint_level(source_expression, "file")) { + return(list()) + } + + xml <- source_expression$full_xml_parsed_content + + attach_str <- get_r_string(xml_find_all(xml, attach_pkg_xpath)) + if (any(invalid_backends %in% attach_str)) { + return(list()) + } + + namespace_expr <- xml_find_first(xml, namespace_xpath) + if (!is.na(namespace_expr)) { + return(list()) + } + + bad_expr <- xml_find_all(xml, xpath) + + xml_nodes_to_lints( + bad_expr, + source_expression = source_expression, + lint_message = "Unify consecutive calls to mutate().", + type = "warning" + ) + }) +} diff --git a/inst/lintr/linters.csv b/inst/lintr/linters.csv index 56b3c8aa56..420fca4e3d 100644 --- a/inst/lintr/linters.csv +++ b/inst/lintr/linters.csv @@ -14,6 +14,7 @@ comparison_negation_linter,readability consistency condition_message_linter,best_practices consistency conjunct_test_linter,package_development best_practices readability configurable pkg_testthat consecutive_assertion_linter,style readability consistency +consecutive_mutate_linter,consistency readability configurable efficiency consecutive_stopifnot_linter,style readability consistency deprecated cyclocomp_linter,style readability best_practices default configurable duplicate_argument_linter,correctness common_mistakes configurable diff --git a/man/configurable_linters.Rd b/man/configurable_linters.Rd index ac0be31203..f233c62ebc 100644 --- a/man/configurable_linters.Rd +++ b/man/configurable_linters.Rd @@ -18,6 +18,7 @@ The following linters are tagged with 'configurable': \item{\code{\link{brace_linter}}} \item{\code{\link{commas_linter}}} \item{\code{\link{conjunct_test_linter}}} +\item{\code{\link{consecutive_mutate_linter}}} \item{\code{\link{cyclocomp_linter}}} \item{\code{\link{duplicate_argument_linter}}} \item{\code{\link{fixed_regex_linter}}} diff --git a/man/consecutive_mutate_linter.Rd b/man/consecutive_mutate_linter.Rd new file mode 100644 index 0000000000..a083ce7ab6 --- /dev/null +++ b/man/consecutive_mutate_linter.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/consecutive_mutate_linter.R +\name{consecutive_mutate_linter} +\alias{consecutive_mutate_linter} +\title{Require consecutive calls to mutate() to be combined when possible} +\usage{ +consecutive_mutate_linter(invalid_backends = "dbplyr") +} +\arguments{ +\item{invalid_backends}{Character vector of packages providing dplyr backends +which may not be compatible with combining \code{mutate()} calls in all cases. +Defaults to \code{"dbplyr"} since not all SQL backends can handle re-using +a variable defined in the same \code{mutate()} expression.} +} +\description{ +\code{dplyr::mutate()} accepts any number of columns, so sequences like +\code{DF \%>\% dplyr::mutate(..1) \%>\% dplyr::mutate(..2)} are redundant -- +they can always be expressed with a single call to \code{dplyr::mutate()}. +} +\details{ +An exception is for some SQL back-ends, where the translation logic may not be +as sophisticated as that in the default \code{dplyr}, for example in +\code{DF \%>\% mutate(a = a + 1) \%>\% mutate(b = a - 2)}. +} +\examples{ +# will produce lints +lint( + text = "x \%>\% mutate(a = 1) \%>\% mutate(b = 2)", + linters = consecutive_mutate_linter() +) + +# okay +lint( + text = "x \%>\% mutate(a = 1, b = 2)", + linters = consecutive_mutate_linter() +) + +code <- "library(dbplyr)\nx \%>\% mutate(a = 1) \%>\% mutate(a = a + 1)" +writeLines(code) +lint( + text = code, + linters = consecutive_mutate_linter() +) + +} +\seealso{ +\link{linters} for a complete list of linters available in lintr. +} +\section{Tags}{ +\link[=configurable_linters]{configurable}, \link[=consistency_linters]{consistency}, \link[=efficiency_linters]{efficiency}, \link[=readability_linters]{readability} +} diff --git a/man/consistency_linters.Rd b/man/consistency_linters.Rd index 0ce014e296..81b66e09af 100644 --- a/man/consistency_linters.Rd +++ b/man/consistency_linters.Rd @@ -18,6 +18,7 @@ The following linters are tagged with 'consistency': \item{\code{\link{comparison_negation_linter}}} \item{\code{\link{condition_message_linter}}} \item{\code{\link{consecutive_assertion_linter}}} +\item{\code{\link{consecutive_mutate_linter}}} \item{\code{\link{function_argument_linter}}} \item{\code{\link{if_not_else_linter}}} \item{\code{\link{if_switch_linter}}} diff --git a/man/efficiency_linters.Rd b/man/efficiency_linters.Rd index 58674f8f5d..37cb8b0d74 100644 --- a/man/efficiency_linters.Rd +++ b/man/efficiency_linters.Rd @@ -15,6 +15,7 @@ The following linters are tagged with 'efficiency': \item{\code{\link{any_duplicated_linter}}} \item{\code{\link{any_is_na_linter}}} \item{\code{\link{boolean_arithmetic_linter}}} +\item{\code{\link{consecutive_mutate_linter}}} \item{\code{\link{fixed_regex_linter}}} \item{\code{\link{if_switch_linter}}} \item{\code{\link{ifelse_censor_linter}}} diff --git a/man/linters.Rd b/man/linters.Rd index 288bdc1107..a4daafe26c 100644 --- a/man/linters.Rd +++ b/man/linters.Rd @@ -19,16 +19,16 @@ The following tags exist: \itemize{ \item{\link[=best_practices_linters]{best_practices} (63 linters)} \item{\link[=common_mistakes_linters]{common_mistakes} (10 linters)} -\item{\link[=configurable_linters]{configurable} (36 linters)} -\item{\link[=consistency_linters]{consistency} (30 linters)} +\item{\link[=configurable_linters]{configurable} (37 linters)} +\item{\link[=consistency_linters]{consistency} (31 linters)} \item{\link[=correctness_linters]{correctness} (7 linters)} \item{\link[=default_linters]{default} (25 linters)} \item{\link[=deprecated_linters]{deprecated} (4 linters)} -\item{\link[=efficiency_linters]{efficiency} (31 linters)} +\item{\link[=efficiency_linters]{efficiency} (32 linters)} \item{\link[=executing_linters]{executing} (6 linters)} \item{\link[=package_development_linters]{package_development} (14 linters)} \item{\link[=pkg_testthat_linters]{pkg_testthat} (12 linters)} -\item{\link[=readability_linters]{readability} (63 linters)} +\item{\link[=readability_linters]{readability} (64 linters)} \item{\link[=regex_linters]{regex} (4 linters)} \item{\link[=robustness_linters]{robustness} (17 linters)} \item{\link[=style_linters]{style} (39 linters)} @@ -51,6 +51,7 @@ The following linters exist: \item{\code{\link{condition_message_linter}} (tags: best_practices, consistency)} \item{\code{\link{conjunct_test_linter}} (tags: best_practices, configurable, package_development, pkg_testthat, readability)} \item{\code{\link{consecutive_assertion_linter}} (tags: consistency, readability, style)} +\item{\code{\link{consecutive_mutate_linter}} (tags: configurable, consistency, efficiency, readability)} \item{\code{\link{cyclocomp_linter}} (tags: best_practices, configurable, default, readability, style)} \item{\code{\link{duplicate_argument_linter}} (tags: common_mistakes, configurable, correctness)} \item{\code{\link{empty_assignment_linter}} (tags: best_practices, readability)} diff --git a/man/readability_linters.Rd b/man/readability_linters.Rd index dcdc85bfda..b3d8e6a8cf 100644 --- a/man/readability_linters.Rd +++ b/man/readability_linters.Rd @@ -19,6 +19,7 @@ The following linters are tagged with 'readability': \item{\code{\link{comparison_negation_linter}}} \item{\code{\link{conjunct_test_linter}}} \item{\code{\link{consecutive_assertion_linter}}} +\item{\code{\link{consecutive_mutate_linter}}} \item{\code{\link{cyclocomp_linter}}} \item{\code{\link{empty_assignment_linter}}} \item{\code{\link{expect_length_linter}}} diff --git a/tests/testthat/test-consecutive_mutate_linter.R b/tests/testthat/test-consecutive_mutate_linter.R new file mode 100644 index 0000000000..641a0b3592 --- /dev/null +++ b/tests/testthat/test-consecutive_mutate_linter.R @@ -0,0 +1,154 @@ +test_that("consecutive_mutate_linter skips allowed usages", { + linter <- consecutive_mutate_linter() + + expect_lint("DF %>% mutate(x = 1)", NULL, linter) + + # intervening expression + expect_lint("DF %>% mutate(x = 1) %>% filter(x > 2) %>% mutate(y = 2)", NULL, linter) + + # pipeline starts with mutate() + expect_lint("mutate(DF, x = 1) %>% arrange(y) %>% mutate(z = 2)", NULL, linter) + + # new dplyr: .keep and .by arguments are ignored + expect_lint("DF %>% mutate(a = 1) %>% mutate(a = a / sum(a), .by = b)", NULL, linter) + expect_lint("DF %>% mutate(a = 1) %>% mutate(a = b, .keep = 'none')", NULL, linter) + expect_lint("DF %>% mutate(a = a / sum(a), .by = b) %>% mutate(c = 1)", NULL, linter) + expect_lint("DF %>% mutate(a = 1, .keep = 'none') %>% mutate(a = a + 1)", NULL, linter) +}) + +patrick::with_parameters_test_that( + "consecutive_mutate_linter skips files loading SQL backends", + { + linter <- consecutive_mutate_linter(invalid_backends = backend) + + expect_lint( + trim_some(glue::glue(" + library({backend}) + DF %>% mutate(a = a + 1) %>% mutate(b = a - 2) + ")), + NULL, + linter + ) + + expect_lint( + trim_some(glue::glue(" + require('{backend}') + DF %>% mutate(a = a + 1) %>% mutate(b = a - 2) + ")), + NULL, + linter + ) + + expect_lint( + trim_some(glue(" + conn %>% + tbl({backend}::sql('SELECT 1 AS x')) %>% + mutate(a = x + 1) %>% + mutate(b = a + 1) + ")), + NULL, + linter + ) + + expect_lint( + trim_some(glue(" + conn %>% + tbl({backend}:::sql('SELECT 1 AS x')) %>% + mutate(a = x + 1) %>% + mutate(b = a + 1) + ")), + NULL, + linter + ) + + expect_lint( + trim_some(glue(" + #' @import {backend} + NULL + + DF %>% mutate(a = a + 1) %>% mutate(b = a - 2) + ")), + NULL, + linter + ) + + expect_lint( + trim_some(glue(" + #' @importFrom {backend} sql + NULL + + conn %>% + tbl(sql('SELECT 1 AS x')) %>% + mutate(a = x + 1) %>% + mutate(b = a + 1) + ")), + NULL, + linter + ) + }, + .test_name = c("dbplyr", "custom.backend"), + backend = c("dbplyr", "custom.backend") +) + +test_that("consecutive_mutate_linter blocks simple disallowed usages", { + linter <- consecutive_mutate_linter() + lint_msg <- rex::rex("Unify consecutive calls to mutate().") + + # one test of inline usage + expect_lint("DF %>% mutate(a = 1) %>% mutate(b = 2)", lint_msg, linter) + + expect_lint( + trim_some(" + DF %>% + mutate(a = 1) %>% + mutate(b = 2) + "), + lint_msg, + linter + ) + + expect_lint( + trim_some(" + DF %>% + dplyr::mutate(a = 1) %>% + dplyr::mutate(b = 2) + "), + lint_msg, + linter + ) + + expect_lint( + trim_some(" + DF %>% + mutate(a = 1) %>% + # a comment on b + mutate(b = 2) + "), + lint_msg, + linter + ) + + # mutate to open pipeline followed by mutate + expect_lint("mutate(DF, x = 1) %>% mutate(x = 2)", lint_msg, linter) +}) + +test_that("'parallel' calls are not linted", { + linter <- consecutive_mutate_linter() + + expect_lint("foo(mutate(DF1, x = 1), mutate(DF2, y = 2))", NULL, linter) + + expect_lint("foo(DF1 %>% mutate(x = 1), DF2 %>% mutate(y = 2))", NULL, linter) + + expect_lint("DF1 %>% mutate(x = 1) %>% inner_join(DF2 %>% mutate(y = 2))", NULL, linter) +}) + +test_that("native pipe is linted", { + skip_if_not_r_version("4.1.0") + + linter <- consecutive_mutate_linter() + lint_msg <- rex::rex("Unify consecutive calls to mutate().") + + expect_lint("DF |> mutate(a = 1) |> mutate(b = 2)", lint_msg, linter) + # Ditto mixed pipes + expect_lint("DF %>% mutate(a = 1) |> mutate(b = 2)", lint_msg, linter) +})