From 712a10e4e7fa821abc430c23c3dde7e0e7aced67 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 17 Nov 2023 22:20:40 +0000 Subject: [PATCH 01/11] New consecutive_mutate_linter --- DESCRIPTION | 1 + NAMESPACE | 1 + NEWS.md | 1 + R/consecutive_mutate_linter.R | 76 ++++++++++++++ inst/lintr/linters.csv | 1 + man/configurable_linters.Rd | 1 + man/consecutive_mutate_linter.Rd | 40 ++++++++ man/consistency_linters.Rd | 1 + man/linters.Rd | 7 +- man/readability_linters.Rd | 1 + .../testthat/test-consecutive_mutate_linter.R | 98 +++++++++++++++++++ 11 files changed, 225 insertions(+), 3 deletions(-) create mode 100644 R/consecutive_mutate_linter.R create mode 100644 man/consecutive_mutate_linter.Rd create mode 100644 tests/testthat/test-consecutive_mutate_linter.R diff --git a/DESCRIPTION b/DESCRIPTION index 7e0f6250d8..4f4c59c153 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -81,6 +81,7 @@ Collate: 'condition_message_linter.R' 'conjunct_test_linter.R' 'consecutive_assertion_linter.R' + 'consecutive_mutate_linter.R' 'cyclocomp_linter.R' 'declared_functions.R' 'deprecated.R' diff --git a/NAMESPACE b/NAMESPACE index 8be33824dd..8fadcf9798 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -40,6 +40,7 @@ export(comparison_negation_linter) export(condition_message_linter) export(conjunct_test_linter) export(consecutive_assertion_linter) +export(consecutive_mutate_linter) export(consecutive_stopifnot_linter) export(cyclocomp_linter) export(default_linters) diff --git a/NEWS.md b/NEWS.md index 477b8e61ce..2692f73504 100644 --- a/NEWS.md +++ b/NEWS.md @@ -15,6 +15,7 @@ * `comparison_negation_linter()` for discouraging negated comparisons when a direct negation is preferable, e.g. `!(x == y)` could be `x != y` (part of #884, @MichaelChirico). * `terminal_close_linter()` for discouraging using `close()` to end functions (part of #884, @MichaelChirico). Such usages are not robust to errors, where `close()` will not be run as intended. Put `close()` in an `on.exit()` hook, or use {withr} to manage connections with proper cleanup. * `print_linter()` for discouraging usage of `print()` on string literals like `print("Reached here")` or `print(paste("Found", nrow(DF), "rows."))` (#1894, @MichaelChirico). +* `consecutive_mutate_linter()` for encouraging consecutive calls to `dplyr::mutate()` to be combined (part of #884, @MichaelChirico). ### Lint accuracy fixes: removing false positives diff --git a/R/consecutive_mutate_linter.R b/R/consecutive_mutate_linter.R new file mode 100644 index 0000000000..9f36b83f3c --- /dev/null +++ b/R/consecutive_mutate_linter.R @@ -0,0 +1,76 @@ +#' Force consecutive calls to mutate() into just one when possible +#' +#' `dplyr::mutate()` accepts any number of columns, so sequences like +#' `DF %>% dplyr::mutate(..1) %>% dplyr::mutate(..2)` are redundant -- +#' they can always be expressed with a single call to `dplyr::mutate()`. +#' +#' An exception is for some SQL back-ends, where the translation logic may not be +#' as sophisticated as that in the default `dplyr`, for example in +#' `DF %>% mutate(a = a + 1) %>% mutate(b = a - 2)`. +#' +#' @examples +#' # will produce lints +#' lint( +#' text = "x %>% mutate(a = 1) %>% mutate(b = 2)", +#' linters = consecutive_mutate_linter() +#' ) +#' +#' # okay +#' code <- "library(dplyr)\nx %>% mutate(a = 1) %>% mutate(a = a + 1)" +#' writeLines(code) +#' lint( +#' text = code, +#' linters = consecutive_mutate_linter() +#' ) +#' +#' @evalRd rd_tags("consecutive_mutate_linter") +#' @seealso [linters] for a complete list of linters available in lintr. +#' @export +consecutive_mutate_linter <- function(invalid_backends = "dbplyr") { + blocked_library_xpath <- glue(" + //SYMBOL_FUNCTION_CALL[text() = 'library'] + /parent::expr + /following-sibling::expr[SYMBOL[{ xp_text_in_table(invalid_backends) }]] + ") + + # match on the expr, not the SYMBOL_FUNCTION_CALL, to ensure + # namespace-qualified calls only match if the namespaces do. + # expr[2] needed in expr[1][expr[2]] to skip matches on pipelines + # starting like mutate(DF, ...) %>% foo() %>% mutate(). + # similarly, expr[1][expr[call='mutate']] covers pipelines + # starting like mutate(DF, ...) %>% mutate(...) + xpath <- glue(" + (//PIPE | //SPECIAL[{ xp_text_in_table(magrittr_pipes) }]) + /preceding-sibling::expr[ + expr[2][expr/SYMBOL_FUNCTION_CALL[text() = 'mutate']] + or expr/SYMBOL_FUNCTION_CALL[text() = 'mutate'] + ] + /following-sibling::expr[ + expr/SYMBOL_FUNCTION_CALL[text() = 'mutate'] + and not(SYMBOL_SUB[text() = '.keep' or text() = '.by']) + ] + ") + + Linter(function(source_expression) { + # need the full file to also catch usages at the top level + if (!is_lint_level(source_expression, "file")) { + return(list()) + } + + xml <- source_expression$full_xml_parsed_content + + blocked_expr <- xml_find_first(xml, blocked_library_xpath) + if (!is.na(blocked_expr)) { + return(list()) + } + + bad_expr <- xml_find_all(xml, xpath) + + xml_nodes_to_lints( + bad_expr, + source_expression = source_expression, + lint_message = "Unify consecutive calls to mutate().", + type = "warning" + ) + }) +} diff --git a/inst/lintr/linters.csv b/inst/lintr/linters.csv index 334f6167d2..70c5401c2f 100644 --- a/inst/lintr/linters.csv +++ b/inst/lintr/linters.csv @@ -14,6 +14,7 @@ comparison_negation_linter,readability consistency condition_message_linter,best_practices consistency conjunct_test_linter,package_development best_practices readability configurable pkg_testthat consecutive_assertion_linter,style readability consistency +consecutive_mutate_linter,consistency readability configurable consecutive_stopifnot_linter,style readability consistency deprecated cyclocomp_linter,style readability best_practices default configurable duplicate_argument_linter,correctness common_mistakes configurable diff --git a/man/configurable_linters.Rd b/man/configurable_linters.Rd index 19899dd62c..92f92e86f1 100644 --- a/man/configurable_linters.Rd +++ b/man/configurable_linters.Rd @@ -18,6 +18,7 @@ The following linters are tagged with 'configurable': \item{\code{\link{brace_linter}}} \item{\code{\link{commas_linter}}} \item{\code{\link{conjunct_test_linter}}} +\item{\code{\link{consecutive_mutate_linter}}} \item{\code{\link{cyclocomp_linter}}} \item{\code{\link{duplicate_argument_linter}}} \item{\code{\link{fixed_regex_linter}}} diff --git a/man/consecutive_mutate_linter.Rd b/man/consecutive_mutate_linter.Rd new file mode 100644 index 0000000000..18753fb5e7 --- /dev/null +++ b/man/consecutive_mutate_linter.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/consecutive_mutate_linter.R +\name{consecutive_mutate_linter} +\alias{consecutive_mutate_linter} +\title{Force consecutive calls to mutate() into just one when possible} +\usage{ +consecutive_mutate_linter(invalid_backends = "dbplyr") +} +\description{ +\code{dplyr::mutate()} accepts any number of columns, so sequences like +\code{DF \%>\% dplyr::mutate(..1) \%>\% dplyr::mutate(..2)} are redundant -- +they can always be expressed with a single call to \code{dplyr::mutate()}. +} +\details{ +An exception is for some SQL back-ends, where the translation logic may not be +as sophisticated as that in the default \code{dplyr}, for example in +\code{DF \%>\% mutate(a = a + 1) \%>\% mutate(b = a - 2)}. +} +\examples{ +# will produce lints +lint( + text = "x \%>\% mutate(a = 1) \%>\% mutate(b = 2)", + linters = consecutive_mutate_linter() +) + +# okay +code <- "library(dplyr)\nx \%>\% mutate(a = 1) \%>\% mutate(a = a + 1)" +writeLines(code) +lint( + text = code, + linters = consecutive_mutate_linter() +) + +} +\seealso{ +\link{linters} for a complete list of linters available in lintr. +} +\section{Tags}{ +\link[=configurable_linters]{configurable}, \link[=consistency_linters]{consistency}, \link[=readability_linters]{readability} +} diff --git a/man/consistency_linters.Rd b/man/consistency_linters.Rd index 7d8a609c64..aa7ad7f23d 100644 --- a/man/consistency_linters.Rd +++ b/man/consistency_linters.Rd @@ -18,6 +18,7 @@ The following linters are tagged with 'consistency': \item{\code{\link{comparison_negation_linter}}} \item{\code{\link{condition_message_linter}}} \item{\code{\link{consecutive_assertion_linter}}} +\item{\code{\link{consecutive_mutate_linter}}} \item{\code{\link{function_argument_linter}}} \item{\code{\link{if_not_else_linter}}} \item{\code{\link{implicit_integer_linter}}} diff --git a/man/linters.Rd b/man/linters.Rd index 91d035bb03..5688f448ee 100644 --- a/man/linters.Rd +++ b/man/linters.Rd @@ -19,8 +19,8 @@ The following tags exist: \itemize{ \item{\link[=best_practices_linters]{best_practices} (56 linters)} \item{\link[=common_mistakes_linters]{common_mistakes} (8 linters)} -\item{\link[=configurable_linters]{configurable} (34 linters)} -\item{\link[=consistency_linters]{consistency} (24 linters)} +\item{\link[=configurable_linters]{configurable} (35 linters)} +\item{\link[=consistency_linters]{consistency} (25 linters)} \item{\link[=correctness_linters]{correctness} (7 linters)} \item{\link[=default_linters]{default} (25 linters)} \item{\link[=deprecated_linters]{deprecated} (8 linters)} @@ -28,7 +28,7 @@ The following tags exist: \item{\link[=executing_linters]{executing} (5 linters)} \item{\link[=package_development_linters]{package_development} (14 linters)} \item{\link[=pkg_testthat_linters]{pkg_testthat} (12 linters)} -\item{\link[=readability_linters]{readability} (57 linters)} +\item{\link[=readability_linters]{readability} (58 linters)} \item{\link[=robustness_linters]{robustness} (16 linters)} \item{\link[=style_linters]{style} (38 linters)} } @@ -50,6 +50,7 @@ The following linters exist: \item{\code{\link{condition_message_linter}} (tags: best_practices, consistency)} \item{\code{\link{conjunct_test_linter}} (tags: best_practices, configurable, package_development, pkg_testthat, readability)} \item{\code{\link{consecutive_assertion_linter}} (tags: consistency, readability, style)} +\item{\code{\link{consecutive_mutate_linter}} (tags: configurable, consistency, readability)} \item{\code{\link{cyclocomp_linter}} (tags: best_practices, configurable, default, readability, style)} \item{\code{\link{duplicate_argument_linter}} (tags: common_mistakes, configurable, correctness)} \item{\code{\link{empty_assignment_linter}} (tags: best_practices, readability)} diff --git a/man/readability_linters.Rd b/man/readability_linters.Rd index 06deb9233e..d509aa8a97 100644 --- a/man/readability_linters.Rd +++ b/man/readability_linters.Rd @@ -19,6 +19,7 @@ The following linters are tagged with 'readability': \item{\code{\link{comparison_negation_linter}}} \item{\code{\link{conjunct_test_linter}}} \item{\code{\link{consecutive_assertion_linter}}} +\item{\code{\link{consecutive_mutate_linter}}} \item{\code{\link{cyclocomp_linter}}} \item{\code{\link{empty_assignment_linter}}} \item{\code{\link{expect_length_linter}}} diff --git a/tests/testthat/test-consecutive_mutate_linter.R b/tests/testthat/test-consecutive_mutate_linter.R new file mode 100644 index 0000000000..ceea194504 --- /dev/null +++ b/tests/testthat/test-consecutive_mutate_linter.R @@ -0,0 +1,98 @@ +test_that("consecutive_mutate_linter skips allowed usages", { + linter <- consecutive_mutate_linter() + + expect_lint("DF %>% mutate(x = 1)", NULL, linter) + + # intervening expression + expect_lint("DF %>% mutate(x = 1) %>% filter(x > 2) %>% mutate(y = 2)", NULL, linter) + + # pipeline starts with mutate() + expect_lint("mutate(DF, x = 1) %>% arrange(y) %>% mutate(z = 2)", NULL, linter) + + # new dplyr: .keep and .by arguments are ignored + expect_lint("DF %>% mutate(a = 1) %>% mutate(a = a / sum(a), .by = b)", NULL, linter) + expect_lint("DF %>% mutate(a = 1) %>% mutate(a = b, .keep = 'none')", NULL, linter) +}) + +test_that("consecutive_mutate_linter skips files loading SQL backends", { + expect_lint( + trim_some(" + library(dbplyr) + DF %>% mutate(a = a + 1) %>% mutate(b = a - 2) + "), + NULL, + consecutive_mutate_linter() + ) + + expect_lint( + trim_some(" + library(custom.backend) + DF %>% mutate(a = a + 1) %>% mutate(b = a - 2) + "), + NULL, + consecutive_mutate_linter(invalid_backends = "custom.backend") + ) +}) + +test_that("consecutive_mutate_linter blocks simple disallowed usages", { + linter <- consecutive_mutate_linter() + lint_msg <- rex::rex("Unify consecutive calls to mutate().") + + # one test of inline usage + expect_lint("DF %>% mutate(a = 1) %>% mutate(b = 2)", lint_msg, linter) + + expect_lint( + trim_some(" + DF %>% + mutate(a = 1) %>% + mutate(b = 2) + "), + lint_msg, + linter + ) + + expect_lint( + trim_some(" + DF %>% + dplyr::mutate(a = 1) %>% + dplyr::mutate(b = 2) + "), + lint_msg, + linter + ) + + expect_lint( + trim_some(" + DF %>% + mutate(a = 1) %>% + # a comment on b + mutate(b = 2) + "), + lint_msg, + linter + ) + + # mutate to open pipeline followed by mutate + expect_lint("mutate(DF, x = 1) %>% mutate(x = 2)", lint_msg, linter) +}) + +test_that("'parallel' calls are not linted", { + linter <- consecutive_mutate_linter() + + expect_lint("foo(mutate(DF1, x = 1), mutate(DF2, y = 2))", NULL, linter) + + expect_lint("foo(DF1 %>% mutate(x = 1), DF2 %>% mutate(y = 2))", NULL, linter) + + expect_lint("DF1 %>% mutate(x = 1) %>% inner_join(DF2 %>% mutate(y = 2))", NULL, linter) +}) + +test_that("native pipe is linted", { + skip_if_not_r_version("4.1.0") + + linter <- consecutive_mutate_linter() + lint_msg <- rex::rex("Unify consecutive calls to mutate().") + + expect_lint("DF |> mutate(a = 1) |> mutate(b = 2)", lint_msg, linter) + # Ditto mixed pipes + expect_lint("DF %>% mutate(a = 1) |> mutate(b = 2)", lint_msg, linter) +}) From 3907fbb214b1acc9871bb40cc0388f25f7135931 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 17 Nov 2023 22:37:24 +0000 Subject: [PATCH 02/11] @param --- R/consecutive_mutate_linter.R | 5 +++++ man/consecutive_mutate_linter.Rd | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/R/consecutive_mutate_linter.R b/R/consecutive_mutate_linter.R index 9f36b83f3c..702897d884 100644 --- a/R/consecutive_mutate_linter.R +++ b/R/consecutive_mutate_linter.R @@ -8,6 +8,11 @@ #' as sophisticated as that in the default `dplyr`, for example in #' `DF %>% mutate(a = a + 1) %>% mutate(b = a - 2)`. #' +#' @param invalid_backends Character vector of packages providing dplyr backends +#' which may not be compatible with combining `mutate()` calls in all cases. +#' Defaults to `"dbplyr"` since not all SQL backends can handle re-using +#' a variable defined in the same `mutate()` expression. +#' #' @examples #' # will produce lints #' lint( diff --git a/man/consecutive_mutate_linter.Rd b/man/consecutive_mutate_linter.Rd index 18753fb5e7..54d98cd8a3 100644 --- a/man/consecutive_mutate_linter.Rd +++ b/man/consecutive_mutate_linter.Rd @@ -6,6 +6,12 @@ \usage{ consecutive_mutate_linter(invalid_backends = "dbplyr") } +\arguments{ +\item{invalid_backends}{Character vector of packages providing dplyr backends +which may not be compatible with combining \code{mutate()} calls in all cases. +Defaults to \code{"dbplyr"} since not all SQL backends can handle re-using +a variable defined in the same \code{mutate()} expression.} +} \description{ \code{dplyr::mutate()} accepts any number of columns, so sequences like \code{DF \%>\% dplyr::mutate(..1) \%>\% dplyr::mutate(..2)} are redundant -- From 95b0fc48cd76c71a7f4b4cc1a692cddef64550af Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 18 Nov 2023 01:02:45 +0000 Subject: [PATCH 03/11] typo in example --- R/consecutive_mutate_linter.R | 2 +- man/consecutive_mutate_linter.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/consecutive_mutate_linter.R b/R/consecutive_mutate_linter.R index 702897d884..a05b9bd50f 100644 --- a/R/consecutive_mutate_linter.R +++ b/R/consecutive_mutate_linter.R @@ -21,7 +21,7 @@ #' ) #' #' # okay -#' code <- "library(dplyr)\nx %>% mutate(a = 1) %>% mutate(a = a + 1)" +#' code <- "library(dbplyr)\nx %>% mutate(a = 1) %>% mutate(a = a + 1)" #' writeLines(code) #' lint( #' text = code, diff --git a/man/consecutive_mutate_linter.Rd b/man/consecutive_mutate_linter.Rd index 54d98cd8a3..efffdbedbb 100644 --- a/man/consecutive_mutate_linter.Rd +++ b/man/consecutive_mutate_linter.Rd @@ -30,7 +30,7 @@ lint( ) # okay -code <- "library(dplyr)\nx \%>\% mutate(a = 1) \%>\% mutate(a = a + 1)" +code <- "library(dbplyr)\nx \%>\% mutate(a = 1) \%>\% mutate(a = a + 1)" writeLines(code) lint( text = code, From e9e5eda6468b34a71f3ef552b8766bf5de7c926e Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 18 Nov 2023 01:03:03 +0000 Subject: [PATCH 04/11] efficiency tag --- inst/lintr/linters.csv | 2 +- man/consecutive_mutate_linter.Rd | 2 +- man/efficiency_linters.Rd | 1 + man/linters.Rd | 4 ++-- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/inst/lintr/linters.csv b/inst/lintr/linters.csv index 70c5401c2f..28a733c024 100644 --- a/inst/lintr/linters.csv +++ b/inst/lintr/linters.csv @@ -14,7 +14,7 @@ comparison_negation_linter,readability consistency condition_message_linter,best_practices consistency conjunct_test_linter,package_development best_practices readability configurable pkg_testthat consecutive_assertion_linter,style readability consistency -consecutive_mutate_linter,consistency readability configurable +consecutive_mutate_linter,consistency readability configurable efficiency consecutive_stopifnot_linter,style readability consistency deprecated cyclocomp_linter,style readability best_practices default configurable duplicate_argument_linter,correctness common_mistakes configurable diff --git a/man/consecutive_mutate_linter.Rd b/man/consecutive_mutate_linter.Rd index efffdbedbb..4553fe6d24 100644 --- a/man/consecutive_mutate_linter.Rd +++ b/man/consecutive_mutate_linter.Rd @@ -42,5 +42,5 @@ lint( \link{linters} for a complete list of linters available in lintr. } \section{Tags}{ -\link[=configurable_linters]{configurable}, \link[=consistency_linters]{consistency}, \link[=readability_linters]{readability} +\link[=configurable_linters]{configurable}, \link[=consistency_linters]{consistency}, \link[=efficiency_linters]{efficiency}, \link[=readability_linters]{readability} } diff --git a/man/efficiency_linters.Rd b/man/efficiency_linters.Rd index 146c0be543..afd0c439f8 100644 --- a/man/efficiency_linters.Rd +++ b/man/efficiency_linters.Rd @@ -15,6 +15,7 @@ The following linters are tagged with 'efficiency': \item{\code{\link{any_duplicated_linter}}} \item{\code{\link{any_is_na_linter}}} \item{\code{\link{boolean_arithmetic_linter}}} +\item{\code{\link{consecutive_mutate_linter}}} \item{\code{\link{fixed_regex_linter}}} \item{\code{\link{ifelse_censor_linter}}} \item{\code{\link{inner_combine_linter}}} diff --git a/man/linters.Rd b/man/linters.Rd index 5688f448ee..8e8c840a6f 100644 --- a/man/linters.Rd +++ b/man/linters.Rd @@ -24,7 +24,7 @@ The following tags exist: \item{\link[=correctness_linters]{correctness} (7 linters)} \item{\link[=default_linters]{default} (25 linters)} \item{\link[=deprecated_linters]{deprecated} (8 linters)} -\item{\link[=efficiency_linters]{efficiency} (26 linters)} +\item{\link[=efficiency_linters]{efficiency} (27 linters)} \item{\link[=executing_linters]{executing} (5 linters)} \item{\link[=package_development_linters]{package_development} (14 linters)} \item{\link[=pkg_testthat_linters]{pkg_testthat} (12 linters)} @@ -50,7 +50,7 @@ The following linters exist: \item{\code{\link{condition_message_linter}} (tags: best_practices, consistency)} \item{\code{\link{conjunct_test_linter}} (tags: best_practices, configurable, package_development, pkg_testthat, readability)} \item{\code{\link{consecutive_assertion_linter}} (tags: consistency, readability, style)} -\item{\code{\link{consecutive_mutate_linter}} (tags: configurable, consistency, readability)} +\item{\code{\link{consecutive_mutate_linter}} (tags: configurable, consistency, efficiency, readability)} \item{\code{\link{cyclocomp_linter}} (tags: best_practices, configurable, default, readability, style)} \item{\code{\link{duplicate_argument_linter}} (tags: common_mistakes, configurable, correctness)} \item{\code{\link{empty_assignment_linter}} (tags: best_practices, readability)} From b26f492221bb8fc4059a712fc58c96a0839c8af2 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 18 Nov 2023 01:09:44 +0000 Subject: [PATCH 05/11] catch .by/.keep in earlier call --- R/consecutive_mutate_linter.R | 14 ++++++-------- tests/testthat/test-consecutive_mutate_linter.R | 2 ++ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/consecutive_mutate_linter.R b/R/consecutive_mutate_linter.R index a05b9bd50f..8eac0c6a24 100644 --- a/R/consecutive_mutate_linter.R +++ b/R/consecutive_mutate_linter.R @@ -44,16 +44,14 @@ consecutive_mutate_linter <- function(invalid_backends = "dbplyr") { # starting like mutate(DF, ...) %>% foo() %>% mutate(). # similarly, expr[1][expr[call='mutate']] covers pipelines # starting like mutate(DF, ...) %>% mutate(...) + mutate_cond <- xp_and( + "expr/SYMBOL_FUNCTION_CALL[text() = 'mutate']", + "not(SYMBOL_SUB[text() = '.keep' or text() = '.by'])" + ) xpath <- glue(" (//PIPE | //SPECIAL[{ xp_text_in_table(magrittr_pipes) }]) - /preceding-sibling::expr[ - expr[2][expr/SYMBOL_FUNCTION_CALL[text() = 'mutate']] - or expr/SYMBOL_FUNCTION_CALL[text() = 'mutate'] - ] - /following-sibling::expr[ - expr/SYMBOL_FUNCTION_CALL[text() = 'mutate'] - and not(SYMBOL_SUB[text() = '.keep' or text() = '.by']) - ] + /preceding-sibling::expr[expr[2][{ mutate_cond }] or ({ mutate_cond })] + /following-sibling::expr[{ mutate_cond }] ") Linter(function(source_expression) { diff --git a/tests/testthat/test-consecutive_mutate_linter.R b/tests/testthat/test-consecutive_mutate_linter.R index ceea194504..2ebbdccdf5 100644 --- a/tests/testthat/test-consecutive_mutate_linter.R +++ b/tests/testthat/test-consecutive_mutate_linter.R @@ -12,6 +12,8 @@ test_that("consecutive_mutate_linter skips allowed usages", { # new dplyr: .keep and .by arguments are ignored expect_lint("DF %>% mutate(a = 1) %>% mutate(a = a / sum(a), .by = b)", NULL, linter) expect_lint("DF %>% mutate(a = 1) %>% mutate(a = b, .keep = 'none')", NULL, linter) + expect_lint("DF %>% mutate(a = a / sum(a), .by = b) %>% mutate(c = 1)", NULL, linter) + expect_lint("DF %>% mutate(a = 1, .keep = 'none') %>% mutate(a = a + 1)", NULL, linter) }) test_that("consecutive_mutate_linter skips files loading SQL backends", { From 4f1337ff1ebecc3d9dff8d27eb2d4d283ecfc3d3 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 18 Nov 2023 01:33:42 +0000 Subject: [PATCH 06/11] progress towards more robust detecting of invalid backends --- R/consecutive_mutate_linter.R | 12 ++- .../testthat/test-consecutive_mutate_linter.R | 92 +++++++++++++++---- 2 files changed, 80 insertions(+), 24 deletions(-) diff --git a/R/consecutive_mutate_linter.R b/R/consecutive_mutate_linter.R index 8eac0c6a24..60eb8ec23c 100644 --- a/R/consecutive_mutate_linter.R +++ b/R/consecutive_mutate_linter.R @@ -32,10 +32,12 @@ #' @seealso [linters] for a complete list of linters available in lintr. #' @export consecutive_mutate_linter <- function(invalid_backends = "dbplyr") { - blocked_library_xpath <- glue(" - //SYMBOL_FUNCTION_CALL[text() = 'library'] + # NB: ignore R"()" strings for simplicity here. + attach_pkg_xpath <- glue(" + //SYMBOL_FUNCTION_CALL[text() = 'library' or text() = 'require'] /parent::expr - /following-sibling::expr[SYMBOL[{ xp_text_in_table(invalid_backends) }]] + /following-sibling::expr + /*[self::SYMBOL or self::STR_CONST] ") # match on the expr, not the SYMBOL_FUNCTION_CALL, to ensure @@ -62,8 +64,8 @@ consecutive_mutate_linter <- function(invalid_backends = "dbplyr") { xml <- source_expression$full_xml_parsed_content - blocked_expr <- xml_find_first(xml, blocked_library_xpath) - if (!is.na(blocked_expr)) { + attach_str <- get_r_string(xml_find_all(xml, attach_pkg_xpath)) + if (any(invalid_backends %in% attach_str)) { return(list()) } diff --git a/tests/testthat/test-consecutive_mutate_linter.R b/tests/testthat/test-consecutive_mutate_linter.R index 2ebbdccdf5..b802fe67c3 100644 --- a/tests/testthat/test-consecutive_mutate_linter.R +++ b/tests/testthat/test-consecutive_mutate_linter.R @@ -16,25 +16,79 @@ test_that("consecutive_mutate_linter skips allowed usages", { expect_lint("DF %>% mutate(a = 1, .keep = 'none') %>% mutate(a = a + 1)", NULL, linter) }) -test_that("consecutive_mutate_linter skips files loading SQL backends", { - expect_lint( - trim_some(" - library(dbplyr) - DF %>% mutate(a = a + 1) %>% mutate(b = a - 2) - "), - NULL, - consecutive_mutate_linter() - ) - - expect_lint( - trim_some(" - library(custom.backend) - DF %>% mutate(a = a + 1) %>% mutate(b = a - 2) - "), - NULL, - consecutive_mutate_linter(invalid_backends = "custom.backend") - ) -}) +patrick::with_parameters_test_that( + "consecutive_mutate_linter skips files loading SQL backends", + { + linter <- consecutive_mutate_linter(invalid_backends = backend) + + expect_lint( + trim_some(glue::glue(" + library({backend}) + DF %>% mutate(a = a + 1) %>% mutate(b = a - 2) + ")), + NULL, + linter + ) + + expect_lint( + trim_some(glue::glue(" + require('{backend}') + DF %>% mutate(a = a + 1) %>% mutate(b = a - 2) + ")), + NULL, + linter + ) + + expect_lint( + trim_some(" + conn %>% + tbl(dbplyr::sql('SELECT 1 AS x')) %>% + mutate(a = x + 1) %>% + mutate(b = a + 1) + "), + NULL, + linter + ) + + expect_lint( + trim_some(" + conn %>% + tbl(dbplyr:::sql('SELECT 1 AS x')) %>% + mutate(a = x + 1) %>% + mutate(b = a + 1) + "), + NULL, + linter + ) + + expect_lint( + trim_some(" + #' @import dbplyr + NULL + + DF %>% mutate(a = a + 1) %>% mutate(b = a - 2) + "), + NULL, + linter + ) + + expect_lint( + trim_some(" + #' @importFrom dbplyr sql + NULL + + conn %>% + tbl(sql('SELECT 1 AS x')) %>% + mutate(a = x + 1) %>% + mutate(b = a + 1) + "), + NULL, + linter + ) + }, + .test_name = c("dbplyr", "custom.backend"), + backend = c("dbplyr", "custom.backend") +) test_that("consecutive_mutate_linter blocks simple disallowed usages", { linter <- consecutive_mutate_linter() From 57fa3043e65e591d93d2f63aef0848a103f88c5b Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 18 Nov 2023 04:28:03 +0000 Subject: [PATCH 07/11] more robustness for backend checks --- R/consecutive_mutate_linter.R | 17 ++++++++++++- .../testthat/test-consecutive_mutate_linter.R | 24 +++++++++---------- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/R/consecutive_mutate_linter.R b/R/consecutive_mutate_linter.R index 60eb8ec23c..1bae699780 100644 --- a/R/consecutive_mutate_linter.R +++ b/R/consecutive_mutate_linter.R @@ -32,7 +32,6 @@ #' @seealso [linters] for a complete list of linters available in lintr. #' @export consecutive_mutate_linter <- function(invalid_backends = "dbplyr") { - # NB: ignore R"()" strings for simplicity here. attach_pkg_xpath <- glue(" //SYMBOL_FUNCTION_CALL[text() = 'library' or text() = 'require'] /parent::expr @@ -40,6 +39,17 @@ consecutive_mutate_linter <- function(invalid_backends = "dbplyr") { /*[self::SYMBOL or self::STR_CONST] ") + namespace_xpath <- glue(" + //SYMBOL_PACKAGE[{ xp_text_in_table(invalid_backends) }] + | + //COMMENT[ + contains(text(), '@import') + and ( + {xp_or(sprintf(\"contains(text(), '%s')\", invalid_backends))} + ) + ] + ") + # match on the expr, not the SYMBOL_FUNCTION_CALL, to ensure # namespace-qualified calls only match if the namespaces do. # expr[2] needed in expr[1][expr[2]] to skip matches on pipelines @@ -69,6 +79,11 @@ consecutive_mutate_linter <- function(invalid_backends = "dbplyr") { return(list()) } + namespace_expr <- xml_find_first(xml, namespace_xpath) + if (!is.na(namespace_expr)) { + return(list()) + } + bad_expr <- xml_find_all(xml, xpath) xml_nodes_to_lints( diff --git a/tests/testthat/test-consecutive_mutate_linter.R b/tests/testthat/test-consecutive_mutate_linter.R index b802fe67c3..641a0b3592 100644 --- a/tests/testthat/test-consecutive_mutate_linter.R +++ b/tests/testthat/test-consecutive_mutate_linter.R @@ -40,48 +40,48 @@ patrick::with_parameters_test_that( ) expect_lint( - trim_some(" + trim_some(glue(" conn %>% - tbl(dbplyr::sql('SELECT 1 AS x')) %>% + tbl({backend}::sql('SELECT 1 AS x')) %>% mutate(a = x + 1) %>% mutate(b = a + 1) - "), + ")), NULL, linter ) expect_lint( - trim_some(" + trim_some(glue(" conn %>% - tbl(dbplyr:::sql('SELECT 1 AS x')) %>% + tbl({backend}:::sql('SELECT 1 AS x')) %>% mutate(a = x + 1) %>% mutate(b = a + 1) - "), + ")), NULL, linter ) expect_lint( - trim_some(" - #' @import dbplyr + trim_some(glue(" + #' @import {backend} NULL DF %>% mutate(a = a + 1) %>% mutate(b = a - 2) - "), + ")), NULL, linter ) expect_lint( - trim_some(" - #' @importFrom dbplyr sql + trim_some(glue(" + #' @importFrom {backend} sql NULL conn %>% tbl(sql('SELECT 1 AS x')) %>% mutate(a = x + 1) %>% mutate(b = a + 1) - "), + ")), NULL, linter ) From 6ef9f19f3c011ad806ac058911ab3b4b10e2a161 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 18 Nov 2023 17:20:26 -0800 Subject: [PATCH 08/11] delint example --- R/consecutive_mutate_linter.R | 5 +++++ man/consecutive_mutate_linter.Rd | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/R/consecutive_mutate_linter.R b/R/consecutive_mutate_linter.R index 1bae699780..c2906cf577 100644 --- a/R/consecutive_mutate_linter.R +++ b/R/consecutive_mutate_linter.R @@ -21,6 +21,11 @@ #' ) #' #' # okay +#' lint( +#' text = "x %>% mutate(a = 1, b = 2)", +#' linters = consecutive_mutate_linter() +#' ) +#' #' code <- "library(dbplyr)\nx %>% mutate(a = 1) %>% mutate(a = a + 1)" #' writeLines(code) #' lint( diff --git a/man/consecutive_mutate_linter.Rd b/man/consecutive_mutate_linter.Rd index 4553fe6d24..945fe06806 100644 --- a/man/consecutive_mutate_linter.Rd +++ b/man/consecutive_mutate_linter.Rd @@ -30,6 +30,11 @@ lint( ) # okay +lint( + text = "x \%>\% mutate(a = 1, b = 2)", + linters = consecutive_mutate_linter() +) + code <- "library(dbplyr)\nx \%>\% mutate(a = 1) \%>\% mutate(a = a + 1)" writeLines(code) lint( From 650b96b201c3010a5f2d3bfc50e9804892ab20d5 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 18 Nov 2023 17:46:34 -0800 Subject: [PATCH 09/11] delint --- R/consecutive_mutate_linter.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/consecutive_mutate_linter.R b/R/consecutive_mutate_linter.R index c2906cf577..1988168a70 100644 --- a/R/consecutive_mutate_linter.R +++ b/R/consecutive_mutate_linter.R @@ -25,7 +25,7 @@ #' text = "x %>% mutate(a = 1, b = 2)", #' linters = consecutive_mutate_linter() #' ) -#' +#' #' code <- "library(dbplyr)\nx %>% mutate(a = 1) %>% mutate(a = a + 1)" #' writeLines(code) #' lint( From c15598139e74846376f644fbca9eb1a3c35bb2a4 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 20 Nov 2023 21:06:44 +0000 Subject: [PATCH 10/11] correct merge --- man/linters.Rd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/man/linters.Rd b/man/linters.Rd index 2b2c65b927..09917ed985 100644 --- a/man/linters.Rd +++ b/man/linters.Rd @@ -19,8 +19,8 @@ The following tags exist: \itemize{ \item{\link[=best_practices_linters]{best_practices} (62 linters)} \item{\link[=common_mistakes_linters]{common_mistakes} (10 linters)} -\item{\link[=configurable_linters]{configurable} (36 linters)} -\item{\link[=consistency_linters]{consistency} (29 linters)} +\item{\link[=configurable_linters]{configurable} (37 linters)} +\item{\link[=consistency_linters]{consistency} (30 linters)} \item{\link[=correctness_linters]{correctness} (7 linters)} \item{\link[=default_linters]{default} (25 linters)} \item{\link[=deprecated_linters]{deprecated} (4 linters)} @@ -28,7 +28,7 @@ The following tags exist: \item{\link[=executing_linters]{executing} (6 linters)} \item{\link[=package_development_linters]{package_development} (14 linters)} \item{\link[=pkg_testthat_linters]{pkg_testthat} (12 linters)} -\item{\link[=readability_linters]{readability} (62 linters)} +\item{\link[=readability_linters]{readability} (63 linters)} \item{\link[=regex_linters]{regex} (4 linters)} \item{\link[=robustness_linters]{robustness} (17 linters)} \item{\link[=style_linters]{style} (39 linters)} From 3bbf6a2aeda39ffa7a7ebb55071360c2318d4b4d Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 20 Nov 2023 21:36:59 +0000 Subject: [PATCH 11/11] re-title Rd --- R/consecutive_mutate_linter.R | 2 +- man/consecutive_mutate_linter.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/consecutive_mutate_linter.R b/R/consecutive_mutate_linter.R index 1988168a70..6d1032fc81 100644 --- a/R/consecutive_mutate_linter.R +++ b/R/consecutive_mutate_linter.R @@ -1,4 +1,4 @@ -#' Force consecutive calls to mutate() into just one when possible +#' Require consecutive calls to mutate() to be combined when possible #' #' `dplyr::mutate()` accepts any number of columns, so sequences like #' `DF %>% dplyr::mutate(..1) %>% dplyr::mutate(..2)` are redundant -- diff --git a/man/consecutive_mutate_linter.Rd b/man/consecutive_mutate_linter.Rd index 945fe06806..a083ce7ab6 100644 --- a/man/consecutive_mutate_linter.Rd +++ b/man/consecutive_mutate_linter.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/consecutive_mutate_linter.R \name{consecutive_mutate_linter} \alias{consecutive_mutate_linter} -\title{Force consecutive calls to mutate() into just one when possible} +\title{Require consecutive calls to mutate() to be combined when possible} \usage{ consecutive_mutate_linter(invalid_backends = "dbplyr") }