diff --git a/NEWS.md b/NEWS.md index c7f8a724..5bf07c57 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,10 @@ * Added support for `base::cummax()` (#323). +## Bug fixes + +* Fix `NA` handling in `cummin()`, `cumprod()`, `cumsum()` (#Yousa-Mirage, #326). + # tidypolars 0.17.0 `tidypolars` requires `polars` >= 1.9.0 and `dplyr` >= 1.2.0. diff --git a/R/funs-default.R b/R/funs-default.R index 2aacab8e..b5f25c71 100644 --- a/R/funs-default.R +++ b/R/funs-default.R @@ -103,22 +103,28 @@ pl_cosh <- function(x, ...) { pl_cummax <- function(x, ...) { check_empty_dots(...) - x$cum_max() + # Once a missing value is seen, keep a TRUE mask for all following rows. + has_seen_na <- x$is_null()$cum_max() + # Replace cumulative results with NA from the first missing value onward. + pl$when(has_seen_na)$then(pl$lit(NA))$otherwise(x$cum_max()) } pl_cummin <- function(x, ...) { check_empty_dots(...) - x$cum_min() + has_seen_na <- x$is_null()$cum_max() + pl$when(has_seen_na)$then(pl$lit(NA))$otherwise(x$cum_min()) } pl_cumprod <- function(x, ...) { check_empty_dots(...) - x$cum_prod() + has_seen_na <- x$is_null()$cum_max() + pl$when(has_seen_na)$then(pl$lit(NA))$otherwise(x$cum_prod()) } pl_cumsum <- function(x, ...) { check_empty_dots(...) - x$cum_sum() + has_seen_na <- x$is_null()$cum_max() + pl$when(has_seen_na)$then(pl$lit(NA))$otherwise(x$cum_sum()) } # TODO: this is not tested anymore because it requires reframe(): diff --git a/tests/testthat/test-funs_math-lazy.R b/tests/testthat/test-funs_math-lazy.R index de3e6ca1..0ac99954 100644 --- a/tests/testthat/test-funs_math-lazy.R +++ b/tests/testthat/test-funs_math-lazy.R @@ -9,7 +9,6 @@ test_that("mathematical functions work", { value = sample(11:15), value_trigo = seq(0, 0.4, 0.1), value_mix = -2:2, - value_with_NA = c(-2, -1, NA, 1, 2) ) test_pl <- as_polars_lf(test) @@ -76,6 +75,77 @@ test_that("mathematical functions work", { } }) +test_that("mathematical functions work with NA", { + test <- tibble( + value_with_NA = c(11, 12, NA, 14, 15), + value_trigo_with_NA = c(0, 0.1, NA, 0.3, 0.4), + value_mix_with_NA = c(-2, -1, NA, 1, 2) + ) + test_pl <- as_polars_lf(test) + + for (i in c( + "abs", + "acos", + "acosh", + "asin", + "asinh", + "atan", + "atanh", + "ceiling", + "cos", + "cosh", + "cummax", + "cummin", + "cumprod", + "cumsum", + "exp", + "first", + "floor", + "last", + "log", + "log10", + # "rank", inconsistent behavior between R and Polars with NA + "sign", + "sin", + "sinh", + # "sort", inconsistent behavior between R and Polars with NA + "sqrt", + "tan", + "tanh" + )) { + if ( + i %in% + c( + "acos", + "asin", + "atan", + "atanh", + "ceiling", + "cos", + "floor", + "sin", + "tan" + ) + ) { + variable <- "value_trigo_with_NA" + } else if (i %in% c("abs", "mean")) { + variable <- "value_mix_with_NA" + } else { + variable <- "value_with_NA" + } + + pol <- paste0("mutate(test_pl, foo =", i, "(", variable, "))") |> + str2lang() |> + eval() + + res <- paste0("mutate(test, foo =", i, "(", variable, "))") |> + str2lang() |> + eval() + + expect_equal_lazy(pol, res, info = i) + } +}) + test_that("warns if unknown args", { test <- tibble( x1 = c("a", "a", "b", "a", "c"), diff --git a/tests/testthat/test-funs_math.R b/tests/testthat/test-funs_math.R index c1310955..ae6a21f4 100644 --- a/tests/testthat/test-funs_math.R +++ b/tests/testthat/test-funs_math.R @@ -5,7 +5,6 @@ test_that("mathematical functions work", { value = sample(11:15), value_trigo = seq(0, 0.4, 0.1), value_mix = -2:2, - value_with_NA = c(-2, -1, NA, 1, 2) ) test_pl <- as_polars_df(test) @@ -72,6 +71,77 @@ test_that("mathematical functions work", { } }) +test_that("mathematical functions work with NA", { + test <- tibble( + value_with_NA = c(11, 12, NA, 14, 15), + value_trigo_with_NA = c(0, 0.1, NA, 0.3, 0.4), + value_mix_with_NA = c(-2, -1, NA, 1, 2) + ) + test_pl <- as_polars_df(test) + + for (i in c( + "abs", + "acos", + "acosh", + "asin", + "asinh", + "atan", + "atanh", + "ceiling", + "cos", + "cosh", + "cummax", + "cummin", + "cumprod", + "cumsum", + "exp", + "first", + "floor", + "last", + "log", + "log10", + # "rank", inconsistent behavior between R and Polars with NA + "sign", + "sin", + "sinh", + # "sort", inconsistent behavior between R and Polars with NA + "sqrt", + "tan", + "tanh" + )) { + if ( + i %in% + c( + "acos", + "asin", + "atan", + "atanh", + "ceiling", + "cos", + "floor", + "sin", + "tan" + ) + ) { + variable <- "value_trigo_with_NA" + } else if (i %in% c("abs", "mean")) { + variable <- "value_mix_with_NA" + } else { + variable <- "value_with_NA" + } + + pol <- paste0("mutate(test_pl, foo =", i, "(", variable, "))") |> + str2lang() |> + eval() + + res <- paste0("mutate(test, foo =", i, "(", variable, "))") |> + str2lang() |> + eval() + + expect_equal(pol, res, info = i) + } +}) + test_that("warns if unknown args", { test <- tibble( x1 = c("a", "a", "b", "a", "c"),