Skip to content

Commit d58b00a

Browse files
committed
Use same density estimation code in stat_density and stat_ydensity.
Fixes #972
1 parent 403b92f commit d58b00a

File tree

6 files changed

+88
-70
lines changed

6 files changed

+88
-70
lines changed

R/position-collide.r

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,10 @@ pos_stack <- function(df, width) {
6767
heights <- c(0, cumsum(y))
6868
}
6969

70-
within(df, {
71-
ymin <- heights[-n]
72-
ymax <- heights[-1]
73-
y <- ymax
74-
})
70+
df$ymin <- heights[-n]
71+
df$ymax <- heights[-1]
72+
df$y <- df$ymax
73+
df
7574
}
7675

7776
# Stack overlapping intervals and set height to 1.

R/stat-density.r

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@
66
#' @param adjust see \code{\link{density}} for details
77
#' @param kernel kernel used for density estimation, see
88
#' \code{\link{density}} for details
9-
#' @param trim if \code{TRUE}, the default, densities are trimmed to the
10-
#' actual range of the data. If \code{FALSE}, they are extended by the
11-
#' default 3 bandwidths (as specified by the \code{cut} parameter to
12-
#' \code{\link{density}})
9+
#' @param trim This parameter only matters if you are displaying multiple
10+
#' densities in one plot. If \code{FALSE}, the default, each density is
11+
#' computed on the full range of the data. If \code{TRUE}, each density
12+
#' is computed over the range of that group: this typically means the
13+
#' estimated x values will not line-up, and hence you won't be able to
14+
#' stack density values.
1315
#' @param na.rm If \code{FALSE} (the default), removes missing values with
1416
#' a warning. If \code{TRUE} silently removes missing values.
1517
#' @inheritParams stat_identity
@@ -100,25 +102,48 @@ StatDensity <- proto(Stat, {
100102
data <- remove_missing(data, na.rm, "x", name = "stat_density",
101103
finite = TRUE)
102104

103-
n <- nrow(data)
104-
if (n < 3) return(data.frame())
105-
if (is.null(data$weight)) data$weight <- rep(1, n) / n
105+
if (trim) {
106+
range <- range(data$x, na.rm = TRUE)
107+
} else {
108+
range <- scale_dimension(scales$x, c(0, 0))
109+
}
106110

107-
range <- scale_dimension(scales$x, c(0, 0))
108-
xgrid <- seq(range[1], range[2], length=200)
109-
110-
dens <- stats::density(data$x, adjust=adjust, kernel=kernel, weight=data$weight, from=range[1], to=range[2])
111-
densdf <- as.data.frame(dens[c("x","y")])
112-
113-
densdf$scaled <- densdf$y / max(densdf$y, na.rm = TRUE)
114-
if (trim) densdf <- subset(densdf, x > min(data$x, na.rm = TRUE) & x < max(data$x, na.rm = TRUE))
115-
116-
densdf$count <- densdf$y * n
117-
rename(densdf, c(y = "density"), warn_missing = FALSE)
111+
compute_density(data$x, data$w, from = range[1], to = range[2],
112+
adjust = adjust, kernel = kernel)
118113
}
119114

120115
default_geom <- function(.) GeomArea
121116
default_aes <- function(.) aes(y = ..density.., fill=NA)
122117
required_aes <- c("x")
123118

124119
})
120+
121+
compute_density <- function(x, w, from, to, bw = "nrd0", adjust = 1,
122+
kernel = "gaussian") {
123+
n <- length(x)
124+
if (is.null(w)) {
125+
w <- rep(1 / n, n)
126+
}
127+
128+
# if less than 3 points, spread density evenly over points
129+
if (n < 3) {
130+
return(data.frame(
131+
x = x,
132+
density = w / sum(w),
133+
scaled = w / max(w),
134+
count = 1,
135+
n = n
136+
))
137+
}
138+
139+
dens <- stats::density(x, weight = w, bw = bw, adjust = adjust,
140+
kernel = kernel, from = from, to = to)
141+
142+
data.frame(
143+
x = dens$x,
144+
density = dens$y,
145+
scaled = dens$y / max(dens$y, na.rm = TRUE),
146+
count = dens$y * n,
147+
n = n
148+
)
149+
}

R/stat-ydensity.r

Lines changed: 16 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
#'
66
#' @inheritParams stat_density
77
#' @inheritParams stat_identity
8-
#' @param trim If \code{TRUE} (default), trim the tails of the violins
9-
#' to the range of the data. If \code{FALSE}, don't trim the tails.
108
#' @param scale if "area" (default), all violins have the same area (before trimming
119
#' the tails). If "count", areas are scaled proportionally to the number of
1210
#' observations. If "width", all violins have the same maximum width.
@@ -60,52 +58,29 @@ StatYdensity <- proto(Stat, {
6058
}
6159

6260
calculate <- function(., data, scales, width=NULL, adjust=1, kernel="gaussian",
63-
trim=TRUE, na.rm = FALSE, ...) {
61+
trim = FALSE, na.rm = FALSE, ...) {
62+
data <- remove_missing(data, na.rm, "x", name = "stat_density",
63+
finite = TRUE)
6464

65-
n <- nrow(data)
66-
67-
# if less than 3 points, return a density of 1 everywhere
68-
if (n < 3) {
69-
return(data.frame(data, density = 1, scaled = 1, count = 1))
70-
}
71-
72-
# initialize weights if they are not supplied by the user
73-
if (is.null(data$weight)) { data$weight <- rep(1, n) / n }
74-
75-
# compute the density
76-
dens <- stats::density(data$y, adjust = adjust, kernel = kernel,
77-
weight = data$weight, n = 200)
78-
79-
# NB: stat_density restricts to the scale range, here we leave that
80-
# free so violins can extend the y scale
81-
densdf <- data.frame(y = dens$x, density = dens$y)
82-
83-
# scale density to a maximum of 1
84-
densdf$scaled <- densdf$density / max(densdf$density, na.rm = TRUE)
85-
86-
# trim density outside of the data range
8765
if (trim) {
88-
densdf <- subset(densdf, y > min(data$y, na.rm = TRUE) & y < max(data$y, na.rm = TRUE))
66+
range <- range(data$y, na.rm = TRUE)
67+
} else {
68+
range <- scale_dimension(scales$y, c(0, 0))
8969
}
90-
# NB: equivalently, we could have used these bounds in the from and
91-
# to arguments of density()
92-
93-
# scale density by the number of observations
94-
densdf$count <- densdf$density * n
95-
# record the number of observations to be able to scale the density later
96-
densdf$n <- n
97-
98-
# coordinate on the x axis
99-
densdf$x <- if (is.factor(data$x)) data$x[1] else mean(range(data$x))
70+
dens <- compute_density(data$y, data$w, from = range[1], to = range[2],
71+
adjust = adjust, kernel = kernel)
10072

101-
# width of the bounding box of the violin plot on the x axis for continuous x
102-
if (length(unique(data$x)) > 1) { width <- diff(range(data$x)) * 0.9 }
103-
densdf$width <- width
73+
dens$y <- dens$x
74+
dens$x <- mean(range(data$x))
10475

105-
densdf
76+
# Compute width if x has multiple values
77+
if (length(unique(data$x)) > 1) {
78+
width <- diff(range(data$x)) * 0.9
79+
}
80+
dens$width <- width
81+
dens
10682
}
10783

10884
default_geom <- function(.) GeomViolin
10985
required_aes <- c("x", "y")
110-
11186
})

inst/tests/test-stat-density.R

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
context("stat_density") # and stat_ydensity
2+
3+
test_that("compute_density succeeds when variance is zero", {
4+
dens <- compute_density(rep(0, 10), NULL, from = 0.5, to = 0.5)
5+
expect_equal(dens$n, rep(10, 512))
6+
})
7+
8+
test_that("compute_density returns useful df when <3 values", {
9+
dens <- compute_density(c(1, 2), NULL, from = 0, to = 0)
10+
11+
expect_equal(nrow(dens), 2)
12+
expect_equal(names(dens), c("x", "density", "scaled", "count", "n"))
13+
})

man/stat_density.Rd

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,12 @@ on this layer}
2626
\item{kernel}{kernel used for density estimation, see
2727
\code{\link{density}} for details}
2828

29-
\item{trim}{if \code{TRUE}, the default, densities are trimmed to the
30-
actual range of the data. If \code{FALSE}, they are extended by the
31-
default 3 bandwidths (as specified by the \code{cut} parameter to
32-
\code{\link{density}})}
29+
\item{trim}{This parameter only matters if you are displaying multiple
30+
densities in one plot. If \code{FALSE}, the default, each density is
31+
computed on the full range of the data. If \code{TRUE}, each density
32+
is computed over the range of that group: this typically means the
33+
estimated x values will not line-up, and hence you won't be able to
34+
stack density values.}
3335
3436
\item{na.rm}{If \code{FALSE} (the default), removes missing values with
3537
a warning. If \code{TRUE} silently removes missing values.}

man/stat_ydensity.Rd

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,12 @@ on this layer}
2626
\item{kernel}{kernel used for density estimation, see
2727
\code{\link{density}} for details}
2828

29-
\item{trim}{If \code{TRUE} (default), trim the tails of the violins
30-
to the range of the data. If \code{FALSE}, don't trim the tails.}
29+
\item{trim}{This parameter only matters if you are displaying multiple
30+
densities in one plot. If \code{FALSE}, the default, each density is
31+
computed on the full range of the data. If \code{TRUE}, each density
32+
is computed over the range of that group: this typically means the
33+
estimated x values will not line-up, and hence you won't be able to
34+
stack density values.}
3135
3236
\item{scale}{if "area" (default), all violins have the same area (before trimming
3337
the tails). If "count", areas are scaled proportionally to the number of

0 commit comments

Comments
 (0)