Skip to content

Commit e9372dd

Browse files
committed
More 2d summary/bin refactoring. #1274
1 parent 3c19a96 commit e9372dd

10 files changed

+191
-207
lines changed

NEWS

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
ggplot2 1.0.1.9xxx
22
----------------------------------------------------------------
33

4+
* It's now obvious that you can set the `binwidth` parameter for
5+
`stat_bin_hex()`, `stat_summary_hex()`, `stat_bin_2d()`, and
6+
`stat_summary_2d()`. `stat_summary_2d()` and `stat_bin_2d()` now share
7+
exactly the same code for determining breaks from `bins`, `binwidth`, and
8+
`origin`. `stat_summary_2d()` and `stat_bin_2d()` now output in tile/raster
9+
compatible form instead of rect form.
10+
411
* For consistency with the summary functions, `stat_binhex()` and `stat_bin2d()`
512
have been renamed to `stat_bin_hex()` and `stat_bin_2d()` (the existing
613
functions will continue to work but will be deprecated in the future)

R/geom-bin2d.r

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ geom_bin2d <- function(mapping = NULL, data = NULL, stat = "bin2d",
2626
data = data,
2727
mapping = mapping,
2828
stat = stat,
29-
geom = GeomRect,
29+
geom = GeomRaster,
3030
position = position,
3131
show.legend = show.legend,
3232
inherit.aes = inherit.aes,

R/stat-bin2d.r

Lines changed: 89 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
#' @param bins numeric vector giving number of bins in both vertical and
22
#' horizontal directions. Set to 30 by default.
3+
#' @param binwidth Numeric vector giving bin width in both vertical and
4+
#' horizontal directions. Overrides \code{bins} if both set.
35
#' @param drop if \code{TRUE} removes all cells with 0 counts.
46
#' @export
57
#' @aliases stat_bin2d
68
#' @rdname geom_bin2d
7-
stat_bin_2d <- function(mapping = NULL, data = NULL, geom = "rect",
8-
position = "identity", bins = 30, drop = TRUE,
9-
show.legend = NA, inherit.aes = TRUE, ...) {
9+
stat_bin_2d <- function(mapping = NULL, data = NULL, geom = "raster",
10+
position = "identity", bins = 30, binwidth = NULL,
11+
drop = TRUE, show.legend = NA, inherit.aes = TRUE, ...) {
1012
layer(
1113
data = data,
1214
mapping = mapping,
@@ -17,6 +19,7 @@ stat_bin_2d <- function(mapping = NULL, data = NULL, geom = "rect",
1719
inherit.aes = inherit.aes,
1820
stat_params = list(
1921
bins = bins,
22+
binwidth = binwidth,
2023
drop = drop
2124
),
2225
params = list(...)
@@ -36,79 +39,98 @@ StatBin2d <- ggproto("StatBin2d", Stat,
3639

3740
compute_group = function(data, panel_info, binwidth = NULL, bins = 30,
3841
breaks = NULL, origin = NULL, drop = TRUE, ...) {
39-
range <- list(
40-
x = scale_dimension(panel_info$x, c(0, 0)),
41-
y = scale_dimension(panel_info$y, c(0, 0))
42-
)
43-
44-
# is.integer(...) below actually deals with factor input data, which is
45-
# integer by now. Bins for factor data should take the width of one level,
46-
# and should show up centered over their tick marks.
47-
48-
# Determine origin, if omitted
49-
if (is.null(origin)) {
50-
origin <- c(NA, NA)
51-
} else {
52-
stopifnot(is.numeric(origin))
53-
stopifnot(length(origin) == 2)
54-
}
55-
originf <- function(x) if (is.integer(x)) -0.5 else min(x, na.rm = TRUE)
56-
if (is.na(origin[1])) origin[1] <- originf(data$x)
57-
if (is.na(origin[2])) origin[2] <- originf(data$y)
58-
59-
# Determine binwidth, if omitted
60-
if (is.null(binwidth)) {
61-
binwidth <- c(NA, NA)
62-
if (is.integer(data$x)) {
63-
binwidth[1] <- 1
64-
} else {
65-
binwidth[1] <- diff(range$x) / bins
66-
}
67-
if (is.integer(data$y)) {
68-
binwidth[2] <- 1
69-
} else {
70-
binwidth[2] <- diff(range$y) / bins
71-
}
72-
}
73-
stopifnot(is.numeric(binwidth))
74-
stopifnot(length(binwidth) == 2)
7542

76-
# Determine breaks, if omitted
77-
if (is.null(breaks)) {
78-
breaks <- list(x = NULL, y = NULL)
79-
}
43+
origin <- dual_param(origin, list(NULL, NULL))
44+
binwidth <- dual_param(binwidth, list(NULL, NULL))
45+
breaks <- dual_param(breaks, list(NULL, NULL))
46+
bins <- dual_param(bins, list(x = 30, y = 30))
8047

81-
stopifnot(length(breaks) == 2)
82-
names(breaks) <- c("x", "y")
48+
xbreaks <- bin_breaks(panel_info$x, breaks$x, origin$x, binwidth$x, bins$x)
49+
ybreaks <- bin_breaks(panel_info$y, breaks$y, origin$y, binwidth$y, bins$y)
8350

84-
if (is.null(breaks$x)) {
85-
breaks$x <- seq(origin[1], max(range$x) + binwidth[1], binwidth[1])
86-
}
87-
if (is.null(breaks$y)) {
88-
breaks$y <- seq(origin[2], max(range$y) + binwidth[2], binwidth[2])
51+
xbin <- cut(data$x, xbreaks, include.lowest = TRUE, label = FALSE)
52+
ybin <- cut(data$y, ybreaks, include.lowest = TRUE, label = FALSE)
53+
54+
if (is.null(data$weight))
55+
data$weight <- 1
56+
57+
out <- tapply_df(data$weight, list(xbin = xbin, ybin = ybin), sum, drop = drop)
58+
59+
xdim <- bin_loc(xbreaks, out$xbin)
60+
out$x <- xdim$mid
61+
out$width <- xdim$length
62+
63+
ydim <- bin_loc(ybreaks, out$ybin)
64+
out$y <- ydim$mid
65+
out$height <- ydim$length
66+
67+
out$count <- out$value
68+
out$density <- out$count / sum(out$count, na.rm = TRUE)
69+
out
70+
}
71+
)
72+
73+
dual_param <- function(x, default = list(x = NULL, y = NULL)) {
74+
if (is.null(x)) {
75+
default
76+
} else if (length(x) == 2) {
77+
if (is.list(x) && !is.null(names(x))) {
78+
x
79+
} else {
80+
list(x = x[[1]], y = x[[2]])
8981
}
82+
} else {
83+
list(x = x, y = x)
84+
}
85+
}
9086

91-
stopifnot(is.list(breaks))
92-
stopifnot(all(sapply(breaks, is.numeric)))
87+
bin_breaks <- function(scale, breaks = NULL, origin = NULL, binwidth = NULL,
88+
bins = 30, right = 30) {
89+
# Bins for categorical data should take the width of one level,
90+
# and should show up centered over their tick marks. All other parameters
91+
# are ignored.
92+
if (inherits(scale, "discrete")) {
93+
breaks <- scale_breaks(scale)
94+
return(-0.5 + seq_len(length(breaks) + 1))
95+
}
9396

94-
xbin <- cut(data$x, sort(breaks$x), include.lowest = TRUE)
95-
ybin <- cut(data$y, sort(breaks$y), include.lowest = TRUE)
97+
if (!is.null(breaks))
98+
return(breaks)
9699

97-
if (is.null(data$weight)) data$weight <- 1
100+
range <- scale_limits(scale)
98101

99-
counts <- as.data.frame(
100-
xtabs(weight ~ xbin + ybin, data), responseName = "count")
101-
if (drop) counts <- subset(counts, count > 0)
102+
if (is.null(binwidth) || identical(binwidth, NA)) {
103+
binwidth <- diff(range) / bins
104+
}
105+
stopifnot(is.numeric(binwidth), length(binwidth) == 1)
102106

103-
counts$xint <- as.numeric(counts$xbin)
104-
counts$xmin <- breaks$x[counts$xint]
105-
counts$xmax <- breaks$x[counts$xint + 1]
107+
if (is.null(origin) || identical(origin, NA)) {
108+
origin <- plyr::round_any(range[1], binwidth, floor)
109+
}
110+
stopifnot(is.numeric(origin), length(origin) == 1)
106111

107-
counts$yint <- as.numeric(counts$ybin)
108-
counts$ymin <- breaks$y[counts$yint]
109-
counts$ymax <- breaks$y[counts$yint + 1]
112+
breaks <- seq(origin, range[2] + binwidth, binwidth)
113+
adjust_breaks(breaks, right)
114+
}
110115

111-
counts$density <- counts$count / sum(counts$count, na.rm = TRUE)
112-
counts
116+
adjust_breaks <- function(x, right = TRUE) {
117+
diddle <- 1e-07 * stats::median(diff(x))
118+
if (right) {
119+
fuzz <- c(-diddle, rep.int(diddle, length(x) - 1))
120+
} else {
121+
fuzz <- c(rep.int(-diddle, length(x) - 1), diddle)
113122
}
114-
)
123+
sort(x) + fuzz
124+
}
125+
126+
bin_loc <- function(x, id) {
127+
left <- x[-length(x)]
128+
right <- x[-1]
129+
130+
list(
131+
left = left[id],
132+
right = right[id],
133+
mid = ((left + right) / 2)[id],
134+
length = diff(x)[id]
135+
)
136+
}

R/stat-binhex.r

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
#' @export
22
#' @rdname geom_hex
3-
#' @param bins numeric vector specifying number of bins in both x and y
4-
#' directions. Set to 30 by default.
5-
#' @inheritParams stat_identity
3+
#' @inheritParams stat_bin_2d
64
#' @param na.rm If \code{FALSE} (the default), removes missing values with
75
#' a warning. If \code{TRUE} silently removes missing values.
86
#' @aliases stat_binhex
97
stat_bin_hex <- function(mapping = NULL, data = NULL, geom = "hex",
10-
position = "identity", bins = 30, na.rm = FALSE,
11-
show.legend = NA, inherit.aes = TRUE, ...) {
8+
position = "identity", bins = 30, binwidth = NULL,
9+
na.rm = FALSE, show.legend = NA, inherit.aes = TRUE,
10+
...) {
1211
layer(
1312
data = data,
1413
mapping = mapping,
@@ -18,7 +17,8 @@ stat_bin_hex <- function(mapping = NULL, data = NULL, geom = "hex",
1817
show.legend = show.legend,
1918
inherit.aes = inherit.aes,
2019
stat_params = list(
21-
bins = bins
20+
bins = bins,
21+
binwidth = binwidth
2222
),
2323
params = list(...)
2424
)

R/stat-summary-2d.r

Lines changed: 39 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,10 @@
3333
#' if (requireNamespace("hexbin")) {
3434
#' d + stat_summary_hex()
3535
#' }
36-
stat_summary_2d <- function(mapping = NULL, data = NULL, geom = "rect",
37-
position = "identity", bins = 30, drop = TRUE,
38-
fun = "mean", fun.args = list(), show.legend = NA,
39-
inherit.aes = TRUE, ...) {
36+
stat_summary_2d <- function(mapping = NULL, data = NULL, geom = "raster",
37+
position = "identity", bins = 30, binwidth = NULL,
38+
drop = TRUE, fun = "mean", fun.args = list(),
39+
show.legend = NA, inherit.aes = TRUE, ...) {
4040
layer(
4141
data = data,
4242
mapping = mapping,
@@ -47,6 +47,7 @@ stat_summary_2d <- function(mapping = NULL, data = NULL, geom = "rect",
4747
inherit.aes = inherit.aes,
4848
stat_params = list(
4949
bins = bins,
50+
binwidth = binwidth,
5051
drop = drop,
5152
fun = fun,
5253
fun.args = fun.args
@@ -75,70 +76,47 @@ StatSummary2d <- ggproto("StatSummary2d", Stat,
7576
compute_group = function(data, panel_info, binwidth = NULL, bins = 30,
7677
breaks = NULL, origin = NULL, drop = TRUE,
7778
fun = "mean", fun.args = list(), ...) {
78-
range <- list(
79-
x = scale_dimension(panel_info$x, c(0, 0)),
80-
y = scale_dimension(panel_info$y, c(0, 0))
81-
)
79+
origin <- dual_param(origin, list(NULL, NULL))
80+
binwidth <- dual_param(binwidth, list(NULL, NULL))
81+
breaks <- dual_param(breaks, list(NULL, NULL))
82+
bins <- dual_param(bins, list(x = 30, y = 30))
8283

83-
# Determine origin, if omitted
84-
if (is.null(origin)) {
85-
origin <- c(NA, NA)
86-
} else {
87-
stopifnot(is.numeric(origin))
88-
stopifnot(length(origin) == 2)
89-
}
90-
originf <- function(x) if (is.integer(x)) -0.5 else min(x)
91-
if (is.na(origin[1])) origin[1] <- originf(data$x)
92-
if (is.na(origin[2])) origin[2] <- originf(data$y)
84+
xbreaks <- bin_breaks(panel_info$x, breaks$x, origin$x, binwidth$x, bins$x)
85+
ybreaks <- bin_breaks(panel_info$y, breaks$y, origin$y, binwidth$y, bins$y)
9386

94-
# Determine binwidth, if omitted
95-
if (is.null(binwidth)) {
96-
binwidth <- c(NA, NA)
97-
if (is.integer(data$x)) {
98-
binwidth[1] <- 1
99-
} else {
100-
binwidth[1] <- diff(range$x) / bins
101-
}
102-
if (is.integer(data$y)) {
103-
binwidth[2] <- 1
104-
} else {
105-
binwidth[2] <- diff(range$y) / bins
106-
}
107-
}
108-
stopifnot(is.numeric(binwidth))
109-
stopifnot(length(binwidth) == 2)
87+
xbin <- cut(data$x, xbreaks, include.lowest = TRUE, label = FALSE)
88+
ybin <- cut(data$y, ybreaks, include.lowest = TRUE, label = FALSE)
11089

111-
# Determine breaks, if omitted
112-
if (is.null(breaks)) {
113-
breaks <- list(
114-
seq(origin[1], max(range$x) + binwidth[1], binwidth[1]),
115-
seq(origin[2], max(range$y) + binwidth[2], binwidth[2])
116-
)
117-
} else {
118-
stopifnot(is.list(breaks))
119-
stopifnot(length(breaks) == 2)
120-
stopifnot(all(sapply(breaks, is.numeric)))
90+
f <- function(x) {
91+
do.call(fun, c(list(quote(x)), fun.args))
12192
}
122-
names(breaks) <- c("x", "y")
93+
out <- tapply_df(data$z, list(xbin = xbin, ybin = ybin), f, drop = drop)
12394

124-
xbin <- cut(data$x, sort(breaks$x), include.lowest = TRUE)
125-
ybin <- cut(data$y, sort(breaks$y), include.lowest = TRUE)
95+
xdim <- bin_loc(xbreaks, out$xbin)
96+
out$x <- xdim$mid
97+
out$width <- xdim$length
12698

127-
if (is.null(data$weight)) data$weight <- 1
99+
ydim <- bin_loc(ybreaks, out$ybin)
100+
out$y <- ydim$mid
101+
out$height <- ydim$length
102+
103+
out
104+
}
105+
)
128106

129-
ans <- plyr::ddply(data.frame(data, xbin, ybin), c("xbin", "ybin"), function(d) {
130-
val <- do.call(fun, c(list(quote(d$z)), fun.args))
131-
data.frame(value = val)
132-
})
133-
if (drop) ans <- stats::na.omit(ans)
107+
# Adaptation of tapply that returns a data frame instead of a matrix
108+
tapply_df <- function(x, index, fun, ..., drop = TRUE) {
109+
labels <- lapply(index, ulevels)
110+
out <- expand.grid(labels, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE)
134111

135-
ans$xint <- as.numeric(ans$xbin)
136-
ans$xmin <- breaks$x[ans$xint]
137-
ans$xmax <- breaks$x[ans$xint + 1]
112+
grps <- split(x, index)
113+
names(grps) <- NULL
114+
out$value <- unlist(lapply(grps, fun, ...))
138115

139-
ans$yint <- as.numeric(ans$ybin)
140-
ans$ymin <- breaks$y[ans$yint]
141-
ans$ymax <- breaks$y[ans$yint + 1]
142-
ans
116+
if (drop) {
117+
n <- vapply(grps, length, integer(1))
118+
out <- out[n > 0, , drop = FALSE]
143119
}
144-
)
120+
121+
out
122+
}

R/stat-summary-hex.r

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
#' @export
22
#' @rdname stat_summary_2d
3-
#' @inheritParams stat_binhex
3+
#' @inheritParams stat_bin_hex
44
stat_summary_hex <- function(mapping = NULL, data = NULL, geom = "hex",
5-
position = "identity", bins = 30, drop = TRUE,
6-
fun = "mean", fun.args = list(), show.legend = NA,
7-
inherit.aes = TRUE, ...) {
5+
position = "identity", bins = 30, binwidth = NULL,
6+
drop = TRUE, fun = "mean", fun.args = list(),
7+
show.legend = NA, inherit.aes = TRUE, ...) {
88
layer(
99
data = data,
1010
mapping = mapping,
@@ -15,6 +15,7 @@ stat_summary_hex <- function(mapping = NULL, data = NULL, geom = "hex",
1515
inherit.aes = inherit.aes,
1616
stat_params = list(
1717
bins = bins,
18+
binwidth = binwidth,
1819
drop = drop,
1920
fun = fun,
2021
fun.args = fun.args

0 commit comments

Comments
 (0)