tidyverse
diff --git a/‎NEWS
Lines changed: 7 additions & 0 deletions b/‎NEWS
Lines changed: 7 additions & 0 deletions
diff --git a/‎R/geom-bin2d.r
Lines changed: 1 addition & 1 deletion b/‎R/geom-bin2d.r
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/stat-bin2d.r
Lines changed: 89 additions & 67 deletions b/‎R/stat-bin2d.r
Lines changed: 89 additions & 67 deletions
diff --git a/‎R/stat-binhex.r
Lines changed: 6 additions & 6 deletions b/‎R/stat-binhex.r
Lines changed: 6 additions & 6 deletions
diff --git a/‎R/stat-summary-2d.r
Lines changed: 39 additions & 61 deletions b/‎R/stat-summary-2d.r
Lines changed: 39 additions & 61 deletions
diff --git a/‎R/stat-summary-hex.r
Lines changed: 5 additions & 4 deletions b/‎R/stat-summary-hex.r
Lines changed: 5 additions & 4 deletions
@@ -1,6 +1,13 @@
 ggplot2 1.0.1.9xxx
 ----------------------------------------------------------------
 
+* `binwidth` is now an explicit argument of
+  `stat_bin_hex()`, `stat_summary_hex()`, `stat_bin_2d()`, and
+  `stat_summary_2d()`. `stat_summary_2d()` and `stat_bin_2d()` now share
+  exactly the same code for determining breaks from `bins`, `binwidth`, and
+  `origin`. `stat_summary_2d()` and `stat_bin_2d()` now output in tile/raster
+  compatible form instead of rect form.
+
 * For consistency with the summary functions, `stat_binhex()` and `stat_bin2d()`
   have been renamed to `stat_bin_hex()` and `stat_bin_2d()` (the existing
   functions will continue to work but will be deprecated in the future)
 
@@ -26,7 +26,7 @@ geom_bin2d <- function(mapping = NULL, data = NULL, stat = "bin2d",
     data = data,
     mapping = mapping,
     stat = stat,
-    geom = GeomRect,
+    geom = GeomRaster,
     position = position,
     show.legend = show.legend,
     inherit.aes = inherit.aes,
 
@@ -1,12 +1,14 @@
 #' @param bins numeric vector giving number of bins in both vertical and
 #'   horizontal directions. Set to 30 by default.
+#' @param binwidth Numeric vector giving bin width in both vertical and
+#'   horizontal directions. Overrides \code{bins} if both set.
 #' @param drop if \code{TRUE} removes all cells with 0 counts.
 #' @export
 #' @aliases stat_bin2d
 #' @rdname geom_bin2d
-stat_bin_2d <- function(mapping = NULL, data = NULL, geom = "rect",
-                       position = "identity", bins = 30, drop = TRUE,
-                       show.legend = NA, inherit.aes = TRUE, ...) {
+stat_bin_2d <- function(mapping = NULL, data = NULL, geom = "raster",
+                        position = "identity", bins = 30, binwidth = NULL,
+                        drop = TRUE, show.legend = NA, inherit.aes = TRUE, ...) {
   layer(
     data = data,
     mapping = mapping,
@@ -17,6 +19,7 @@ stat_bin_2d <- function(mapping = NULL, data = NULL, geom = "rect",
     inherit.aes = inherit.aes,
     stat_params = list(
       bins = bins,
+      binwidth = binwidth,
       drop = drop
     ),
     params = list(...)
@@ -36,79 +39,98 @@ StatBin2d <- ggproto("StatBin2d", Stat,
 
   compute_group = function(data, panel_info, binwidth = NULL, bins = 30,
                            breaks = NULL, origin = NULL, drop = TRUE, ...) {
+    # Bin the x/y positions in `data` on a rectangular grid and total the
+    # weights falling in each cell.
+    #
+    # `origin`, `binwidth`, `breaks` and `bins` are each expanded to an
+    # x/y pair by dual_param(); bin_breaks() then derives the break vector
+    # for each axis from the corresponding panel scale.
+    #
+    # Returns one row per cell with the cell centre (`x`, `y`), its size
+    # (`width`, `height`), the summed weight (`value` and `count`) and
+    # `density` (count divided by the total count). With `drop = TRUE`
+    # cells that received no observations are removed by tapply_df().
-    range <- list(
-      x = scale_dimension(panel_info$x, c(0, 0)),
-      y = scale_dimension(panel_info$y, c(0, 0))
-    )
-
-    # is.integer(...) below actually deals with factor input data, which is
-    # integer by now.  Bins for factor data should take the width of one level,
-    # and should show up centered over their tick marks.
-
-    # Determine origin, if omitted
-    if (is.null(origin)) {
-      origin <- c(NA, NA)
-    } else {
-      stopifnot(is.numeric(origin))
-      stopifnot(length(origin) == 2)
-    }
-    originf <- function(x) if (is.integer(x)) -0.5 else min(x, na.rm = TRUE)
-    if (is.na(origin[1])) origin[1] <- originf(data$x)
-    if (is.na(origin[2])) origin[2] <- originf(data$y)
-
-    # Determine binwidth, if omitted
-    if (is.null(binwidth)) {
-      binwidth <- c(NA, NA)
-      if (is.integer(data$x)) {
-        binwidth[1] <- 1
-      } else {
-        binwidth[1] <- diff(range$x) / bins
-      }
-      if (is.integer(data$y)) {
-        binwidth[2] <- 1
-      } else {
-        binwidth[2] <- diff(range$y) / bins
-      }
-    }
-    stopifnot(is.numeric(binwidth))
-    stopifnot(length(binwidth) == 2)
 
-    # Determine breaks, if omitted
-    if (is.null(breaks)) {
-      breaks <- list(x = NULL, y = NULL)
-    }
+    origin <- dual_param(origin, list(NULL, NULL))
+    binwidth <- dual_param(binwidth, list(NULL, NULL))
+    breaks <- dual_param(breaks, list(NULL, NULL))
+    bins <- dual_param(bins, list(x = 30, y = 30))
 
-    stopifnot(length(breaks) == 2)
-    names(breaks) <- c("x", "y")
+    xbreaks <- bin_breaks(panel_info$x, breaks$x, origin$x, binwidth$x, bins$x)
+    ybreaks <- bin_breaks(panel_info$y, breaks$y, origin$y, binwidth$y, bins$y)
 
-    if (is.null(breaks$x)) {
-      breaks$x <- seq(origin[1], max(range$x) + binwidth[1], binwidth[1])
-    }
-    if (is.null(breaks$y)) {
-      breaks$y <- seq(origin[2], max(range$y) + binwidth[2], binwidth[2])
+    # `labels = FALSE` (spelled out in full, no partial argument matching)
+    # makes cut() return integer bin indices instead of a factor.
+    xbin <- cut(data$x, xbreaks, include.lowest = TRUE, labels = FALSE)
+    ybin <- cut(data$y, ybreaks, include.lowest = TRUE, labels = FALSE)
+
+    # Unweighted data counts every observation once.
+    if (is.null(data$weight)) {
+      data$weight <- 1
+    }
+
+    out <- tapply_df(data$weight, list(xbin = xbin, ybin = ybin), sum, drop = drop)
+
+    xdim <- bin_loc(xbreaks, out$xbin)
+    out$x <- xdim$mid
+    out$width <- xdim$length
+
+    ydim <- bin_loc(ybreaks, out$ybin)
+    out$y <- ydim$mid
+    out$height <- ydim$length
+
+    out$count <- out$value
+    out$density <- out$count / sum(out$count, na.rm = TRUE)
+    out
+  }
+)
+
+# Normalise a parameter that may be given once or per axis into a list with
+# `x` and `y` components.
+#   NULL              -> `default`
+#   length other than 2 -> the same value is used for both axes
+#   length 2          -> a named list is returned unchanged; anything else
+#                        is split positionally into x (first) and y (second)
+dual_param <- function(x, default = list(x = NULL, y = NULL)) {
+  if (is.null(x)) {
+    return(default)
+  }
+  if (length(x) == 2) {
+    if (!is.list(x) || is.null(names(x))) {
+      x <- list(x = x[[1]], y = x[[2]])
     }
+    return(x)
+  }
+  list(x = x, y = x)
+}
 
-    stopifnot(is.list(breaks))
-    stopifnot(all(sapply(breaks, is.numeric)))
+# Compute the bin boundaries along one axis.
+#
+# * Scales inheriting from "discrete" get one unit-wide bin per scale break,
+#   with edges at 0.5, 1.5, ...; all other arguments are ignored.
+# * Explicit `breaks` are returned as given (not sorted or fuzzed).
+# * Otherwise breaks are a regular sequence starting at `origin`, spaced
+#   `binwidth` apart and extending one bin past the upper scale limit, then
+#   passed through adjust_breaks().
+#
+# `binwidth` defaults to the scale range divided by `bins`; `origin` defaults
+# to the lower limit rounded down to a multiple of `binwidth`. Either may be
+# NULL or a single NA to request its default. `right` is forwarded to
+# adjust_breaks().
+bin_breaks <- function(scale, breaks = NULL, origin = NULL, binwidth = NULL,
+                       bins = 30, right = TRUE) {
+  # Bins for categorical data should take the width of one level,
+  # and should show up centered over their tick marks. All other parameters
+  # are ignored.
+  if (inherits(scale, "discrete")) {
+    breaks <- scale_breaks(scale)
+    return(-0.5 + seq_len(length(breaks) + 1))
+  }
 
-    xbin <- cut(data$x, sort(breaks$x), include.lowest = TRUE)
-    ybin <- cut(data$y, sort(breaks$y), include.lowest = TRUE)
+  if (!is.null(breaks)) {
+    return(breaks)
+  }
 
-    if (is.null(data$weight)) data$weight <- 1
+  range <- scale_limits(scale)
 
-    counts <- as.data.frame(
-      xtabs(weight ~ xbin + ybin, data), responseName = "count")
-    if (drop) counts <- subset(counts, count > 0)
+  # Missingness is tested with is.na() rather than identical(., NA): an NA
+  # taken from a numeric vector such as c(NA, 1) is NA_real_, which is not
+  # identical to the logical NA.
+  if (is.null(binwidth) || (length(binwidth) == 1 && is.na(binwidth))) {
+    binwidth <- diff(range) / bins
+  }
+  stopifnot(is.numeric(binwidth), length(binwidth) == 1)
 
-    counts$xint <- as.numeric(counts$xbin)
-    counts$xmin <- breaks$x[counts$xint]
-    counts$xmax <- breaks$x[counts$xint + 1]
+  if (is.null(origin) || (length(origin) == 1 && is.na(origin))) {
+    origin <- plyr::round_any(range[1], binwidth, floor)
+  }
+  stopifnot(is.numeric(origin), length(origin) == 1)
 
-    counts$yint <- as.numeric(counts$ybin)
-    counts$ymin <- breaks$y[counts$yint]
-    counts$ymax <- breaks$y[counts$yint + 1]
+  breaks <- seq(origin, range[2] + binwidth, binwidth)
+  adjust_breaks(breaks, right)
+}
 
-    counts$density <- counts$count / sum(counts$count, na.rm = TRUE)
-    counts
+# Nudge break points by a tiny amount so that values lying exactly on a
+# break are binned consistently despite floating point error.
+#
+# The breaks are sorted first, so the fuzz size (1e-07 times the median gap)
+# is always non-negative, even when `x` arrives unsorted or descending.
+# With `right = TRUE` the first break moves down and all others move up;
+# with `right = FALSE` the last break moves up and all others move down.
+# Returns the sorted, adjusted breaks.
+adjust_breaks <- function(x, right = TRUE) {
+  x <- sort(x)
+  diddle <- 1e-07 * stats::median(diff(x))
+  if (right) {
+    fuzz <- c(-diddle, rep.int(diddle, length(x) - 1))
+  } else {
+    fuzz <- c(rep.int(-diddle, length(x) - 1), diddle)
   }
-)
+  x + fuzz
+}
+
+# Describe the bins selected by `id` given the vector of bin edges `x`.
+# Bin i spans x[i] to x[i + 1]. Returns a list with the lower edge (`left`),
+# upper edge (`right`), centre (`mid`) and width (`length`) of each
+# requested bin.
+bin_loc <- function(x, id) {
+  n_edges <- length(x)
+  lower <- x[-n_edges]
+  upper <- x[-1]
+  centre <- (lower + upper) / 2
+  span <- diff(x)
+
+  list(left = lower[id], right = upper[id], mid = centre[id], length = span[id])
+}
@@ -1,14 +1,13 @@
 #' @export
 #' @rdname geom_hex
-#' @param bins numeric vector specifying number of bins in both x and y
-#'   directions. Set to 30 by default.
-#' @inheritParams stat_identity
+#' @inheritParams stat_bin_2d
 #' @param na.rm If \code{FALSE} (the default), removes missing values with
 #'    a warning.  If \code{TRUE} silently removes missing values.
 #' @aliases stat_binhex
 stat_bin_hex <- function(mapping = NULL, data = NULL, geom = "hex",
-                        position = "identity", bins = 30, na.rm = FALSE,
-                        show.legend = NA, inherit.aes = TRUE, ...) {
+                        position = "identity", bins = 30, binwidth = NULL,
+                        na.rm = FALSE, show.legend = NA, inherit.aes = TRUE,
+                        ...) {
   layer(
     data = data,
     mapping = mapping,
@@ -18,7 +17,8 @@ stat_bin_hex <- function(mapping = NULL, data = NULL, geom = "hex",
     show.legend = show.legend,
     inherit.aes = inherit.aes,
     stat_params = list(
-      bins = bins
+      bins = bins,
+      binwidth = binwidth
     ),
     params = list(...)
   )
 
@@ -33,10 +33,10 @@
 #' if (requireNamespace("hexbin")) {
 #' d + stat_summary_hex()
 #' }
-stat_summary_2d <- function(mapping = NULL, data = NULL, geom = "rect",
-                           position = "identity", bins = 30, drop = TRUE,
-                           fun = "mean", fun.args = list(), show.legend = NA,
-                           inherit.aes = TRUE, ...) {
+stat_summary_2d <- function(mapping = NULL, data = NULL, geom = "raster",
+                            position = "identity", bins = 30, binwidth = NULL,
+                            drop = TRUE, fun = "mean", fun.args = list(),
+                            show.legend = NA, inherit.aes = TRUE, ...) {
   layer(
     data = data,
     mapping = mapping,
@@ -47,6 +47,7 @@ stat_summary_2d <- function(mapping = NULL, data = NULL, geom = "rect",
     inherit.aes = inherit.aes,
     stat_params = list(
       bins = bins,
+      binwidth = binwidth,
       drop = drop,
       fun = fun,
       fun.args = fun.args
@@ -75,70 +76,47 @@ StatSummary2d <- ggproto("StatSummary2d", Stat,
   compute_group = function(data, panel_info, binwidth = NULL, bins = 30,
                            breaks = NULL, origin = NULL, drop = TRUE,
                            fun = "mean", fun.args = list(), ...) {
+    # Bin the x/y positions in `data` on a rectangular grid and summarise
+    # `data$z` within each cell by calling `fun` with the extra arguments
+    # in `fun.args`.
+    #
+    # `origin`, `binwidth`, `breaks` and `bins` are each expanded to an
+    # x/y pair by dual_param(); bin_breaks() then derives the break vector
+    # for each axis from the corresponding panel scale.
+    #
+    # Returns one row per cell with the cell centre (`x`, `y`), its size
+    # (`width`, `height`) and the summary in `value`. With `drop = TRUE`
+    # cells that received no observations are removed by tapply_df().
-    range <- list(
-      x = scale_dimension(panel_info$x, c(0, 0)),
-      y = scale_dimension(panel_info$y, c(0, 0))
-    )
+    origin <- dual_param(origin, list(NULL, NULL))
+    binwidth <- dual_param(binwidth, list(NULL, NULL))
+    breaks <- dual_param(breaks, list(NULL, NULL))
+    bins <- dual_param(bins, list(x = 30, y = 30))
 
-    # Determine origin, if omitted
-    if (is.null(origin)) {
-      origin <- c(NA, NA)
-    } else {
-      stopifnot(is.numeric(origin))
-      stopifnot(length(origin) == 2)
-    }
-    originf <- function(x) if (is.integer(x)) -0.5 else min(x)
-    if (is.na(origin[1])) origin[1] <- originf(data$x)
-    if (is.na(origin[2])) origin[2] <- originf(data$y)
+    xbreaks <- bin_breaks(panel_info$x, breaks$x, origin$x, binwidth$x, bins$x)
+    ybreaks <- bin_breaks(panel_info$y, breaks$y, origin$y, binwidth$y, bins$y)
 
-    # Determine binwidth, if omitted
-    if (is.null(binwidth)) {
-      binwidth <- c(NA, NA)
-      if (is.integer(data$x)) {
-        binwidth[1] <- 1
-      } else {
-        binwidth[1] <- diff(range$x) / bins
-      }
-      if (is.integer(data$y)) {
-        binwidth[2] <- 1
-      } else {
-        binwidth[2] <- diff(range$y) / bins
-      }
-    }
-    stopifnot(is.numeric(binwidth))
-    stopifnot(length(binwidth) == 2)
+    # `labels = FALSE` (spelled out in full, no partial argument matching)
+    # makes cut() return integer bin indices instead of a factor.
+    xbin <- cut(data$x, xbreaks, include.lowest = TRUE, labels = FALSE)
+    ybin <- cut(data$y, ybreaks, include.lowest = TRUE, labels = FALSE)
 
-    # Determine breaks, if omitted
-    if (is.null(breaks)) {
-      breaks <- list(
-        seq(origin[1], max(range$x) + binwidth[1], binwidth[1]),
-        seq(origin[2], max(range$y) + binwidth[2], binwidth[2])
-      )
-    } else {
-      stopifnot(is.list(breaks))
-      stopifnot(length(breaks) == 2)
-      stopifnot(all(sapply(breaks, is.numeric)))
+    # Wrap `fun` so that the cell's values are passed first, followed by
+    # the user-supplied `fun.args`.
+    f <- function(x) {
+      do.call(fun, c(list(quote(x)), fun.args))
     }
-    names(breaks) <- c("x", "y")
+    out <- tapply_df(data$z, list(xbin = xbin, ybin = ybin), f, drop = drop)
 
-    xbin <- cut(data$x, sort(breaks$x), include.lowest = TRUE)
-    ybin <- cut(data$y, sort(breaks$y), include.lowest = TRUE)
+    xdim <- bin_loc(xbreaks, out$xbin)
+    out$x <- xdim$mid
+    out$width <- xdim$length
 
-    if (is.null(data$weight)) data$weight <- 1
+    ydim <- bin_loc(ybreaks, out$ybin)
+    out$y <- ydim$mid
+    out$height <- ydim$length
+
+    out
+  }
+)
 
-    ans <- plyr::ddply(data.frame(data, xbin, ybin), c("xbin", "ybin"), function(d) {
-      val <- do.call(fun, c(list(quote(d$z)), fun.args))
-      data.frame(value = val)
-    })
-    if (drop) ans <- stats::na.omit(ans)
+# A tapply() look-alike that returns a long data frame instead of a matrix.
+# `index` is a named list of grouping vectors. The result has one row for
+# every combination of the values ulevels() yields for each grouping vector,
+# plus a `value` column holding `fun` applied to the elements of `x` in that
+# cell (extra arguments in `...` are forwarded to `fun`). With `drop = TRUE`
+# rows for cells that contain no elements of `x` are removed.
+# NOTE(review): ulevels() is defined elsewhere; the row/value alignment
+# presumably relies on it ordering levels the same way split() does - confirm.
+tapply_df <- function(x, index, fun, ..., drop = TRUE) {
+  cell_keys <- lapply(index, ulevels)
+  res <- expand.grid(cell_keys, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE)
 
-    ans$xint <- as.numeric(ans$xbin)
-    ans$xmin <- breaks$x[ans$xint]
-    ans$xmax <- breaks$x[ans$xint + 1]
+  cells <- unname(split(x, index))
+  res$value <- unlist(lapply(cells, fun, ...))
 
-    ans$yint <- as.numeric(ans$ybin)
-    ans$ymin <- breaks$y[ans$yint]
-    ans$ymax <- breaks$y[ans$yint + 1]
-    ans
+  if (drop) {
+    occupied <- vapply(cells, length, integer(1)) > 0
+    res <- res[occupied, , drop = FALSE]
   }
-)
+
+  res
+}
@@ -1,10 +1,10 @@
 #' @export
 #' @rdname stat_summary_2d
-#' @inheritParams stat_binhex
+#' @inheritParams stat_bin_hex
 stat_summary_hex <- function(mapping = NULL, data = NULL, geom = "hex",
-                             position = "identity", bins = 30, drop = TRUE,
-                             fun = "mean", fun.args = list(), show.legend = NA,
-                             inherit.aes = TRUE, ...) {
+                             position = "identity", bins = 30, binwidth = NULL,
+                             drop = TRUE, fun = "mean", fun.args = list(),
+                             show.legend = NA, inherit.aes = TRUE, ...) {
   layer(
     data = data,
     mapping = mapping,
@@ -15,6 +15,7 @@ stat_summary_hex <- function(mapping = NULL, data = NULL, geom = "hex",
     inherit.aes = inherit.aes,
     stat_params = list(
       bins = bins,
+      binwidth = binwidth,
       drop = drop,
       fun = fun,
       fun.args = fun.args