diff --git a/DESCRIPTION b/DESCRIPTION index 1d21d4be8b..d3b384219c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -205,3 +205,4 @@ Collate: 'translate-qplot-lattice.r' 'annotation-logticks.r' 'utilities-help.r' + 'stat-ellipse.R' diff --git a/NAMESPACE b/NAMESPACE index 8598ef01b9..de743bec10 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -367,6 +367,7 @@ export(stat_contour) export(stat_density) export(stat_density2d) export(stat_ecdf) +export(stat_ellipse) export(stat_function) export(stat_hline) export(stat_identity) @@ -411,4 +412,5 @@ import(plyr) import(proto) import(reshape2) import(scales) +importFrom(MASS,cov.trob) importFrom(MASS,kde2d) diff --git a/R/stat-ellipse.R b/R/stat-ellipse.R new file mode 100644 index 0000000000..8ad29c6650 --- /dev/null +++ b/R/stat-ellipse.R @@ -0,0 +1,103 @@ +#' Calculate Data Ellipses +#' +#' @param level The confidence level at which to draw an ellipse (default is 0.95), +#' or, if \code{type="euclid"}, the radius of the circle to be drawn. +#' @param type The type of ellipse. +#' The default \code{"t"} assumes a multivariate t-distribution, and +#' \code{"norm"} assumes a multivariate normal distribution. +#' \code{"euclid"} draws a circle with the radius equal to \code{level}, representing the euclidian distance from the center. +#' This ellipse probably won't appear circular unless \code{coord_fixed()} is applied. +#' @param segments The number of segments to be used in drawing the ellipse. +#' @param na.rm If \code{FALSE} (the default), removes missing values with +#' a warning. If \code{TRUE} silently removes missing values. +#' @inheritParams stat_identity +#' +#' @details The code for calculating the ellipse is largely borrowed from car::ellipse. +#' +#' @export +#' @importFrom MASS cov.trob +#' +#' @examples +#' \donttest{ +#' ggplot(faithful, aes(waiting, eruptions))+ +#' geom_point()+ +#' stat_ellipse() +#' +#' ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+ +#' geom_point()+ +#' stat_ellipse() +#' +#' ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+ +#' geom_point()+ +#' stat_ellipse(type = "norm", linetype = 2)+ +#' stat_ellipse(type = "t") +#' +#' ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+ +#' geom_point()+ +#' stat_ellipse(type = "norm", linetype = 2)+ +#' stat_ellipse(type = "euclid", level = 3)+ +#' coord_fixed() +#' +#' ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+ +#' stat_ellipse(geom = "polygon") +#' } + +stat_ellipse <- function(mapping=NULL, data=NULL, geom="path", position="identity", + type = "t", level = 0.95, segments = 51, na.rm = FALSE, ...) { + StatEllipse$new(mapping=mapping, data=data, geom=geom, position=position, + type = type, level=level, segments=segments, na.rm = na.rm, ...) +} + + +StatEllipse <- proto(Stat, + { + objname <- "ellipse" + + required_aes <- c("x", "y") + default_geom <- function(.) GeomPath + + calculate_groups <- function(., data, scales, ...){ + .super$calculate_groups(., data, scales,...) + } + calculate <- function(., data, scales, type = "t", level = 0.95, segments = 51, na.rm = FALSE, ...){ + data <- remove_missing(data, na.rm, vars = c("x","y"), name = "stat_ellipse", + finite = TRUE) + + dfn <- 2 + dfd <- length(data$x) - 1 + + if(!type %in% c("t","norm","euclid")){ + message("Unrecognized ellipse type") + ellipse <- rbind(as.numeric(c(NA,NA))) + } else if (dfd < 3){ + message("Too few points to calculate an ellipse") + ellipse <- rbind(as.numeric(c(NA,NA))) + } else { + if(type == "t"){ + v <- cov.trob(cbind(data$x, data$y)) + }else if(type == "norm"){ + v <- cov.wt(cbind(data$x, data$y)) + } else if(type == "euclid"){ + v <- cov.wt(cbind(data$x, data$y)) + v$cov <- diag(rep(min(diag(v$cov)), 2)) + } + shape <- v$cov + center <- v$center + chol_decomp <- chol(shape) + if(type == "euclid"){ + radius <- level/max(chol_decomp) + }else{ + radius <- sqrt(dfn * qf(level, dfn, dfd)) + } + angles <- (0:segments) * 2 * pi/segments + unit.circle <- cbind(cos(angles), sin(angles)) + ellipse <- t(center + radius * t(unit.circle %*% chol_decomp)) + } + + ellipse <- as.data.frame(ellipse) + colnames(ellipse) <- c("x","y") + return(ellipse) + } + } +) + diff --git a/man/stat_ellipse.Rd b/man/stat_ellipse.Rd new file mode 100644 index 0000000000..214939f44d --- /dev/null +++ b/man/stat_ellipse.Rd @@ -0,0 +1,79 @@ +\name{stat_ellipse} +\alias{stat_ellipse} +\title{Calculate Data Ellipses} +\usage{ + stat_ellipse(mapping = NULL, data = NULL, geom = "path", + position = "identity", type = "t", level = 0.95, + segments = 51, na.rm = FALSE, ...) +} +\arguments{ + \item{level}{The confidence level at which to draw an + ellipse (default is 0.95), or, if \code{type="euclid"}, + the radius of the circle to be drawn.} + + \item{type}{The type of ellipse. The default \code{"t"} + assumes a multivariate t-distribution, and \code{"norm"} + assumes a multivariate normal distribution. + \code{"euclid"} draws a circle with the radius equal to + \code{level}, representing the euclidian distance from + the center. This ellipse probably won't appear circular + unless \code{coord_fixed()} is applied.} + + \item{segments}{The number of segments to be used in + drawing the ellipse.} + + \item{na.rm}{If \code{FALSE} (the default), removes + missing values with a warning. If \code{TRUE} silently + removes missing values.} + + \item{mapping}{The aesthetic mapping, usually constructed + with \code{\link{aes}} or \code{\link{aes_string}}. Only + needs to be set at the layer level if you are overriding + the plot defaults.} + + \item{data}{A layer specific dataset - only needed if you + want to override the plot defaults.} + + \item{geom}{The geometric object to use display the data} + + \item{position}{The position adjustment to use for + overlappling points on this layer} + + \item{...}{other arguments passed on to + \code{\link{layer}}. This can include aesthetics whose + values you want to set, not map. See \code{\link{layer}} + for more details.} +} +\description{ + Calculate Data Ellipses +} +\details{ + The code for calculating the ellipse is largely borrowed + from car::ellipse. +} +\examples{ +\donttest{ +ggplot(faithful, aes(waiting, eruptions))+ + geom_point()+ + stat_ellipse() + +ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+ + geom_point()+ + stat_ellipse() + +ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+ + geom_point()+ + stat_ellipse(type = "norm", linetype = 2)+ + stat_ellipse(type = "t") + +ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+ + geom_point()+ + stat_ellipse(type = "norm", linetype = 2)+ + stat_ellipse(type = "euclid", level = 3)+ + coord_fixed() + +ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+ + stat_ellipse(geom = "polygon") +} +} +