Skip to content

Feature/stat ellipse #926

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Mar 16, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ Collate:
'stat-density-2d.r'
'stat-density.r'
'stat-ecdf.r'
'stat-ellipse.R'
'stat-function.r'
'stat-identity.r'
'stat-qq.r'
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,7 @@ export(stat_contour)
export(stat_density)
export(stat_density2d)
export(stat_ecdf)
export(stat_ellipse)
export(stat_function)
export(stat_hline)
export(stat_identity)
Expand Down Expand Up @@ -413,5 +414,6 @@ import(plyr)
import(proto)
import(reshape2)
import(scales)
importFrom(MASS,cov.trob)
importFrom(MASS,kde2d)
importFrom(methods,setRefClass)
2 changes: 2 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
ggplot2 0.9.3.1.99
----------------------------------------------------------------
* `stat_ellipse()` adds data ellipses. It supports bivariate normal and t distributions,
as well as a Euclidean distance circle. (@jofrhwld, #926)

* Allow specifying only one of the limits in a scale and use the automatic
calculation of the other limit by passing NA to the limit function,
Expand Down
102 changes: 102 additions & 0 deletions R/stat-ellipse.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#' Plot data ellipses.
#'
#' @param level The confidence level at which to draw an ellipse (default is 0.95),
#' or, if \code{type="euclid"}, the radius of the circle to be drawn.
#' @param type The type of ellipse.
#' The default \code{"t"} assumes a multivariate t-distribution, and
#' \code{"norm"} assumes a multivariate normal distribution.
#' \code{"euclid"} draws a circle with the radius equal to \code{level},
#' representing the Euclidean distance from the center.
#' This ellipse probably won't appear circular unless \code{coord_fixed()} is applied.
#' @param segments The number of segments to be used in drawing the ellipse.
#' @param na.rm If \code{FALSE} (the default), removes missing values with
#' a warning. If \code{TRUE} silently removes missing values.
#' @inheritParams stat_identity
#'
#' @details The method for calculating the ellipses has been modified from \code{car::ellipse} (Fox and Weisberg, 2011).
#'
#' @references
#' John Fox and Sanford Weisberg (2011). An {R} Companion to Applied Regression, Second Edition. Thousand Oaks CA: Sage. URL: http://socserv.socsci.mcmaster.ca/jfox/Books/Companion
#'
#' @export
#' @importFrom MASS cov.trob
#'
#' @examples
#' ggplot(faithful, aes(waiting, eruptions))+
#' geom_point()+
#' stat_ellipse()
#'
#' ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
#' geom_point()+
#' stat_ellipse()
#'
#' ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
#' geom_point()+
#' stat_ellipse(type = "norm", linetype = 2)+
#' stat_ellipse(type = "t")
#'
#' ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
#' geom_point()+
#' stat_ellipse(type = "norm", linetype = 2)+
#' stat_ellipse(type = "euclid", level = 3)+
#' coord_fixed()
#'
#' ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
#' stat_ellipse(geom = "polygon")

stat_ellipse <- function(mapping = NULL, data = NULL, geom = "path",
                         position = "identity", type = "t", level = 0.95,
                         segments = 51, na.rm = FALSE, ...) {
  # Thin constructor: every argument is forwarded unchanged to StatEllipse$new,
  # with any extra arguments passed along through `...`.
  StatEllipse$new(
    mapping = mapping,
    data = data,
    geom = geom,
    position = position,
    type = type,
    level = level,
    segments = segments,
    na.rm = na.rm,
    ...
  )
}

# Stat object behind stat_ellipse(): turns the x/y values of each group into
# the outline points produced by calculate_ellipse().
StatEllipse <- proto(Stat, {
  objname <- "ellipse"

  # Drawn as a path unless the layer asks for another geom.
  default_geom <- function(.) GeomPath
  required_aes <- c("x", "y")

  # Grouping is handled entirely by the parent Stat; this simply delegates.
  calculate_groups <- function(., data, scales, ...) {
    .super$calculate_groups(., data, scales, ...)
  }

  # Per-group computation: clean the x/y columns with remove_missing()
  # (called with finite = TRUE; presumably this also drops non-finite
  # values -- confirm against remove_missing) and hand the remaining rows to
  # calculate_ellipse() together with the ellipse settings.
  calculate <- function(., data, scales, type = "t", level = 0.95,
                        segments = 51, na.rm = FALSE, ...) {
    usable <- remove_missing(data, na.rm, vars = c("x", "y"),
                             name = "stat_ellipse", finite = TRUE)
    calculate_ellipse(data = usable, vars = c("x", "y"), type = type,
                      level = level, segments = segments)
  }
})

# Compute the outline of a data ellipse (or circle) for two columns of `data`.
#
# data     - data frame holding the observations.
# vars     - names of the two columns to use; also the column names of the
#            result.
# type     - "t" (covariance from cov.trob), "norm" (covariance from cov.wt)
#            or "euclid" (circle of radius `level` around the centre).
# level    - confidence level for "t"/"norm"; radius for "euclid".
# segments - number of segments; the outline has segments + 1 points, the
#            last one at angle 2 * pi.
#
# Returns a data frame with columns named after `vars`. If `type` is not
# recognised, or there are too few rows, a message is emitted and a single
# row of NA values is returned instead.
calculate_ellipse <- function(data, vars, type, level, segments){
  # Shared tail: convert a two-column matrix into the returned data frame.
  as_result <- function(points) {
    out <- as.data.frame(points)
    colnames(out) <- vars
    out
  }

  num_df <- 2
  den_df <- nrow(data) - 1

  if (!type %in% c("t", "norm", "euclid")) {
    message("Unrecognized ellipse type")
    return(as_result(rbind(as.numeric(c(NA, NA)))))
  }
  if (den_df < 3) {
    message("Too few points to calculate an ellipse")
    return(as_result(rbind(as.numeric(c(NA, NA)))))
  }

  xy <- data[, vars]
  if (type == "t") {
    fit <- cov.trob(xy)
  } else {
    # "norm" and "euclid" both start from the ordinary covariance estimate.
    fit <- cov.wt(xy)
    if (type == "euclid") {
      # Replace the covariance by an isotropic one built from the smaller
      # variance, so the resulting shape is a circle.
      fit$cov <- diag(rep(min(diag(fit$cov)), 2))
    }
  }

  centre <- fit$center
  chol_factor <- chol(fit$cov)

  if (type == "euclid") {
    # Dividing by the Cholesky scale cancels it out below, leaving a circle
    # whose radius is exactly `level`.
    scale <- level / max(chol_factor)
  } else {
    scale <- sqrt(num_df * qf(level, num_df, den_df))
  }

  theta <- (0:segments) * 2 * pi / segments
  circle <- cbind(cos(theta), sin(theta))

  # Transposing lets the length-2 centre recycle down the coordinate rows
  # before the points are flipped back to one row per point.
  as_result(t(centre + scale * t(circle %*% chol_factor)))
}
80 changes: 80 additions & 0 deletions man/stat_ellipse.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
% Generated by roxygen2 (4.0.0): do not edit by hand
\name{stat_ellipse}
\alias{stat_ellipse}
\title{Plot data ellipses.}
\usage{
stat_ellipse(mapping = NULL, data = NULL, geom = "path",
position = "identity", type = "t", level = 0.95, segments = 51,
na.rm = FALSE, ...)
}
\arguments{
\item{level}{The confidence level at which to draw an
ellipse (default is 0.95), or, if \code{type="euclid"},
the radius of the circle to be drawn.}

\item{type}{The type of ellipse. The default \code{"t"}
assumes a multivariate t-distribution, and \code{"norm"}
assumes a multivariate normal distribution.
\code{"euclid"} draws a circle with the radius equal to
\code{level}, representing the euclidian distance from
the center. This ellipse probably won't appear circular
unless \code{coord_fixed()} is applied.}

\item{segments}{The number of segments to be used in
drawing the ellipse.}

\item{na.rm}{If \code{FALSE} (the default), removes
missing values with a warning. If \code{TRUE} silently
removes missing values.}

\item{mapping}{The aesthetic mapping, usually constructed
with \code{\link{aes}} or \code{\link{aes_string}}. Only
needs to be set at the layer level if you are overriding
the plot defaults.}

\item{data}{A layer specific dataset - only needed if you
want to override the plot defaults.}

\item{geom}{The geometric object to use display the data}

\item{position}{The position adjustment to use for
overlappling points on this layer}

\item{...}{other arguments passed on to
\code{\link{layer}}. This can include aesthetics whose
values you want to set, not map. See \code{\link{layer}}
for more details.}
}
\description{
Plot data ellipses.
}
\details{
The method for calculating the ellipses has been modified from car::ellipse (Fox and Weisberg, 2011)
}
\examples{
ggplot(faithful, aes(waiting, eruptions))+
geom_point()+
stat_ellipse()

ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
geom_point()+
stat_ellipse()

ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
geom_point()+
stat_ellipse(type = "norm", linetype = 2)+
stat_ellipse(type = "t")

ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
geom_point()+
stat_ellipse(type = "norm", linetype = 2)+
stat_ellipse(type = "euclid", level = 3)+
coord_fixed()

ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
stat_ellipse(geom = "polygon")
}
\references{
John Fox and Sanford Weisberg (2011). An {R} Companion to Applied Regression, Second Edition. Thousand Oaks CA: Sage. URL: http://socserv.socsci.mcmaster.ca/jfox/Books/Companion
}