Skip to content

Feature/stat_ellipse #878

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -205,3 +205,4 @@ Collate:
'translate-qplot-lattice.r'
'annotation-logticks.r'
'utilities-help.r'
'stat-ellipse.R'
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,7 @@ export(stat_contour)
export(stat_density)
export(stat_density2d)
export(stat_ecdf)
export(stat_ellipse)
export(stat_function)
export(stat_hline)
export(stat_identity)
Expand Down Expand Up @@ -411,4 +412,5 @@ import(plyr)
import(proto)
import(reshape2)
import(scales)
importFrom(MASS,cov.trob)
importFrom(MASS,kde2d)
103 changes: 103 additions & 0 deletions R/stat-ellipse.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#' Calculate Data Ellipses
#'
#' @param level The confidence level at which to draw an ellipse (default is 0.95),
#' or, if \code{type="euclid"}, the radius of the circle to be drawn.
#' @param type The type of ellipse.
#' The default \code{"t"} assumes a multivariate t-distribution, and
#' \code{"norm"} assumes a multivariate normal distribution.
#' \code{"euclid"} draws a circle with the radius equal to \code{level}, representing the euclidian distance from the center.
#' This ellipse probably won't appear circular unless \code{coord_fixed()} is applied.
#' @param segments The number of segments to be used in drawing the ellipse.
#' @param na.rm If \code{FALSE} (the default), removes missing values with
#' a warning. If \code{TRUE} silently removes missing values.
#' @inheritParams stat_identity
#'
#' @details The code for calculating the ellipse is largely borrowed from car::ellipse.
#'
#' @export
#' @importFrom MASS cov.trob
#'
#' @examples
#' \donttest{
#' ggplot(faithful, aes(waiting, eruptions))+
#' geom_point()+
#' stat_ellipse()
#'
#' ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
#' geom_point()+
#' stat_ellipse()
#'
#' ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
#' geom_point()+
#' stat_ellipse(type = "norm", linetype = 2)+
#' stat_ellipse(type = "t")
#'
#' ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
#' geom_point()+
#' stat_ellipse(type = "norm", linetype = 2)+
#' stat_ellipse(type = "euclid", level = 3)+
#' coord_fixed()
#'
#' ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
#' stat_ellipse(geom = "polygon")
#' }

stat_ellipse <- function(mapping=NULL, data=NULL, geom="path", position="identity",
type = "t", level = 0.95, segments = 51, na.rm = FALSE, ...) {
StatEllipse$new(mapping=mapping, data=data, geom=geom, position=position,
type = type, level=level, segments=segments, na.rm = na.rm, ...)
}


StatEllipse <- proto(Stat,
{
objname <- "ellipse"

required_aes <- c("x", "y")
default_geom <- function(.) GeomPath

calculate_groups <- function(., data, scales, ...){
.super$calculate_groups(., data, scales,...)
}
calculate <- function(., data, scales, type = "t", level = 0.95, segments = 51, na.rm = FALSE, ...){
data <- remove_missing(data, na.rm, vars = c("x","y"), name = "stat_ellipse",
finite = TRUE)

dfn <- 2
dfd <- length(data$x) - 1

if(!type %in% c("t","norm","euclid")){
message("Unrecognized ellipse type")
ellipse <- rbind(as.numeric(c(NA,NA)))
} else if (dfd < 3){
message("Too few points to calculate an ellipse")
ellipse <- rbind(as.numeric(c(NA,NA)))
} else {
if(type == "t"){
v <- cov.trob(cbind(data$x, data$y))
}else if(type == "norm"){
v <- cov.wt(cbind(data$x, data$y))
} else if(type == "euclid"){
v <- cov.wt(cbind(data$x, data$y))
v$cov <- diag(rep(min(diag(v$cov)), 2))
}
shape <- v$cov
center <- v$center
chol_decomp <- chol(shape)
if(type == "euclid"){
radius <- level/max(chol_decomp)
}else{
radius <- sqrt(dfn * qf(level, dfn, dfd))
}
angles <- (0:segments) * 2 * pi/segments
unit.circle <- cbind(cos(angles), sin(angles))
ellipse <- t(center + radius * t(unit.circle %*% chol_decomp))
}

ellipse <- as.data.frame(ellipse)
colnames(ellipse) <- c("x","y")
return(ellipse)
}
}
)

79 changes: 79 additions & 0 deletions man/stat_ellipse.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
\name{stat_ellipse}
\alias{stat_ellipse}
\title{Calculate Data Ellipses}
\usage{
stat_ellipse(mapping = NULL, data = NULL, geom = "path",
position = "identity", type = "t", level = 0.95,
segments = 51, na.rm = FALSE, ...)
}
\arguments{
\item{level}{The confidence level at which to draw an
ellipse (default is 0.95), or, if \code{type="euclid"},
the radius of the circle to be drawn.}

\item{type}{The type of ellipse. The default \code{"t"}
assumes a multivariate t-distribution, and \code{"norm"}
assumes a multivariate normal distribution.
\code{"euclid"} draws a circle with the radius equal to
\code{level}, representing the euclidian distance from
the center. This ellipse probably won't appear circular
unless \code{coord_fixed()} is applied.}

\item{segments}{The number of segments to be used in
drawing the ellipse.}

\item{na.rm}{If \code{FALSE} (the default), removes
missing values with a warning. If \code{TRUE} silently
removes missing values.}

\item{mapping}{The aesthetic mapping, usually constructed
with \code{\link{aes}} or \code{\link{aes_string}}. Only
needs to be set at the layer level if you are overriding
the plot defaults.}

\item{data}{A layer specific dataset - only needed if you
want to override the plot defaults.}

\item{geom}{The geometric object to use display the data}

\item{position}{The position adjustment to use for
overlappling points on this layer}

\item{...}{other arguments passed on to
\code{\link{layer}}. This can include aesthetics whose
values you want to set, not map. See \code{\link{layer}}
for more details.}
}
\description{
Calculate Data Ellipses
}
\details{
The code for calculating the ellipse is largely borrowed
from car::ellipse.
}
\examples{
\donttest{
ggplot(faithful, aes(waiting, eruptions))+
geom_point()+
stat_ellipse()

ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
geom_point()+
stat_ellipse()

ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
geom_point()+
stat_ellipse(type = "norm", linetype = 2)+
stat_ellipse(type = "t")

ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
geom_point()+
stat_ellipse(type = "norm", linetype = 2)+
stat_ellipse(type = "euclid", level = 3)+
coord_fixed()

ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
stat_ellipse(geom = "polygon")
}
}