Skip to content

Commit 276780a

Browse files
committed
Merge pull request #926 from JoFrhwld/feature/stat-ellipse
Feature/stat ellipse
2 parents 6343f79 + f199a24 commit 276780a

File tree

5 files changed

+187
-0
lines changed

5 files changed

+187
-0
lines changed

DESCRIPTION

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ Collate:
175175
'stat-density-2d.r'
176176
'stat-density.r'
177177
'stat-ecdf.r'
178+
'stat-ellipse.R'
178179
'stat-function.r'
179180
'stat-identity.r'
180181
'stat-qq.r'

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,7 @@ export(stat_contour)
369369
export(stat_density)
370370
export(stat_density2d)
371371
export(stat_ecdf)
372+
export(stat_ellipse)
372373
export(stat_function)
373374
export(stat_hline)
374375
export(stat_identity)
@@ -413,5 +414,6 @@ import(plyr)
413414
import(proto)
414415
import(reshape2)
415416
import(scales)
417+
importFrom(MASS,cov.trob)
416418
importFrom(MASS,kde2d)
417419
importFrom(methods,setRefClass)

NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
ggplot2 0.9.3.1.99
22
----------------------------------------------------------------
3+
* `stat_ellipse()` adds data ellipses. It supports bivariate normal and t distributions,
4+
as well as a euclidian distance circle. (@jofrhwld, #926)
35

46
* Allow specifying only one of the limits in a scale and use the automatic
57
calculation of the other limit by passing NA to to the limit function,

R/stat-ellipse.R

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
#' Plot data ellipses.
2+
#'
3+
#' @param level The confidence level at which to draw an ellipse (default is 0.95),
4+
#' or, if \code{type="euclid"}, the radius of the circle to be drawn.
5+
#' @param type The type of ellipse.
6+
#' The default \code{"t"} assumes a multivariate t-distribution, and
7+
#' \code{"norm"} assumes a multivariate normal distribution.
8+
#' \code{"euclid"} draws a circle with the radius equal to \code{level},
9+
#' representing the euclidian distance from the center.
10+
#' This ellipse probably won't appear circular unless \code{coord_fixed()} is applied.
11+
#' @param segments The number of segments to be used in drawing the ellipse.
12+
#' @param na.rm If \code{FALSE} (the default), removes missing values with
13+
#' a warning. If \code{TRUE} silently removes missing values.
14+
#' @inheritParams stat_identity
15+
#'
16+
#' @details The method for calculating the ellipses has been modified from car::ellipse (Fox and Weisberg, 2011)
17+
#'
18+
#' @references
19+
#' John Fox and Sanford Weisberg (2011). An {R} Companion to Applied Regression, Second Edition. Thousand Oaks CA: Sage. URL: http://socserv.socsci.mcmaster.ca/jfox/Books/Companion
20+
#'
21+
#' @export
22+
#' @importFrom MASS cov.trob
23+
#'
24+
#' @examples
25+
#' ggplot(faithful, aes(waiting, eruptions))+
26+
#' geom_point()+
27+
#' stat_ellipse()
28+
#'
29+
#' ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
30+
#' geom_point()+
31+
#' stat_ellipse()
32+
#'
33+
#' ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
34+
#' geom_point()+
35+
#' stat_ellipse(type = "norm", linetype = 2)+
36+
#' stat_ellipse(type = "t")
37+
#'
38+
#' ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
39+
#' geom_point()+
40+
#' stat_ellipse(type = "norm", linetype = 2)+
41+
#' stat_ellipse(type = "euclid", level = 3)+
42+
#' coord_fixed()
43+
#'
44+
#' ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
45+
#' stat_ellipse(geom = "polygon")
46+
47+
stat_ellipse <- function(mapping = NULL, data = NULL, geom = "path", position = "identity", type = "t", level = 0.95, segments = 51, na.rm = FALSE, ...) {
48+
StatEllipse$new(mapping = mapping, data = data, geom = geom, position = position, type = type, level = level, segments = segments, na.rm = na.rm, ...)
49+
}
50+
51+
StatEllipse <- proto(Stat, {
52+
objname <- "ellipse"
53+
54+
required_aes <- c("x", "y")
55+
default_geom <- function(.) GeomPath
56+
57+
calculate_groups <- function(., data, scales, ...){
58+
.super$calculate_groups(., data, scales,...)
59+
}
60+
calculate <- function(., data, scales, type = "t", level = 0.95, segments = 51, na.rm = FALSE, ...){
61+
data <- remove_missing(data, na.rm, vars = c("x","y"), name = "stat_ellipse", finite = TRUE)
62+
ellipse <- calculate_ellipse(data=data, vars= c("x","y"), type=type, level=level, segments=segments)
63+
return(ellipse)
64+
}
65+
})
66+
67+
calculate_ellipse <- function(data, vars, type, level, segments){
68+
dfn <- 2
69+
dfd <- nrow(data) - 1
70+
71+
if (!type %in% c("t", "norm", "euclid")){
72+
message("Unrecognized ellipse type")
73+
ellipse <- rbind(as.numeric(c(NA, NA)))
74+
} else if (dfd < 3){
75+
message("Too few points to calculate an ellipse")
76+
ellipse <- rbind(as.numeric(c(NA, NA)))
77+
} else {
78+
if (type == "t"){
79+
v <- cov.trob(data[,vars])
80+
} else if (type == "norm"){
81+
v <- cov.wt(data[,vars])
82+
} else if (type == "euclid"){
83+
v <- cov.wt(data[,vars])
84+
v$cov <- diag(rep(min(diag(v$cov)), 2))
85+
}
86+
shape <- v$cov
87+
center <- v$center
88+
chol_decomp <- chol(shape)
89+
if (type == "euclid"){
90+
radius <- level/max(chol_decomp)
91+
} else {
92+
radius <- sqrt(dfn * qf(level, dfn, dfd))
93+
}
94+
angles <- (0:segments) * 2 * pi/segments
95+
unit.circle <- cbind(cos(angles), sin(angles))
96+
ellipse <- t(center + radius * t(unit.circle %*% chol_decomp))
97+
}
98+
99+
ellipse <- as.data.frame(ellipse)
100+
colnames(ellipse) <- vars
101+
return(ellipse)
102+
}

man/stat_ellipse.Rd

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
% Generated by roxygen2 (4.0.0): do not edit by hand
2+
\name{stat_ellipse}
3+
\alias{stat_ellipse}
4+
\title{Plot data ellipses.}
5+
\usage{
6+
stat_ellipse(mapping = NULL, data = NULL, geom = "path",
7+
position = "identity", type = "t", level = 0.95, segments = 51,
8+
na.rm = FALSE, ...)
9+
}
10+
\arguments{
11+
\item{level}{The confidence level at which to draw an
12+
ellipse (default is 0.95), or, if \code{type="euclid"},
13+
the radius of the circle to be drawn.}
14+
15+
\item{type}{The type of ellipse. The default \code{"t"}
16+
assumes a multivariate t-distribution, and \code{"norm"}
17+
assumes a multivariate normal distribution.
18+
\code{"euclid"} draws a circle with the radius equal to
19+
\code{level}, representing the euclidian distance from
20+
the center. This ellipse probably won't appear circular
21+
unless \code{coord_fixed()} is applied.}
22+
23+
\item{segments}{The number of segments to be used in
24+
drawing the ellipse.}
25+
26+
\item{na.rm}{If \code{FALSE} (the default), removes
27+
missing values with a warning. If \code{TRUE} silently
28+
removes missing values.}
29+
30+
\item{mapping}{The aesthetic mapping, usually constructed
31+
with \code{\link{aes}} or \code{\link{aes_string}}. Only
32+
needs to be set at the layer level if you are overriding
33+
the plot defaults.}
34+
35+
\item{data}{A layer specific dataset - only needed if you
36+
want to override the plot defaults.}
37+
38+
\item{geom}{The geometric object to use display the data}
39+
40+
\item{position}{The position adjustment to use for
41+
overlappling points on this layer}
42+
43+
\item{...}{other arguments passed on to
44+
\code{\link{layer}}. This can include aesthetics whose
45+
values you want to set, not map. See \code{\link{layer}}
46+
for more details.}
47+
}
48+
\description{
49+
Plot data ellipses.
50+
}
51+
\details{
52+
The method for calculating the ellipses has been modified from car::ellipse (Fox and Weisberg, 2011)
53+
}
54+
\examples{
55+
ggplot(faithful, aes(waiting, eruptions))+
56+
geom_point()+
57+
stat_ellipse()
58+
59+
ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
60+
geom_point()+
61+
stat_ellipse()
62+
63+
ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
64+
geom_point()+
65+
stat_ellipse(type = "norm", linetype = 2)+
66+
stat_ellipse(type = "t")
67+
68+
ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
69+
geom_point()+
70+
stat_ellipse(type = "norm", linetype = 2)+
71+
stat_ellipse(type = "euclid", level = 3)+
72+
coord_fixed()
73+
74+
ggplot(faithful, aes(waiting, eruptions, color = eruptions > 3))+
75+
stat_ellipse(geom = "polygon")
76+
}
77+
\references{
78+
John Fox and Sanford Weisberg (2011). An {R} Companion to Applied Regression, Second Edition. Thousand Oaks CA: Sage. URL: http://socserv.socsci.mcmaster.ca/jfox/Books/Companion
79+
}
80+

0 commit comments

Comments
 (0)