1
1
# ' @param bins numeric vector giving number of bins in both vertical and
2
2
# ' horizontal directions. Set to 30 by default.
3
+ # ' @param binwidth Numeric vector giving bin width in both vertical and
4
+ # ' horizontal directions. Overrides \code{bins} if both set.
3
5
# ' @param drop if \code{TRUE} removes all cells with 0 counts.
4
6
# ' @export
5
7
# ' @aliases stat_bin2d
6
8
# ' @rdname geom_bin2d
7
- stat_bin_2d <- function (mapping = NULL , data = NULL , geom = " rect " ,
8
- position = " identity" , bins = 30 , drop = TRUE ,
9
- show.legend = NA , inherit.aes = TRUE , ... ) {
9
+ stat_bin_2d <- function (mapping = NULL , data = NULL , geom = " raster " ,
10
+ position = " identity" , bins = 30 , binwidth = NULL ,
11
+ drop = TRUE , show.legend = NA , inherit.aes = TRUE , ... ) {
10
12
layer(
11
13
data = data ,
12
14
mapping = mapping ,
@@ -17,6 +19,7 @@ stat_bin_2d <- function(mapping = NULL, data = NULL, geom = "rect",
17
19
inherit.aes = inherit.aes ,
18
20
stat_params = list (
19
21
bins = bins ,
22
+ binwidth = binwidth ,
20
23
drop = drop
21
24
),
22
25
params = list (... )
@@ -36,79 +39,98 @@ StatBin2d <- ggproto("StatBin2d", Stat,
36
39
37
40
compute_group = function (data , panel_info , binwidth = NULL , bins = 30 ,
38
41
breaks = NULL , origin = NULL , drop = TRUE , ... ) {
39
- range <- list (
40
- x = scale_dimension(panel_info $ x , c(0 , 0 )),
41
- y = scale_dimension(panel_info $ y , c(0 , 0 ))
42
- )
43
-
44
- # is.integer(...) below actually deals with factor input data, which is
45
- # integer by now. Bins for factor data should take the width of one level,
46
- # and should show up centered over their tick marks.
47
-
48
- # Determine origin, if omitted
49
- if (is.null(origin )) {
50
- origin <- c(NA , NA )
51
- } else {
52
- stopifnot(is.numeric(origin ))
53
- stopifnot(length(origin ) == 2 )
54
- }
55
- originf <- function (x ) if (is.integer(x )) - 0.5 else min(x , na.rm = TRUE )
56
- if (is.na(origin [1 ])) origin [1 ] <- originf(data $ x )
57
- if (is.na(origin [2 ])) origin [2 ] <- originf(data $ y )
58
-
59
- # Determine binwidth, if omitted
60
- if (is.null(binwidth )) {
61
- binwidth <- c(NA , NA )
62
- if (is.integer(data $ x )) {
63
- binwidth [1 ] <- 1
64
- } else {
65
- binwidth [1 ] <- diff(range $ x ) / bins
66
- }
67
- if (is.integer(data $ y )) {
68
- binwidth [2 ] <- 1
69
- } else {
70
- binwidth [2 ] <- diff(range $ y ) / bins
71
- }
72
- }
73
- stopifnot(is.numeric(binwidth ))
74
- stopifnot(length(binwidth ) == 2 )
75
42
76
- # Determine breaks, if omitted
77
- if (is.null( breaks )) {
78
- breaks <- list ( x = NULL , y = NULL )
79
- }
43
+ origin <- dual_param( origin , list ( NULL , NULL ))
44
+ binwidth <- dual_param( binwidth , list ( NULL , NULL ))
45
+ breaks <- dual_param( breaks , list ( NULL , NULL ) )
46
+ bins <- dual_param( bins , list ( x = 30 , y = 30 ))
80
47
81
- stopifnot(length( breaks ) == 2 )
82
- names( breaks ) <- c( " x " , " y " )
48
+ xbreaks <- bin_breaks( panel_info $ x , breaks $ x , origin $ x , binwidth $ x , bins $ x )
49
+ ybreaks <- bin_breaks( panel_info $ y , breaks $ y , origin $ y , binwidth $ y , bins $ y )
83
50
84
- if (is.null(breaks $ x )) {
85
- breaks $ x <- seq(origin [1 ], max(range $ x ) + binwidth [1 ], binwidth [1 ])
86
- }
87
- if (is.null(breaks $ y )) {
88
- breaks $ y <- seq(origin [2 ], max(range $ y ) + binwidth [2 ], binwidth [2 ])
51
+ xbin <- cut(data $ x , xbreaks , include.lowest = TRUE , label = FALSE )
52
+ ybin <- cut(data $ y , ybreaks , include.lowest = TRUE , label = FALSE )
53
+
54
+ if (is.null(data $ weight ))
55
+ data $ weight <- 1
56
+
57
+ out <- tapply_df(data $ weight , list (xbin = xbin , ybin = ybin ), sum , drop = drop )
58
+
59
+ xdim <- bin_loc(xbreaks , out $ xbin )
60
+ out $ x <- xdim $ mid
61
+ out $ width <- xdim $ length
62
+
63
+ ydim <- bin_loc(ybreaks , out $ ybin )
64
+ out $ y <- ydim $ mid
65
+ out $ height <- ydim $ length
66
+
67
+ out $ count <- out $ value
68
+ out $ density <- out $ count / sum(out $ count , na.rm = TRUE )
69
+ out
70
+ }
71
+ )
72
+
73
+ dual_param <- function (x , default = list (x = NULL , y = NULL )) {
74
+ if (is.null(x )) {
75
+ default
76
+ } else if (length(x ) == 2 ) {
77
+ if (is.list(x ) && ! is.null(names(x ))) {
78
+ x
79
+ } else {
80
+ list (x = x [[1 ]], y = x [[2 ]])
89
81
}
82
+ } else {
83
+ list (x = x , y = x )
84
+ }
85
+ }
90
86
91
- stopifnot(is.list(breaks ))
92
- stopifnot(all(sapply(breaks , is.numeric )))
87
+ bin_breaks <- function (scale , breaks = NULL , origin = NULL , binwidth = NULL ,
88
+ bins = 30 , right = 30 ) {
89
+ # Bins for categorical data should take the width of one level,
90
+ # and should show up centered over their tick marks. All other parameters
91
+ # are ignored.
92
+ if (inherits(scale , " discrete" )) {
93
+ breaks <- scale_breaks(scale )
94
+ return (- 0.5 + seq_len(length(breaks ) + 1 ))
95
+ }
93
96
94
- xbin <- cut( data $ x , sort (breaks $ x ), include.lowest = TRUE )
95
- ybin <- cut( data $ y , sort( breaks $ y ), include.lowest = TRUE )
97
+ if ( ! is.null (breaks ) )
98
+ return ( breaks )
96
99
97
- if (is.null( data $ weight )) data $ weight <- 1
100
+ range <- scale_limits( scale )
98
101
99
- counts <- as.data.frame(
100
- xtabs(weight ~ xbin + ybin , data ), responseName = " count" )
101
- if (drop ) counts <- subset(counts , count > 0 )
102
+ if (is.null(binwidth ) || identical(binwidth , NA )) {
103
+ binwidth <- diff(range ) / bins
104
+ }
105
+ stopifnot(is.numeric(binwidth ), length(binwidth ) == 1 )
102
106
103
- counts $ xint <- as.numeric(counts $ xbin )
104
- counts $ xmin <- breaks $ x [counts $ xint ]
105
- counts $ xmax <- breaks $ x [counts $ xint + 1 ]
107
+ if (is.null(origin ) || identical(origin , NA )) {
108
+ origin <- plyr :: round_any(range [1 ], binwidth , floor )
109
+ }
110
+ stopifnot(is.numeric(origin ), length(origin ) == 1 )
106
111
107
- counts $ yint <- as.numeric( counts $ ybin )
108
- counts $ ymin <- breaks $ y [ counts $ yint ]
109
- counts $ ymax <- breaks $ y [ counts $ yint + 1 ]
112
+ breaks <- seq( origin , range [ 2 ] + binwidth , binwidth )
113
+ adjust_breaks( breaks , right )
114
+ }
110
115
111
- counts $ density <- counts $ count / sum(counts $ count , na.rm = TRUE )
112
- counts
116
+ adjust_breaks <- function (x , right = TRUE ) {
117
+ diddle <- 1e-07 * stats :: median(diff(x ))
118
+ if (right ) {
119
+ fuzz <- c(- diddle , rep.int(diddle , length(x ) - 1 ))
120
+ } else {
121
+ fuzz <- c(rep.int(- diddle , length(x ) - 1 ), diddle )
113
122
}
114
- )
123
+ sort(x ) + fuzz
124
+ }
125
+
126
+ bin_loc <- function (x , id ) {
127
+ left <- x [- length(x )]
128
+ right <- x [- 1 ]
129
+
130
+ list (
131
+ left = left [id ],
132
+ right = right [id ],
133
+ mid = ((left + right ) / 2 )[id ],
134
+ length = diff(x )[id ]
135
+ )
136
+ }
0 commit comments