-
Notifications
You must be signed in to change notification settings - Fork 184
Redesign the group, bin, and stack transforms. #176
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
c0017da
fc4f5ec
53f5b1d
976ee09
3502c09
5c0d2ed
8e21466
743c8b7
73e3a66
d8a1a50
a6bf930
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,93 +1,20 @@ | ||
import {arrayify, identity, first, second, maybeLabel, maybeZero} from "../mark.js"; | ||
import {group1, group2} from "../transforms/group.js"; | ||
import {groupX, groupY, group} from "../transforms/group.js"; | ||
import {barX, barY} from "./bar.js"; | ||
import {cell} from "./cell.js"; | ||
import {dot} from "./dot.js"; | ||
|
||
export function group(data, { | ||
x = first, | ||
y = second, | ||
transform, | ||
...options | ||
} = {}) { | ||
if (transform !== undefined) data = transform(arrayify(data)); | ||
return cell( | ||
data, | ||
{ | ||
...options, | ||
transform: group2(x, y), | ||
x: maybeLabel(first, x), | ||
y: maybeLabel(second, y), | ||
fill: length3 | ||
} | ||
); | ||
export function groupCell(data, options) { | ||
return cell(...group(data, {out: "fill", ...options})); | ||
} | ||
|
||
export function groupX(data, { | ||
x = identity, | ||
y, | ||
y1, | ||
y2, | ||
transform, | ||
...options | ||
} = {}) { | ||
data = arrayify(data); | ||
if (transform !== undefined) data = arrayify(transform(data)); | ||
([y1, y2] = maybeZero(y, y1, y2, maybeLength(data, options))); | ||
return barY( | ||
data, | ||
{ | ||
...options, | ||
transform: group1(x), | ||
x: maybeLabel(first, x), | ||
y1, | ||
y2 | ||
} | ||
); | ||
export function groupDot(data, options) { | ||
return dot(...group(data, {out: "r", ...options})); | ||
} | ||
|
||
export function groupY(data, { | ||
y = identity, | ||
x, | ||
x1, | ||
x2, | ||
transform, | ||
...options | ||
} = {}) { | ||
data = arrayify(data); | ||
if (transform !== undefined) data = arrayify(transform(data)); | ||
([x1, x2] = maybeZero(x, x1, x2, maybeLength(data, options))); | ||
return barX( | ||
data, | ||
{ | ||
...options, | ||
transform: group1(y), | ||
x1, | ||
x2, | ||
y: maybeLabel(first, y) | ||
} | ||
); | ||
export function groupBarX(data, options) { | ||
return barX(...groupY(data, options)); | ||
} | ||
|
||
function length2([, group]) { | ||
return group.length; | ||
} | ||
|
||
function length3([,, group]) { | ||
return group.length; | ||
} | ||
|
||
length2.label = length3.label = "Frequency"; | ||
|
||
function maybeLength(data, {normalize}) { | ||
return normalize ? normalizer(normalize, data.length) : length2; | ||
} | ||
|
||
// An alternative channel definition to length2 (above) that computes the | ||
// proportion of each bin in [0, k]. If k is true, it is treated as 100 for | ||
// percentages; otherwise, it is typically 1. | ||
function normalizer(k, n) { | ||
k = k === true ? 100 : +k; | ||
const value = ([, group]) => group.length * k / n; | ||
value.label = `Frequency${k === 100 ? " (%)" : ""}`; | ||
return value; | ||
export function groupBarY(data, options) { | ||
return barY(...groupX(data, options)); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,48 @@ | ||
import {groups} from "d3-array"; | ||
import {defined} from "../defined.js"; | ||
import {valueof, maybeValue, range, offsetRange} from "../mark.js"; | ||
import {valueof, maybeValue, range, offsetRange, maybeLabel, first, arrayify, second, identity} from "../mark.js"; | ||
|
||
export function group1(x) { | ||
export function groupX(data, {x = identity, transform, ...options} = {}) { | ||
if (transform !== undefined) data = transform(arrayify(data)); | ||
return [ | ||
data, | ||
{ | ||
...options, | ||
transform: group1(x), | ||
x: maybeLabel(first, x), | ||
y: maybeLength(data, options) | ||
} | ||
]; | ||
} | ||
|
||
export function groupY(data, {y = identity, transform, ...options} = {}) { | ||
if (transform !== undefined) data = transform(arrayify(data)); | ||
return [ | ||
data, | ||
{ | ||
...options, | ||
transform: group1(y), | ||
y: maybeLabel(first, y), | ||
x: maybeLength(data, options) | ||
} | ||
]; | ||
} | ||
|
||
export function group(data, {x = first, y = second, out, transform, ...options} = {}) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. out = "frequency" There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. probably better to throw explicitly if (out === undefined) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There’s no mark channel named “frequency”, so I don’t think we want that as a default — it’s not usable directly. Agree we should throw an error if it’s missing though. |
||
if (transform !== undefined) data = transform(arrayify(data)); | ||
return [ | ||
data, | ||
{ | ||
...options, | ||
transform: group2(x, y), | ||
x: maybeLabel(first, x), | ||
y: maybeLabel(second, y), | ||
[out]: length3 | ||
} | ||
]; | ||
} | ||
|
||
function group1(x) { | ||
const {value} = maybeValue({value: x}); | ||
return (data, facets) => { | ||
const values = valueof(data, value); | ||
|
@@ -11,7 +51,7 @@ export function group1(x) { | |
}; | ||
} | ||
|
||
export function group2(vx, vy) { | ||
function group2(vx, vy) { | ||
const {value: x} = maybeValue({value: vx}); | ||
const {value: y} = maybeValue({value: vy}); | ||
return (data, facets) => { | ||
|
@@ -51,6 +91,30 @@ function defined1([key]) { | |
} | ||
|
||
// When faceting, some groups may be empty; these are filtered out. | ||
export function nonempty1([, {length}]) { | ||
function nonempty1([, {length}]) { | ||
return length > 0; | ||
} | ||
|
||
function length2([, group]) { | ||
return group.length; | ||
} | ||
|
||
function length3([,, group]) { | ||
return group.length; | ||
} | ||
|
||
length2.label = length3.label = "Frequency"; | ||
|
||
function maybeLength(data, {normalize}) { | ||
return normalize ? normalizer(normalize, data.length) : length2; | ||
} | ||
|
||
// An alternative channel definition to length2 (above) that computes the | ||
// proportion of each bin in [0, k]. If k is true, it is treated as 100 for | ||
// percentages; otherwise, it is typically 1. | ||
function normalizer(k, n) { | ||
k = k === true ? 100 : +k; | ||
const value = ([, group]) => group.length * k / n; | ||
value.label = `Frequency${k === 100 ? " (%)" : ""}`; | ||
return value; | ||
} |
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
out:"fill"
is a bit awkward, the goal is to indicate the name of the property we want to set in the result.I have struggled with the same issue in Plot.stack. My solution was to give a fixed name (e.g. high, low), then convert this later with
{y2: high, y1: low, y: mid, value: value}
(but I couldn't find a nice way to do it from within the call).https://github.com/observablehq/plot/pull/132/files#diff-90dbe63978af6da907d965dde0b7bcb94aa16eccae54868c5b075122a6e2ec5aR9
If I wanted to follow your approach but for multiple return values, would I have to write something like
...stack(data, {value: y, out_high: "y2", out_low: "y1", out_value: "value", out_mid: "y"…}
, or maybe...stack(data, {value: y, out: {high: "y2", low: "y1", value: "value", mid: "y"…}
?It would also be good to have a default "out" that would keep the names as-is.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I considered having separate groupR and groupFill helpers which specify out = r and out = fill respectively, but since there are already helpers for groupCell and groupDot, it didn’t seem necessary. I expect that the out argument will be used rarely by users.
And it is syntactically shorter than having to use destructuring for the same thing, so if the expectation is that these things will always be renamed, it seems nicer to design for it. If it outputs to a z channel and requires destructuring to use, how is that better?
For stack I would expect that you would always use stackX or stackY. When you would you use something else? And so I don’t think I would expose the stack function. Even if you did want something other than stackX or stackY, you could always rename x/x1/x2 or y/y1/y2 the same way you are currently renaming high/low/etc. The latter are never usable directly as mark channels.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Stepping back for a second, I think the desire for these transforms (stack, group, bin) is to produce data + channel definitions that can be passed to marks. Mark channels have well-known names (e.g., x1, x2), and there only a small number of them, so I’d prefer the transforms to return data structures that use these names rather than introducing new transform-specific names.
In the case of stack, the anticipated “outs” are either x/y1/y2 or y/x1/x2, so if the goal is to share code internally, I could imagine an out = y for the first case and an out = x for the second case. But again it would just be an internal method so it doesn’t really matter what we call it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I’m not sure we want to support stacked lines, but I might prefer stackLineX1, stackLineX2, stackLineY1, stackLineY2 to the position argument for similar reasons. (The position argument introduces new words bottom/center/top whereas x1/x2/y1/y2 are existing words, and also are less confusing in the stackX case.) I’m not sure when we’d want the midline, so I’d remove that for now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Decoupling the mark and the stack operation just helped me to think it thru. But you're right we can go back to stackY by default.
I use the midline in the policeDeaths plot, to create a guide in the middle (mostly for aesthetic reasons), and also to position the labels. Also, transformed into a textPath, to position labels in the US population chart.