Skip to content

Commit 418911e

Browse files
builtin function: json[b]_object_agg
Add basic support for json_object_agg. The current implementation of the aggregator only allows aggregation over a single column. We implement a workaround where we inject a call to `json_build_object` over the key and value columns, hard-code an alias for that call, and give that alias to the aggregator. Release-Note-Core: Added support for builtin functions `json[b]_object_agg` and `json_objectagg`. Fixes: REA-4369 Change-Id: I5c5548e0a491b1ba1adf49c0699f2308c8ab0acf Reviewed-on: https://gerrit.readyset.name/c/readyset/+/8848 Reviewed-by: Vassili Zarouba <vassili@readyset.io> Tested-by: Buildkite CI
1 parent 8bedb4a commit 418911e

File tree

20 files changed

+304
-17
lines changed

20 files changed

+304
-17
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dataflow-expression/src/eval.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ macro_rules! non_null {
1919
}
2020

2121
pub(crate) mod builtins;
22-
mod json;
22+
pub mod json;
2323

2424
fn eval_binary_op(op: BinaryOperator, left: &DfValue, right: &DfValue) -> ReadySetResult<DfValue> {
2525
use BinaryOperator::*;
@@ -718,7 +718,7 @@ mod tests {
718718
assert_eq!(test_eval(&json_arr1, &json_obj1), expected);
719719

720720
let expected = json!([99, 100]);
721-
assert_eq!(test_eval(&json!(99), &json!(100)), expected);
721+
assert_eq!(test_eval(&json!([99]), &json!([100])), expected);
722722

723723
// Test error conditions
724724

dataflow-expression/src/eval/builtins.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2551,11 +2551,6 @@ mod tests {
25512551

25522552
test_identity("{}");
25532553
test_identity("[]");
2554-
test_identity("1");
2555-
test_identity("2.0");
2556-
test_identity("true");
2557-
test_identity("false");
2558-
test_identity("null");
25592554
test_identity("[null]");
25602555
}
25612556

dataflow-expression/src/eval/json.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ pub(crate) fn json_object_from_pairs(
292292
Ok(result.to_json_string()?.into())
293293
}
294294

295-
pub(crate) fn json_object_from_keys_and_values(
295+
pub fn json_object_from_keys_and_values(
296296
keys: &Array,
297297
values: &Array,
298298
allow_duplicate_keys: bool,

dataflow-expression/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
mod binary_operator;
2-
mod eval;
2+
pub mod eval;
33
pub mod like;
44
mod lower;
55
mod promotion;

dataflow-expression/src/reader_processing.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use std::sync::Arc;
55

66
use partial_map::InsertionOrder;
77
use readyset_data::DfValue;
8-
use readyset_errors::{internal, ReadySetResult};
8+
use readyset_errors::{internal, unsupported, ReadySetResult};
99
use readyset_sql::ast::OrderType;
1010
use serde::{Deserialize, Serialize};
1111

@@ -26,6 +26,8 @@ pub enum PostLookupAggregateFunction {
2626
Max,
2727
/// Take the minimum input value
2828
Min,
29+
/// Use specified Key-value pair to build a JSON Object
30+
JsonObjectAgg { allow_duplicate_keys: bool },
2931
}
3032

3133
impl PostLookupAggregateFunction {
@@ -45,6 +47,9 @@ impl PostLookupAggregateFunction {
4547
.into()),
4648
PostLookupAggregateFunction::Max => Ok(cmp::max(val1, val2).clone()),
4749
PostLookupAggregateFunction::Min => Ok(cmp::min(val1, val2).clone()),
50+
PostLookupAggregateFunction::JsonObjectAgg { .. } => {
51+
unsupported!("JsonObjectAgg is not supported as a post-lookup aggregate")
52+
}
4853
}
4954
}
5055
}

logictests/json_object_agg.test

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
statement ok
2+
CREATE TABLE test (id INT, name TEXT PRIMARY KEY);
3+
4+
statement ok
5+
INSERT INTO test (id, name) VALUES (1, 'a'), (2, 'b'), (3, 'c'), (2, 'd'), (3, 'e');
6+
7+
query T
8+
SELECT json_object_agg(id, name) FROM test;
9+
----
10+
{"1":"a","2":"b","3":"c","2":"d","3":"e"}
11+
12+
query T
13+
SELECT jsonb_object_agg(id, name) FROM test;
14+
----
15+
{"1":"a","2":"d","3":"e"}
16+
17+
statement ok
18+
DELETE FROM test WHERE name = 'a';
19+
20+
query T
21+
SELECT json_object_agg(id, name) FROM test;
22+
----
23+
{"2":"b","3":"c","2":"d","3":"e"}
24+
25+
statement ok
26+
DELETE FROM test WHERE name = 'd';
27+
28+
query T
29+
SELECT json_object_agg(id, name) FROM test;
30+
----
31+
{"2":"b","3":"c","3":"e"}
32+
33+
query T
34+
SELECT jsonb_object_agg(id, name) FROM test;
35+
----
36+
{"2":"b","3":"e"}
37+
38+
statement ok
39+
CREATE TABLE test_commas (name TEXT PRIMARY KEY, id INT);
40+
41+
statement ok
42+
INSERT INTO test_commas (name, id) VALUES ('Hello, World', 1), (',', 2);
43+
44+
query T
45+
SELECT json_object_agg(name, id) FROM test_commas;
46+
----
47+
{",":2,"Hello, World":1}
48+

nom-sql/src/common.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,33 @@ pub fn function_expr(
374374
separator,
375375
},
376376
),
377+
map(
378+
tuple((
379+
alt((
380+
map(tag_no_case("json_object_agg"), |_| true), //psql
381+
map(tag_no_case("jsonb_object_agg"), |_| false), //psql
382+
map(tag_no_case("json_objectagg"), |_| false), //mysql, jsonb_objectagg
383+
//not supported
384+
)),
385+
preceded(
386+
whitespace0,
387+
delimited(
388+
terminated(tag("("), whitespace0),
389+
tuple((
390+
expression(dialect),
391+
preceded(whitespace0, tag(",")),
392+
expression(dialect),
393+
)),
394+
preceded(whitespace0, tag(")")),
395+
),
396+
),
397+
)),
398+
|(is_json, (key, _, value))| FunctionExpr::JsonObjectAgg {
399+
key: Box::new(key),
400+
value: Box::new(value),
401+
allow_duplicate_keys: is_json,
402+
},
403+
),
377404
extract(dialect),
378405
lower(dialect),
379406
upper(dialect),

readyset-data/src/lib.rs

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1749,7 +1749,22 @@ impl From<JsonValue> for DfValue {
17491749

17501750
impl From<&JsonValue> for DfValue {
17511751
fn from(value: &JsonValue) -> Self {
1752-
value.to_string().into()
1752+
match value {
1753+
JsonValue::String(s) => s.clone().into(),
1754+
JsonValue::Null => DfValue::None,
1755+
JsonValue::Bool(true) => DfValue::UnsignedInt(1),
1756+
JsonValue::Bool(false) => DfValue::UnsignedInt(0),
1757+
JsonValue::Number(number) if number.is_i64() => DfValue::Int(number.as_i64().unwrap()),
1758+
JsonValue::Number(number) if number.is_u64() => {
1759+
DfValue::UnsignedInt(number.as_u64().unwrap())
1760+
}
1761+
JsonValue::Number(number) if number.is_f64() => {
1762+
DfValue::Double(number.as_f64().unwrap())
1763+
}
1764+
JsonValue::Number(number) => DfValue::from(number.to_string()),
1765+
// TODO: add DfValue::JSON
1766+
JsonValue::Object(_) | JsonValue::Array(_) => DfValue::from(value.to_string()),
1767+
}
17531768
}
17541769
}
17551770

readyset-dataflow/src/ops/grouped/aggregate.rs

Lines changed: 78 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use std::collections::hash_map::DefaultHasher;
22
use std::collections::HashMap;
33
use std::hash::{Hash, Hasher};
44

5+
use dataflow_expression::eval::json;
56
use readyset_data::dialect::SqlEngine;
67
use readyset_data::{Collation, DfType, Dialect};
78
use readyset_errors::{invariant, ReadySetResult};
@@ -22,7 +23,13 @@ pub enum Aggregation {
2223
/// Average the value of the `over` column. Maintains count and sum in HashMap
2324
Avg,
2425
/// Concatenates using the given separator between values.
25-
GroupConcat { separator: String },
26+
GroupConcat {
27+
separator: String,
28+
},
29+
30+
JsonObjectAgg {
31+
allow_duplicate_keys: bool,
32+
},
2633
}
2734

2835
impl Aggregation {
@@ -71,6 +78,7 @@ impl Aggregation {
7178
}
7279
}
7380
Aggregation::GroupConcat { .. } => DfType::Text(/* TODO */ Collation::default()),
81+
Aggregation::JsonObjectAgg { .. } => DfType::Text(Collation::default()),
7482
};
7583

7684
Ok(GroupedOperator::new(
@@ -150,6 +158,12 @@ impl AverageDataPair {
150158
/// Auxiliary State for an Aggregator node, which is owned by a Domain
151159
pub struct AggregatorState {
152160
count_sum_map: HashMap<GroupHash, AverageDataPair>,
161+
162+
// Store all `json_object_agg` keys and values in vecs and compute the json from them
163+
// on-the-fly. This allows for easier handling of distinct (jsonb) behaviour,
164+
// especially with deletions.
165+
json_agg_keys: Vec<DfValue>,
166+
json_agg_vals: Vec<DfValue>,
153167
}
154168

155169
impl Aggregator {
@@ -222,10 +236,12 @@ impl GroupedOperation for Aggregator {
222236
}
223237
};
224238

225-
let count_sum_map = match auxiliary_node_state {
226-
Some(AuxiliaryNodeState::Aggregation(ref mut aggregator_state)) => {
227-
&mut aggregator_state.count_sum_map
228-
}
239+
let (count_sum_map, json_agg_keys, json_agg_vals) = match auxiliary_node_state {
240+
Some(AuxiliaryNodeState::Aggregation(ref mut aggregator_state)) => (
241+
&mut aggregator_state.count_sum_map,
242+
&mut aggregator_state.json_agg_keys,
243+
&mut aggregator_state.json_agg_vals,
244+
),
229245
Some(_) => internal!("Incorrect auxiliary state for Aggregation node"),
230246
None => internal!("Missing auxiliary state for Aggregation node"),
231247
};
@@ -240,6 +256,42 @@ impl GroupedOperation for Aggregator {
240256
.apply_diff(diff)
241257
};
242258

259+
let mut apply_json_object_agg =
260+
|_curr, diff: Self::Diff, allow_dups| -> ReadySetResult<DfValue> {
261+
let (key, value) = diff
262+
.value
263+
.to_json()?
264+
.as_object()
265+
.ok_or_else(|| {
266+
internal_err!("json_object_agg: json_object value is not an object")
267+
})?
268+
.iter()
269+
.next()
270+
.ok_or_else(|| internal_err!("json_object_agg: json_object is empty"))
271+
.map(|(k, v)| (DfValue::from(k.as_str()), DfValue::from(v)))?;
272+
273+
if diff.positive {
274+
json_agg_keys.push(key);
275+
json_agg_vals.push(value.clone());
276+
} else if let Some(pos) = json_agg_keys
277+
.iter()
278+
.zip(json_agg_vals.iter_mut())
279+
.position(|(k, v)| k == &key && v == &value)
280+
{
281+
json_agg_keys.remove(pos);
282+
json_agg_vals.remove(pos);
283+
} else {
284+
internal!("json_object_agg: diff removed a non-existant key-value pair")
285+
}
286+
287+
// TODO: Indent the output
288+
json::json_object_from_keys_and_values(
289+
&json_agg_keys.clone().into(),
290+
&json_agg_vals.clone().into(),
291+
allow_dups,
292+
)
293+
};
294+
243295
let apply_diff =
244296
|curr: ReadySetResult<DfValue>, diff: Self::Diff| -> ReadySetResult<DfValue> {
245297
if diff.value.is_none() {
@@ -253,6 +305,9 @@ impl GroupedOperation for Aggregator {
253305
Aggregation::GroupConcat { separator: _ } => internal!(
254306
"GroupConcats are separate from the other aggregations in the dataflow."
255307
),
308+
Aggregation::JsonObjectAgg {
309+
allow_duplicate_keys,
310+
} => apply_json_object_agg(curr?, diff, allow_duplicate_keys),
256311
}
257312
};
258313

@@ -270,6 +325,15 @@ impl GroupedOperation for Aggregator {
270325
Aggregation::GroupConcat { separator: ref s } => {
271326
format!("||({})", s)
272327
}
328+
Aggregation::JsonObjectAgg {
329+
allow_duplicate_keys,
330+
} => {
331+
if allow_duplicate_keys {
332+
"JsonObjectAgg".to_owned()
333+
} else {
334+
"JsonbObjectAgg".to_owned()
335+
}
336+
}
273337
};
274338
}
275339

@@ -278,6 +342,15 @@ impl GroupedOperation for Aggregator {
278342
Aggregation::Sum => format!("𝛴({})", self.over),
279343
Aggregation::Avg => format!("Avg({})", self.over),
280344
Aggregation::GroupConcat { separator: ref s } => format!("||({}, {})", s, self.over),
345+
Aggregation::JsonObjectAgg {
346+
allow_duplicate_keys,
347+
} => {
348+
if allow_duplicate_keys {
349+
format!("JsonObjectAgg({})", self.over)
350+
} else {
351+
format!("JsonbObjectAgg({})", self.over)
352+
}
353+
}
281354
};
282355
let group_cols = self
283356
.group

0 commit comments

Comments
 (0)