Skip to content

Commit cd2b828

Browse files
dataflow-expression: Add support for the length functions
This commit adds support for the length, char_length, character_length and octet_length functions. The behavior of these functions depends on the dialect. In PostgreSQL, octet_length counts the number of bytes and the other functions count the number of characters. In MySQL, length and octet_length return the number of bytes in the string, and char_length and character_length return the number of characters in the string. Also, PostgreSQL is strict about the input type, so it will return an error if the input is not a string. MySQL, on the other hand, will convert the input to a string and return the length of that string. Release-Note-Core: Add support for `length`, `char_length`, `character_length` and `octet_length` functions. Change-Id: I888f1e8bfde1c61cd4a00a66fd40727a8f344bc1 Reviewed-on: https://gerrit.readyset.name/c/readyset/+/8004 Tested-by: Buildkite CI Reviewed-by: Jason Brown <jason.b@readyset.io>
1 parent 964a07b commit cd2b828

File tree

3 files changed

+127
-0
lines changed

3 files changed

+127
-0
lines changed

dataflow-expression/src/eval/builtins.rs

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use chrono_tz::Tz;
1212
use itertools::Either;
1313
use mysql_time::MySqlTime;
1414
use nom_sql::TimestampField;
15+
use readyset_data::dialect::SqlEngine;
1516
use readyset_data::{DfType, DfValue, TimestampTz};
1617
use readyset_errors::{internal, invalid_query_err, unsupported, ReadySetError, ReadySetResult};
1718
use readyset_util::math::integer_rnd;
@@ -1267,6 +1268,52 @@ impl BuiltinFunction {
12671268
}
12681269
.and_then(|value| value.coerce_to(ty, &DfType::Unknown))
12691270
}
1271+
BuiltinFunction::Length {
1272+
expr,
1273+
in_bytes,
1274+
dialect,
1275+
} => {
1276+
let s = non_null!(expr.eval(record)?);
1277+
match dialect.engine() {
1278+
SqlEngine::PostgreSQL => {
1279+
if !matches!(s, DfValue::Text(_) | DfValue::TinyText(_)) {
1280+
return Err(invalid_query_err!("LENGTH() requires a string argument"));
1281+
};
1282+
let s = s.to_string();
1283+
let len = if *in_bytes {
1284+
s.len() as i64
1285+
} else {
1286+
s.chars().count() as i64
1287+
};
1288+
Ok(DfValue::Int(len))
1289+
}
1290+
SqlEngine::MySQL => match s {
1291+
DfValue::None
1292+
| DfValue::Max
1293+
| DfValue::PassThrough(_)
1294+
| DfValue::BitVector(_)
1295+
| DfValue::Array(_)
1296+
| DfValue::ByteArray(_) => Ok(DfValue::None),
1297+
DfValue::Double(_)
1298+
| DfValue::Float(_)
1299+
| DfValue::Int(_)
1300+
| DfValue::Numeric(_)
1301+
| DfValue::Text(_)
1302+
| DfValue::Time(_)
1303+
| DfValue::TimestampTz(_)
1304+
| DfValue::TinyText(_)
1305+
| DfValue::UnsignedInt(_) => {
1306+
let s = s.to_string();
1307+
let len = if *in_bytes {
1308+
s.len() as i64
1309+
} else {
1310+
s.chars().count() as i64
1311+
};
1312+
Ok(DfValue::Int(len))
1313+
}
1314+
},
1315+
}
1316+
}
12701317
}
12711318
}
12721319
}
@@ -3335,4 +3382,50 @@ mod tests {
33353382
test_with_null_string("{null,1,2,3,null,5}", "*,1,2,3,*,5");
33363383
test_with_null_string("{{1,2},{3,4},{null,5}}", "1,2,3,4,*,5");
33373384
}
3385+
3386+
#[test]
3387+
fn length() {
3388+
// MySQL
3389+
let expr = "octet_length('ザ')";
3390+
assert_eq!(eval_expr(expr, MySQL), 3.into());
3391+
3392+
let expr = "length('hello')";
3393+
assert_eq!(eval_expr(expr, MySQL), 5.into());
3394+
3395+
let expr = "length('')";
3396+
assert_eq!(eval_expr(expr, MySQL), 0.into());
3397+
3398+
let expr = "length(null)";
3399+
assert_eq!(eval_expr(expr, MySQL), DfValue::None);
3400+
3401+
let expr = "length('ザ')";
3402+
assert_eq!(eval_expr(expr, MySQL), 3.into());
3403+
3404+
let expr = "char_length('ザ')";
3405+
assert_eq!(eval_expr(expr, MySQL), 1.into());
3406+
3407+
let expr = "character_length('ザ')";
3408+
assert_eq!(eval_expr(expr, MySQL), 1.into());
3409+
3410+
let expr = "length(1)";
3411+
assert_eq!(eval_expr(expr, MySQL), 1.into());
3412+
3413+
let expr = "char_length(1)";
3414+
assert_eq!(eval_expr(expr, MySQL), 1.into());
3415+
3416+
// PostgreSQL
3417+
let expr = "length('ザ')";
3418+
assert_eq!(eval_expr(expr, PostgreSQL), 1.into());
3419+
3420+
let expr = "char_length('ザ')";
3421+
assert_eq!(eval_expr(expr, PostgreSQL), 1.into());
3422+
3423+
let expr = "octet_length('ザ')";
3424+
assert_eq!(eval_expr(expr, PostgreSQL), 3.into());
3425+
3426+
let expr = "length(1)";
3427+
if let Ok(value) = try_eval_expr(expr, PostgreSQL) {
3428+
panic!("Expected error for `{expr}`, got {value:?}");
3429+
}
3430+
}
33383431
}

dataflow-expression/src/lib.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,16 @@ pub enum BuiltinFunction {
115115

116116
/// [`extract`](https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-EXTRACT)
117117
Extract(TimestampField, Expr),
118+
119+
/// `length` | `octet_length` | `char_length` | `character_length`:
120+
/// * [MySQL](https://dev.mysql.com/doc/refman/8.4/en/string-functions.html#function_length)
121+
/// * [Postgres](https://www.postgresql.org/docs/current/functions-string.html#FUNCTIONS-LENGTH)
122+
///
123+
Length {
124+
expr: Expr,
125+
in_bytes: bool, // if true, return the length in bytes, otherwise in characters
126+
dialect: Dialect,
127+
},
118128
}
119129

120130
impl BuiltinFunction {
@@ -150,6 +160,7 @@ impl BuiltinFunction {
150160
ArrayToString { .. } => "array_to_string",
151161
DateTrunc { .. } => "date_trunc",
152162
Extract { .. } => "extract",
163+
Length { .. } => "length",
153164
}
154165
}
155166
}
@@ -251,6 +262,9 @@ impl Display for BuiltinFunction {
251262
Extract(field, expr) => {
252263
write!(f, "({} FROM {})", field, expr)
253264
}
265+
Length { expr, .. } => {
266+
write!(f, "({})", expr)
267+
}
254268
}
255269
}
256270
}

dataflow-expression/src/lower.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,26 @@ impl BuiltinFunction {
448448

449449
(Self::DateTrunc(precision, source), ret_type)
450450
}
451+
"length" | "octet_length" | "char_length" | "character_length" => {
452+
// MySQL - `LENGTH()`, `OCTET_LENGTH()` = in bytes | `CHAR_LENGTH()`, `CHARACTER_LENGTH()` = in characters
453+
// PostgreSQL - `OCTET_LENGTH()` = in bytes | `LENGTH()`, `CHAR_LENGTH()`, `CHARACTER_LENGTH()` = in characters
454+
let expr = next_arg()?;
455+
let ty = if expr.ty().is_any_text() {
456+
DfType::BigInt
457+
} else {
458+
DfType::Int
459+
};
460+
let in_bytes = (matches!(dialect.engine(), SqlEngine::MySQL) && name == "length")
461+
|| name == "octet_length";
462+
(
463+
Self::Length {
464+
expr,
465+
in_bytes,
466+
dialect,
467+
},
468+
ty,
469+
)
470+
}
451471
_ => unsupported!("Function {name} does not exist"),
452472
};
453473

0 commit comments

Comments
 (0)