Skip to content

Commit 5014c8c

Browse files
committed
treewide: Replace rust_decimal with bigdecimal
We wrap the type with our own enum capable of representing +/-Infinity and NaN. This allows us to snapshot and replicate tables with any `DECIMAL`/`NUMERIC` precision, removing the limit of 27 (previously 28) digits.

`bigdecimal` is certainly slower for very large numbers, and it may be worthwhile to add a `Decimal::SmallNumber` variant which can fit into `rust_decimal`'s i128 representation, but I don't know how much faster it is for smaller numbers.

Fixes: REA-1933, REA-5720
Release-Note-Core: Add support for arbitrarily large `DECIMAL` and `NUMERIC` values. We can now replicate and use the full range allowed by MySQL and Postgres for these columns. This also fixes an issue with replicating NaN values, which will now work as expected.
BuildDistroReleasePkg: decimalalpha
Change-Id: I9ed0219e1f78d5e1aa8466a41ff6d42239fb9467
Reviewed-on: https://gerrit.readyset.name/c/readyset/+/9519
Reviewed-by: Johnathan Davis <jcd@readyset.io>
Tested-by: Buildkite CI
1 parent b5388be commit 5014c8c

File tree

56 files changed

+1266
-892
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+1266
-892
lines changed

Cargo.lock

Lines changed: 42 additions & 172 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ members = [
4040
"readyset-common",
4141
"readyset-data",
4242
"readyset-dataflow",
43+
"readyset-decimal",
4344
"readyset-e2e-tests",
4445
"readyset-errors",
4546
"readyset-logictest",
@@ -99,6 +100,7 @@ async-stream = "0.3.5"
99100
async-trait = "0.1"
100101
backtrace = "0.3.65"
101102
base64 = { version = "0.22.1", default-features = false, features = ["std"] }
103+
bigdecimal = "0.4.8"
102104
bimap = "0.6.3"
103105
bincode = "1.3.3"
104106
bit-vec = "0.6"
@@ -174,6 +176,8 @@ nom_locate = "4.2.0"
174176
notify = { version = "8.0.0", default-features = false, features = [
175177
"macos_kqueue",
176178
] }
179+
num-integer = "0.1.46"
180+
num-traits = "0.2.19"
177181
num_cpus = "1.16.0"
178182
once_cell = "1.20.2"
179183
opentelemetry = "0.21.0"
@@ -204,7 +208,6 @@ rocksdb = { version = "0.22.0", default-features = false, features = [
204208
"lz4",
205209
"jemalloc",
206210
] }
207-
rust_decimal = "1.26"
208211
rusty-fork = "0.3.0"
209212
rustyline = "14.0"
210213
rustyline-derive = "0.10.0"

data-generator/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@ eui48 = { workspace = true }
1313
growable-bloom-filter = { workspace = true }
1414
rand = { workspace = true }
1515
rand_regex = { workspace = true }
16-
rust_decimal = { workspace = true }
1716
uuid = { workspace = true }
1817
rand_distr = { workspace = true }
1918

2019
readyset-sql = { path = "../readyset-sql" }
21-
readyset-data = { path = "../readyset-data/" }
20+
readyset-data = { path = "../readyset-data" }
21+
readyset-decimal = { path = "../readyset-decimal" }
2222

2323
[dev-dependencies]
2424
test-strategy = { workspace = true }

data-generator/src/lib.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ use rand::seq::SliceRandom;
1313
use rand::{thread_rng, Rng, RngCore};
1414
use rand_distr::Zipf;
1515
use readyset_data::{encoding::Encoding, DfType, DfValue, Dialect};
16+
use readyset_decimal::Decimal;
1617
use readyset_sql::ast::SqlType;
17-
use rust_decimal::Decimal;
1818

1919
mod distribution_annotation;
2020

@@ -756,11 +756,13 @@ pub fn unique_value_of_type(typ: &SqlType, idx: u32) -> DfValue {
756756
SqlType::MediumIntUnsigned(_) => (idx).into(),
757757
SqlType::Float | SqlType::Double => (1.5 + idx as f64).try_into().unwrap(),
758758
SqlType::Real => (1.5 + idx as f32).try_into().unwrap(),
759-
SqlType::Decimal(prec, scale) => Decimal::new(clamp_digits(*prec as _), *scale as _).into(),
759+
SqlType::Decimal(prec, scale) => {
760+
Decimal::new(clamp_digits(*prec as _) as _, *scale as _).into()
761+
}
760762
SqlType::Numeric(prec_scale) => match prec_scale {
761-
Some((prec, None)) => Decimal::new(clamp_digits(*prec as _), 1),
762-
Some((prec, Some(scale))) => Decimal::new(clamp_digits(*prec as _), *scale as _),
763-
None => Decimal::new((15 + idx) as i64, 2),
763+
Some((prec, None)) => Decimal::new(clamp_digits(*prec as _) as _, 1),
764+
Some((prec, Some(scale))) => Decimal::new(clamp_digits(*prec as _) as _, *scale as _),
765+
None => Decimal::new((15 + idx) as _, 2),
764766
}
765767
.into(),
766768
SqlType::DateTime(_) | SqlType::Timestamp => (NaiveDate::from_ymd_opt(2020, 1, 1)

dataflow-expression/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ chrono = { workspace = true, features = ["serde"] }
1111
chrono-tz = { workspace = true, features = ["serde"] }
1212
itertools = { workspace = true }
1313
lazy_static = { workspace = true }
14+
num-traits = { workspace = true }
1415
proptest = { workspace = true }
1516
regex = { workspace = true }
16-
rust_decimal = { workspace = true }
1717
serde = { workspace = true, features = ["derive"] }
1818
serde_json = { workspace = true }
1919
test-strategy = { workspace = true }
@@ -25,6 +25,7 @@ readyset-sql = { path = "../readyset-sql" }
2525
readyset-util = { path = "../readyset-util" }
2626
mysql-time = { path = "../mysql-time" }
2727
readyset-data = { path = "../readyset-data" }
28+
readyset-decimal = { path = "../readyset-decimal" }
2829
readyset-errors = { path = "../readyset-errors" }
2930
partial-map = { path = "../partial-map" }
3031

dataflow-expression/src/eval/builtins.rs

Lines changed: 27 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use std::borrow::Borrow;
22
use std::cmp::Ordering;
33
use std::fmt::{self, Write};
4-
use std::ops::{Add, Div, Mul, Sub};
4+
use std::ops::{Add, Sub};
55
use std::str::FromStr;
66

77
use chrono::{
@@ -11,13 +11,13 @@ use chrono::{
1111
use chrono_tz::Tz;
1212
use itertools::Either;
1313
use mysql_time::MySqlTime;
14+
use num_traits::cast::FromPrimitive;
1415
use readyset_data::dialect::SqlEngine;
1516
use readyset_data::{Array, Collation, DfType, DfValue, TimestampTz};
17+
use readyset_decimal::{Decimal, RoundingMode};
1618
use readyset_errors::{internal, invalid_query_err, unsupported, ReadySetError, ReadySetResult};
1719
use readyset_sql::ast::TimestampField;
1820
use readyset_util::math::integer_rnd;
19-
use rust_decimal::prelude::{FromPrimitive, ToPrimitive};
20-
use rust_decimal::Decimal;
2121
use serde_json::Value as JsonValue;
2222
use test_strategy::Arbitrary;
2323
use vec1::Vec1;
@@ -891,14 +891,12 @@ impl BuiltinFunction {
891891
DfValue::UnsignedInt(inner) => inner as i32,
892892
DfValue::Float(f) => f.round() as i32,
893893
DfValue::Double(f) => f.round() as i32,
894-
DfValue::Numeric(d) => {
895-
// TODO(fran): I don't know if this is the right thing to do.
896-
d.round().to_i32().ok_or_else(|| {
897-
ReadySetError::BadRequest(format!(
898-
"NUMERIC value {d} exceeds 32-bit integer size"
899-
))
900-
})?
901-
}
894+
// XXX(mvzink): Yes, MySQL does round the second argument (up) if it's a decimal
895+
// before using that to round the first argument. However, Postgres throws an
896+
// error, so we are not matching that.
897+
DfValue::Numeric(d) => d
898+
.round_dp_with_strategy(0, RoundingMode::HalfUp)
899+
.try_into()?,
902900
_ => 0,
903901
};
904902

@@ -938,22 +936,10 @@ impl BuiltinFunction {
938936
Ok(DfValue::Int(rounded as _))
939937
}
940938
DfValue::Numeric(d) => {
941-
let rounded_dec = if rnd_prec >= 0 {
942-
d.round_dp_with_strategy(
943-
rnd_prec as _,
944-
rust_decimal::RoundingStrategy::MidpointAwayFromZero,
945-
)
946-
} else {
947-
let factor = Decimal::from_f64(10.0f64.powf(-rnd_prec as _)).unwrap();
948-
949-
d.div(factor)
950-
.round_dp_with_strategy(
951-
0,
952-
rust_decimal::RoundingStrategy::MidpointAwayFromZero,
953-
)
954-
.mul(factor)
955-
};
956-
939+
let rounded_dec = d.round_dp_with_strategy(
940+
rnd_prec as _,
941+
readyset_decimal::RoundingMode::HalfUp,
942+
);
957943
Ok(DfValue::Numeric(rounded_dec.into()))
958944
}
959945
dt => {
@@ -1966,29 +1952,37 @@ mod tests {
19661952
let expr = parse_and_lower("round(c0, c1)", MySQL);
19671953
assert_eq!(
19681954
expr.eval::<DfValue>(&[
1969-
DfValue::from(Decimal::from_f64(52.123).unwrap()),
1955+
DfValue::from(Decimal::try_from(52.123).unwrap()),
19701956
DfValue::from(1)
19711957
])
19721958
.unwrap(),
1973-
DfValue::from(Decimal::from_f64(52.1)),
1959+
DfValue::from(Decimal::from_str("52.1").unwrap()),
19741960
);
19751961

19761962
assert_eq!(
19771963
expr.eval::<DfValue>(&[
1978-
DfValue::from(Decimal::from_f64(-52.666).unwrap()),
1964+
DfValue::from(Decimal::try_from(-52.666).unwrap()),
19791965
DfValue::from(2)
19801966
])
19811967
.unwrap(),
1982-
DfValue::from(Decimal::from_f64(-52.67)),
1968+
DfValue::from(Decimal::from_str("-52.67").unwrap()),
19831969
);
19841970

19851971
assert_eq!(
19861972
expr.eval::<DfValue>(&[
1987-
DfValue::from(Decimal::from_f64(-52.666).unwrap()),
1973+
DfValue::from(Decimal::try_from(-52.666).unwrap()),
19881974
DfValue::from(-1)
19891975
])
19901976
.unwrap(),
1991-
DfValue::from(Decimal::from_f64(-50.)),
1977+
DfValue::from(Decimal::try_from(-50.).unwrap()),
1978+
);
1979+
assert_eq!(
1980+
expr.eval::<DfValue>(&[
1981+
DfValue::from(Decimal::try_from(-5266.666).unwrap()),
1982+
DfValue::from(-2)
1983+
])
1984+
.unwrap(),
1985+
DfValue::from(Decimal::try_from(-5300.).unwrap()),
19921986
);
19931987
}
19941988

dataflow-state/Cargo.toml

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,16 +42,13 @@ readyset-util = { path = "../readyset-util" }
4242
replication-offset = { path = "../replication-offset" }
4343

4444
[dev-dependencies]
45-
proptest-stateful = { path = "../proptest-stateful" }
4645
async-trait = { workspace = true }
47-
pretty_assertions = { workspace = true }
48-
lazy_static = { workspace = true }
4946
criterion = { workspace = true, features = ["async_tokio"] }
50-
rust_decimal = { workspace = true, features = [
51-
"db-tokio-postgres",
52-
"serde-str",
53-
] }
47+
lazy_static = { workspace = true }
48+
pretty_assertions = { workspace = true }
5449
test-utils = { path = "../test-utils" }
50+
proptest-stateful = { path = "../proptest-stateful" }
51+
readyset-decimal = { path = "../readyset-decimal" }
5552

5653
[[bench]]
5754
name = "persistent_state"

dataflow-state/src/persistent_state/mod.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2498,11 +2498,16 @@ mod tests {
24982498
use std::path::PathBuf;
24992499

25002500
use pretty_assertions::assert_eq;
2501+
25012502
use readyset_data::Bound::*;
2503+
25022504
use readyset_data::Collation;
2505+
25032506
use replication_offset::mysql::MySqlPosition;
2507+
25042508
use replication_offset::postgres::PostgresPosition;
2505-
use rust_decimal::Decimal;
2509+
2510+
use readyset_decimal::Decimal;
25062511

25072512
use super::*;
25082513

@@ -2709,7 +2714,7 @@ mod tests {
27092714
let mut state = setup_persistent("lookup_numeric_with_different_precision", None);
27102715
state.add_index(Index::btree_map(vec![0]), None);
27112716

2712-
let records = vec![vec![DfValue::from(Decimal::from_str_exact("4.0").unwrap())]];
2717+
let records = vec![vec![DfValue::from(Decimal::from_str("4.0").unwrap())]];
27132718

27142719
state
27152720
.process_records(&mut records.clone().into(), None, None)
@@ -2718,13 +2723,13 @@ mod tests {
27182723
let res = state
27192724
.lookup(
27202725
&[0],
2721-
&PointKey::Single(DfValue::from(Decimal::from_str_exact("4").unwrap())),
2726+
&PointKey::Single(DfValue::from(Decimal::from_str("4").unwrap())),
27222727
)
27232728
.unwrap();
27242729

27252730
assert_eq!(res, records.into());
27262731
let val = Decimal::try_from(&res.into_iter().next().unwrap()[0]).unwrap();
2727-
assert_eq!(val.scale(), 1);
2732+
assert_eq!(val.scale(), Some(1));
27282733
}
27292734

27302735
#[test]

deny.toml

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -132,24 +132,26 @@ deny = [
132132
#{ name = "ansi_term", version = "=0.11.0", wrappers = [] },
133133
]
134134
# Certain crates/versions that will be skipped when doing duplicate detection.
135-
skip = []
135+
skip = [
136+
{ name = "bit-vec", version = "0.6.3" },
137+
{ name = "uuid", version = "0.8.2" },
138+
]
136139
# Similarly to `skip` allows you to skip certain crates during duplicate
137140
# detection. Unlike skip, it also includes the entire tree of transitive
138141
# dependencies starting at the specified crate, up to a certain depth, which is
139142
# by default infinite
140143
skip-tree = [
141-
{ name = "rusty-fork", version = "0.3.0" },
142-
{ name = "fail", version = "0.5.0" },
143-
{ name = "rust_decimal", version = "1.17.0" },
144144
{ name = "consulrs", version = "0.1.0" },
145+
{ name = "fail", version = "0.5.0" },
145146
{ name = "hashbrown", version = "0.12.3" },
146-
{ name = "unicode-width", version = "0.1.14" },
147-
{ name = "web-time", version = "0.2.4" },
147+
{ name = "hermit-abi", version = "0.3.9" },
148148
{ name = "itertools", version = "0.10.5" },
149149
{ name = "itertools", version = "0.12.1" },
150-
{ name = "winnow", version = "0.6.26" },
151150
{ name = "regex-automata", version = "0.1.10" },
152-
{ name = "hermit-abi", version = "0.3.9" },
151+
{ name = "rusty-fork", version = "0.3.0" },
152+
{ name = "unicode-width", version = "0.1.14" },
153+
{ name = "web-time", version = "0.2.4" },
154+
{ name = "winnow", version = "0.6.26" },
153155
]
154156

155157
# This section is considered when running `cargo deny check sources`.
@@ -166,9 +168,7 @@ unknown-git = "warn"
166168
# if not specified. If it is specified but empty, no registries are allowed.
167169
allow-registry = ["https://github.com/rust-lang/crates.io-index"]
168170
# List of URLs for allowed Git repositories
169-
allow-git = [
170-
"git+https://github.com/apache/datafusion-sqlparser-rs",
171-
]
171+
allow-git = ["git+https://github.com/apache/datafusion-sqlparser-rs"]
172172

173173
[sources.allow-org]
174174
# 1 or more github.com organizations to allow git sources for

psql-srv/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ postgres-types = { workspace = true, features = [
2626
"with-cidr-0_2",
2727
] }
2828
rand = { workspace = true }
29-
rust_decimal = { workspace = true }
3029
serde_json = { workspace = true, features = ["arbitrary_precision"] }
3130
sha2 = { workspace = true }
3231
smallvec = { workspace = true }
@@ -48,6 +47,7 @@ uuid = { workspace = true }
4847
database-utils = { path = "../database-utils" }
4948
readyset-adapter-types = { path = "../readyset-adapter-types" }
5049
readyset-data = { path = "../readyset-data" }
50+
readyset-decimal = { path = "../readyset-decimal" }
5151
readyset-sql = { path = "../readyset-sql" }
5252
readyset-util = { path = "../readyset-util" }
5353

0 commit comments

Comments
 (0)