Commit eaf3c05

Use BIGNUMERIC for large decimals in bigquery (#984)
* Use BIGNUMERIC for large decimals in bigquery
* Handle dest without decimal type
1 parent b4e5272

4 files changed: 73 additions, 2 deletions

dlt/destinations/impl/bigquery/bigquery.py
Lines changed: 7 additions & 1 deletion

@@ -62,7 +62,6 @@ class BigQueryTypeMapper(TypeMapper):
     sct_to_dbt = {
         "text": "STRING(%i)",
         "binary": "BYTES(%i)",
-        "decimal": "NUMERIC(%i,%i)",
     }

     dbt_to_sct = {
@@ -79,6 +78,13 @@ class BigQueryTypeMapper(TypeMapper):
         "TIME": "time",
     }

+    def to_db_decimal_type(self, precision: Optional[int], scale: Optional[int]) -> str:
+        # Use BigQuery's BIGNUMERIC for large precision decimals
+        precision, scale = self.decimal_precision(precision, scale)
+        if precision > 38 or scale > 9:
+            return "BIGNUMERIC(%i,%i)" % (precision, scale)
+        return "NUMERIC(%i,%i)" % (precision, scale)
+
     # noinspection PyTypeChecker,PydanticTypeChecker
     def from_db_type(
         self, db_type: str, precision: Optional[int], scale: Optional[int]
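
The cutoff mirrors BigQuery's parameterized NUMERIC limits (precision at most 38, scale at most 9); any wider declaration now renders as BIGNUMERIC. Below is a minimal standalone sketch of that decision, with an illustrative (38, 9) default standing in for the capabilities-derived fallback that `decimal_precision` applies inside dlt; the module-level names here are ours, not the commit's:

```python
from typing import Optional, Tuple

# Illustrative stand-in for the capabilities-based defaults that
# TypeMapper.decimal_precision falls back to in dlt.
DEFAULT_PRECISION, DEFAULT_SCALE = 38, 9


def decimal_precision(precision: Optional[int], scale: Optional[int]) -> Tuple[int, int]:
    return (
        DEFAULT_PRECISION if precision is None else precision,
        DEFAULT_SCALE if scale is None else scale,
    )


def to_db_decimal_type(precision: Optional[int], scale: Optional[int]) -> str:
    # Parameterized NUMERIC tops out at precision 38 / scale 9 in BigQuery;
    # anything wider must be declared as BIGNUMERIC.
    precision, scale = decimal_precision(precision, scale)
    if precision > 38 or scale > 9:
        return "BIGNUMERIC(%i,%i)" % (precision, scale)
    return "NUMERIC(%i,%i)" % (precision, scale)


assert to_db_decimal_type(None, None) == "NUMERIC(38,9)"  # defaults still fit NUMERIC
assert to_db_decimal_type(47, 9) == "BIGNUMERIC(47,9)"    # 47 total digits overflow NUMERIC
```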

dlt/destinations/type_mapping.py
Lines changed: 8 additions & 0 deletions

@@ -36,6 +36,12 @@ def to_db_time_type(self, precision: Optional[int], table_format: TTableFormat =
         # Override in subclass if db supports other time types (e.g. with different time resolutions)
         return None

+    def to_db_decimal_type(self, precision: Optional[int], scale: Optional[int]) -> str:
+        precision_tup = self.decimal_precision(precision, scale)
+        if not precision_tup or "decimal" not in self.sct_to_dbt:
+            return self.sct_to_unbound_dbt["decimal"]
+        return self.sct_to_dbt["decimal"] % (precision_tup[0], precision_tup[1])
+
     def to_db_type(self, column: TColumnSchema, table_format: TTableFormat = None) -> str:
         precision, scale = column.get("precision"), column.get("scale")
         sc_t = column["data_type"]
@@ -45,6 +51,8 @@ def to_db_type(self, column: TColumnSchema, table_format: TTableFormat = None) -
             db_t = self.to_db_datetime_type(precision, table_format)
         elif sc_t == "time":
             db_t = self.to_db_time_type(precision, table_format)
+        elif sc_t == "decimal":
+            db_t = self.to_db_decimal_type(precision, scale)
         else:
             db_t = None
         if db_t:
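
The commit message's second item, "Handle dest without decimal type", is the `"decimal" not in self.sct_to_dbt` guard above: a destination whose `sct_to_dbt` has no parameterized decimal template simply keeps its unbound decimal type. A hedged toy illustration of that fallback (ToyMapper and its simplified `decimal_precision` are ours, not dlt's classes):

```python
from typing import Dict, Optional, Tuple


class ToyMapper:
    """Toy destination mapper with an unbound DECIMAL but no parameterized
    template; attribute names mirror dlt's TypeMapper."""

    sct_to_unbound_dbt: Dict[str, str] = {"decimal": "DECIMAL"}
    sct_to_dbt: Dict[str, str] = {}  # no "decimal" entry -> unbound fallback

    def decimal_precision(
        self, precision: Optional[int], scale: Optional[int]
    ) -> Optional[Tuple[int, int]]:
        # Simplified: dlt substitutes capabilities-based defaults here.
        if precision is None or scale is None:
            return None
        return precision, scale

    def to_db_decimal_type(self, precision: Optional[int], scale: Optional[int]) -> str:
        precision_tup = self.decimal_precision(precision, scale)
        # Without a parameterized template (or without usable precision),
        # fall back to the unbound decimal type.
        if not precision_tup or "decimal" not in self.sct_to_dbt:
            return self.sct_to_unbound_dbt["decimal"]
        return self.sct_to_dbt["decimal"] % (precision_tup[0], precision_tup[1])


assert ToyMapper().to_db_decimal_type(20, 4) == "DECIMAL"
assert ToyMapper().to_db_decimal_type(None, None) == "DECIMAL"
```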

tests/load/bigquery/test_bigquery_table_builder.py
Lines changed: 20 additions & 1 deletion

@@ -71,7 +71,24 @@ def gcp_client(schema: Schema) -> BigQueryClient:

 def test_create_table(gcp_client: BigQueryClient) -> None:
     # non existing table
-    sql = gcp_client._get_table_update_sql("event_test_table", TABLE_UPDATE, False)[0]
+    # Add BIGNUMERIC column
+    table_update = TABLE_UPDATE + [
+        {
+            "name": "col_high_p_decimal",
+            "data_type": "decimal",
+            "precision": 76,
+            "scale": 0,
+            "nullable": False,
+        },
+        {
+            "name": "col_high_s_decimal",
+            "data_type": "decimal",
+            "precision": 38,
+            "scale": 24,
+            "nullable": False,
+        },
+    ]
+    sql = gcp_client._get_table_update_sql("event_test_table", table_update, False)[0]
     sqlfluff.parse(sql, dialect="bigquery")
     assert sql.startswith("CREATE TABLE")
     assert "event_test_table" in sql
@@ -92,6 +109,8 @@ def test_create_table(gcp_client: BigQueryClient) -> None:
     assert "`col6_precision` NUMERIC(6,2) NOT NULL" in sql
     assert "`col7_precision` BYTES(19)" in sql
     assert "`col11_precision` TIME NOT NULL" in sql
+    assert "`col_high_p_decimal` BIGNUMERIC(76,0) NOT NULL" in sql
+    assert "`col_high_s_decimal` BIGNUMERIC(38,24) NOT NULL" in sql
     assert "CLUSTER BY" not in sql
     assert "PARTITION BY" not in sql
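
The two added columns trip each branch of the `precision > 38 or scale > 9` check independently: precision 76 overflows the 38-digit NUMERIC cap, while scale 24 overflows its 9-digit fractional cap. A self-contained restatement of the rule these assertions rely on (`expected_bq_decimal` is a hypothetical helper, not test code):

```python
def expected_bq_decimal(precision: int, scale: int) -> str:
    # Same rule as the new BigQueryTypeMapper method: parameterized NUMERIC
    # is capped at precision 38 / scale 9, everything wider is BIGNUMERIC.
    if precision > 38 or scale > 9:
        return "BIGNUMERIC(%i,%i)" % (precision, scale)
    return "NUMERIC(%i,%i)" % (precision, scale)


assert expected_bq_decimal(76, 0) == "BIGNUMERIC(76,0)"    # precision > 38
assert expected_bq_decimal(38, 24) == "BIGNUMERIC(38,24)"  # scale > 9
assert expected_bq_decimal(6, 2) == "NUMERIC(6,2)"         # col6_precision stays NUMERIC
```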

Lines changed: 38 additions & 0 deletions

@@ -0,0 +1,38 @@
+import pytest
+
+from dlt.common import Decimal
+
+from tests.pipeline.utils import assert_load_info
+from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration
+from tests.load.utils import delete_dataset
+
+
+@pytest.mark.parametrize(
+    "destination_config",
+    destinations_configs(default_sql_configs=True, subset=["bigquery"]),
+    ids=lambda x: x.name,
+)
+def test_bigquery_numeric_types(destination_config: DestinationTestConfiguration) -> None:
+    pipeline = destination_config.setup_pipeline("test_bigquery_numeric_types")
+
+    columns = [
+        {"name": "col_big_numeric", "data_type": "decimal", "precision": 47, "scale": 9},
+        {"name": "col_numeric", "data_type": "decimal", "precision": 38, "scale": 9},
+    ]
+
+    data = [
+        {
+            # Valid BIGNUMERIC and NUMERIC values
+            "col_big_numeric": Decimal("12345678901234567890123456789012345678.123456789"),
+            "col_numeric": Decimal("12345678901234567890123456789.123456789"),
+        },
+    ]
+
+    info = pipeline.run(iter(data), table_name="big_numeric", columns=columns)  # type: ignore[arg-type]
+    assert_load_info(info)
+
+    with pipeline.sql_client() as client:
+        with client.execute_query("SELECT col_big_numeric, col_numeric FROM big_numeric;") as q:
+            row = q.fetchone()
+            assert row[0] == data[0]["col_big_numeric"]
+            assert row[1] == data[0]["col_numeric"]
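
The literals sit right at the NUMERIC boundary: 38 total digits with scale 9 is the widest NUMERIC column BigQuery accepts, while 47 total digits force BIGNUMERIC. A quick stdlib-only check of those digit counts (the `digits` helper is ours, not part of the test):

```python
from decimal import Decimal
from typing import Tuple

big = Decimal("12345678901234567890123456789012345678.123456789")
small = Decimal("12345678901234567890123456789.123456789")


def digits(d: Decimal) -> Tuple[int, int]:
    # Decimal.as_tuple() exposes the mantissa digits and the exponent,
    # which translate directly into (total precision, scale).
    sign, mantissa, exponent = d.as_tuple()
    return len(mantissa), -exponent


assert digits(big) == (47, 9)    # needs BIGNUMERIC: 47 digits > 38
assert digits(small) == (38, 9)  # fits NUMERIC(38,9) exactly
```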
