Skip to content

Commit 89a50d5

Browse files
committed
improve DataValidationError output: use identifying columns if present
1 parent edfc461 commit 89a50d5

File tree

2 files changed

+96
-0
lines changed

2 files changed

+96
-0
lines changed

dlt/common/schema/exceptions.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,25 @@ def __init__(
178178
+ f" . Contract on `{schema_entity}` with `{contract_mode=:}` is violated. "
179179
+ (extended_info or "")
180180
)
181+
data_item_str = ""
182+
if (
183+
data_item
184+
and hasattr(data_item, "get")
185+
and table_schema
186+
and hasattr(table_schema, "get")
187+
):
188+
identifier_columns = [
189+
x.get("name")
190+
for x in table_schema.get("columns", {}).values()
191+
if x.get("primary_key") or x.get("merge_key") or x.get("unique")
192+
]
193+
if identifier_columns:
194+
data_item_str += "Offending data item: "
195+
data_item_keys = [
196+
f"{column}: {data_item.get(column)}" for column in identifier_columns
197+
]
198+
data_item_str += ", ".join(data_item_keys)
199+
msg += f" {data_item_str}"
181200
super().__init__(schema_name, msg)
182201
self.table_name = table_name
183202
self.column_name = column_name

tests/common/schema/test_schema_contract.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,3 +354,80 @@ def test_check_adding_new_variant() -> None:
354354
cast(TSchemaContractDict, {**DEFAULT_SCHEMA_CONTRACT_MODE, **{"data_type": "freeze"}}),
355355
copy.deepcopy(table_update),
356356
)
357+
358+
359+
def test_data_validation_error_message_with_primary_key() -> None:
    """A `data_type` freeze violation reports the offending item by its primary key value."""
    schema = get_schema()

    # register a table whose single column is flagged as the primary key
    initial_table: TTableSchema = {
        "name": "tables",
        "columns": {
            "column_1": {"name": "column_1", "data_type": "text", "primary_key": True},
        },
    }
    accepted_table, _ = schema.apply_schema_contract(DEFAULT_SCHEMA_CONTRACT_MODE, initial_table)
    schema.update_table(accepted_table)

    # a follow-up update that introduces a variant column
    variant_column = {
        "name": "column_2_variant",
        "data_type": "boolean",
        "variant": True,
    }
    variant_update: TTableSchema = {
        "name": "tables",
        "columns": {"column_2_variant": variant_column},
    }

    # with `data_type` frozen the update must be rejected; the data item is passed as evidence
    frozen_data_type_contract = {**DEFAULT_SCHEMA_CONTRACT_MODE, **{"data_type": "freeze"}}
    offending_item = {"column_1": "test", "column_2": 123}
    with pytest.raises(DataValidationError) as raised:
        schema.apply_schema_contract(
            frozen_data_type_contract,
            variant_update,
            data_item=offending_item,
        )

    # only the primary key column of the data item is expected in the message
    message = str(raised.value)
    assert "Offending data item: column_1: test" in message
393+
394+
395+
def test_data_validation_error_message_with_multiple_identifiers() -> None:
    """A `data_type` freeze violation lists primary key, merge key and unique column values."""
    schema = get_schema()

    # one column per identifier hint: primary_key, merge_key and unique
    initial_table: TTableSchema = {
        "name": "tables",
        "columns": {
            "column_1": {"name": "column_1", "data_type": "text", "primary_key": True},
            "column_2": {"name": "column_2", "data_type": "bigint", "merge_key": True},
            "column_3": {"name": "column_3", "data_type": "text", "unique": True},
        },
    }
    accepted_table, _ = schema.apply_schema_contract(DEFAULT_SCHEMA_CONTRACT_MODE, initial_table)
    schema.update_table(accepted_table)

    # a follow-up update that introduces a variant column (data type evolution)
    variant_update: TTableSchema = {
        "name": "tables",
        "columns": {
            "column_4_variant": {
                "name": "column_4_variant",
                "data_type": "text",
                "variant": True,
            },
        },
    }

    # the data item carries a value for every identifier column plus one extra column
    offending_item = {
        "column_1": "test",
        "column_2": 123,
        "column_3": "unique_value",
        "column_4": "some_text",
    }
    frozen_data_type_contract = {**DEFAULT_SCHEMA_CONTRACT_MODE, **{"data_type": "freeze"}}
    with pytest.raises(DataValidationError) as raised:
        schema.apply_schema_contract(
            frozen_data_type_contract,
            variant_update,
            data_item=offending_item,
        )

    # all three identifier columns are expected in the message, in column order
    message = str(raised.value)
    assert "Offending data item: column_1: test, column_2: 123, column_3: unique_value" in message

0 commit comments

Comments
 (0)