Skip to content

Commit 075a966

Browse files
committed
test: add test cases for create_match_filter
1 parent 31ed623 commit 075a966

File tree

1 file changed

+66
-2
lines changed

1 file changed

+66
-2
lines changed

tests/table/test_upsert.py

Lines changed: 66 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,8 +23,8 @@
2323

2424
from pyiceberg.catalog import Catalog
2525
from pyiceberg.exceptions import NoSuchTableError
26-
from pyiceberg.expressions import AlwaysTrue, And, EqualTo, Reference
27-
from pyiceberg.expressions.literals import LongLiteral
26+
from pyiceberg.expressions import AlwaysTrue, And, BooleanExpression, EqualTo, In, IsNaN, IsNull, Or, Reference
27+
from pyiceberg.expressions.literals import DoubleLiteral, LongLiteral
2828
from pyiceberg.io.pyarrow import schema_to_pyarrow
2929
from pyiceberg.schema import Schema
3030
from pyiceberg.table import UpsertResult
@@ -440,6 +440,70 @@ def test_create_match_filter_single_condition() -> None:
440440
)
441441

442442

443+
@pytest.mark.parametrize(
    "data, expected",
    [
        pytest.param(
            [{"x": 1.0}, {"x": 2.0}, {"x": None}, {"x": 4.0}, {"x": float("nan")}],
            # Expected: None and NaN get their own IsNull / IsNaN predicates,
            # while the remaining values are collected into one In set.
            Or(
                IsNull(Reference("x")),
                Or(
                    IsNaN(Reference("x")),
                    In(Reference("x"), {DoubleLiteral(1.0), DoubleLiteral(2.0), DoubleLiteral(4.0)}),
                ),
            ),
            id="single-column",
        ),
        pytest.param(
            [
                {"x": 1.0, "y": 9.0},
                {"x": 2.0, "y": None},
                {"x": None, "y": 7.0},
                {"x": 4.0, "y": float("nan")},
                {"x": float("nan"), "y": 0.0},
            ],
            # Expected: one And(...) per input row, with None / NaN cells
            # expressed as IsNull / IsNaN instead of EqualTo.
            Or(
                Or(
                    And(
                        EqualTo(Reference("x"), DoubleLiteral(1.0)),
                        EqualTo(Reference("y"), DoubleLiteral(9.0)),
                    ),
                    And(
                        EqualTo(Reference("x"), DoubleLiteral(2.0)),
                        IsNull(Reference("y")),
                    ),
                ),
                Or(
                    And(
                        IsNull(Reference("x")),
                        EqualTo(Reference("y"), DoubleLiteral(7.0)),
                    ),
                    Or(
                        And(
                            EqualTo(Reference("x"), DoubleLiteral(4.0)),
                            IsNaN(Reference("y")),
                        ),
                        And(
                            IsNaN(Reference("x")),
                            EqualTo(Reference("y"), DoubleLiteral(0.0)),
                        ),
                    ),
                ),
            ),
            id="multi-column",
        ),
    ],
)
def test_create_match_filter_with_nulls(data: list[dict], expected: BooleanExpression) -> None:
    """Check the filter built by create_match_filter when join columns hold None and NaN.

    Each case supplies the input rows (``data``) and the exact expression
    (``expected``) that create_match_filter must produce for them.
    """
    # Both columns are always declared as float64, even when a case only populates "x".
    arrow_schema = pa.schema([pa.field(name, pa.float64()) for name in ("x", "y")])

    # Join on every column name that occurs in at least one record, in sorted order.
    join_cols = sorted(set().union(*data))

    actual = create_match_filter(pa.Table.from_pylist(data, schema=arrow_schema), join_cols)

    assert actual == expected
505+
506+
443507
def test_upsert_with_duplicate_rows_in_table(catalog: Catalog) -> None:
444508
identifier = "default.test_upsert_with_duplicate_rows_in_table"
445509

0 commit comments

Comments
 (0)