|
23 | 23 |
|
24 | 24 | from pyiceberg.catalog import Catalog
|
25 | 25 | from pyiceberg.exceptions import NoSuchTableError
|
26 |
| -from pyiceberg.expressions import AlwaysTrue, And, EqualTo, Reference |
27 |
| -from pyiceberg.expressions.literals import LongLiteral |
| 26 | +from pyiceberg.expressions import AlwaysTrue, And, BooleanExpression, EqualTo, In, IsNaN, IsNull, Or, Reference |
| 27 | +from pyiceberg.expressions.literals import DoubleLiteral, LongLiteral |
28 | 28 | from pyiceberg.io.pyarrow import schema_to_pyarrow
|
29 | 29 | from pyiceberg.schema import Schema
|
30 | 30 | from pyiceberg.table import UpsertResult
|
@@ -440,6 +440,70 @@ def test_create_match_filter_single_condition() -> None:
|
440 | 440 | )
|
441 | 441 |
|
442 | 442 |
|
@pytest.mark.parametrize(
    "data, expected",
    [
        # One join column: the null and NaN rows become dedicated predicates,
        # while the remaining finite values are gathered into one In(...) set.
        pytest.param(
            [{"x": 1.0}, {"x": 2.0}, {"x": None}, {"x": 4.0}, {"x": float("nan")}],
            Or(
                left=IsNull(term=Reference(name="x")),
                right=Or(
                    left=IsNaN(term=Reference(name="x")),
                    right=In(Reference(name="x"), {DoubleLiteral(1.0), DoubleLiteral(2.0), DoubleLiteral(4.0)}),
                ),
            ),
            id="single-column",
        ),
        # Two join columns: each input row is expected to yield one And(...) of
        # per-column predicates, with IsNull / IsNaN in place of EqualTo where
        # the cell is None / NaN.
        pytest.param(
            [
                {"x": 1.0, "y": 9.0},
                {"x": 2.0, "y": None},
                {"x": None, "y": 7.0},
                {"x": 4.0, "y": float("nan")},
                {"x": float("nan"), "y": 0.0},
            ],
            Or(
                left=Or(
                    left=And(
                        left=EqualTo(term=Reference(name="x"), literal=DoubleLiteral(1.0)),
                        right=EqualTo(term=Reference(name="y"), literal=DoubleLiteral(9.0)),
                    ),
                    right=And(
                        left=EqualTo(term=Reference(name="x"), literal=DoubleLiteral(2.0)),
                        right=IsNull(term=Reference(name="y")),
                    ),
                ),
                right=Or(
                    left=And(
                        left=IsNull(term=Reference(name="x")),
                        right=EqualTo(term=Reference(name="y"), literal=DoubleLiteral(7.0)),
                    ),
                    right=Or(
                        left=And(
                            left=EqualTo(term=Reference(name="x"), literal=DoubleLiteral(4.0)),
                            right=IsNaN(term=Reference(name="y")),
                        ),
                        right=And(
                            left=IsNaN(term=Reference(name="x")),
                            right=EqualTo(term=Reference(name="y"), literal=DoubleLiteral(0.0)),
                        ),
                    ),
                ),
            ),
            id="multi-column",
        ),
    ],
)
def test_create_match_filter_with_nulls(data: list[dict], expected: BooleanExpression) -> None:
    """Check the match filter built for float64 key columns holding None and NaN.

    The Arrow table always declares both ``x`` and ``y`` as float64 columns;
    the join columns passed to ``create_match_filter`` are only the keys that
    actually occur in the parametrized records, in sorted order.
    """
    float_fields = [pa.field(column, pa.float64()) for column in ("x", "y")]
    arrow_table = pa.Table.from_pylist(data, schema=pa.schema(float_fields))

    # Union of every record's keys, sorted so the column order is deterministic.
    key_columns = sorted(set().union(*data))

    assert create_match_filter(arrow_table, key_columns) == expected
| 505 | + |
| 506 | + |
443 | 507 | def test_upsert_with_duplicate_rows_in_table(catalog: Catalog) -> None:
|
444 | 508 | identifier = "default.test_upsert_with_duplicate_rows_in_table"
|
445 | 509 |
|
|
0 commit comments