@@ -4444,34 +4444,39 @@ def _run_with_items(items: TDataItems, as_batch: bool) -> str:
44444444 load_id = _run_with_items ([{"id" : 3 , "value" : "3" }, {"id" : 4 , "value" : "4" }], False )
44454445 _assert_custom_metrics (load_id , 5 , 4 , 1 , 1 , 1 )
44464446
4447- # 4. run with duplicate cursor field values, but different hashes, as batch
4447+ # 4. run with duplicate cursor field values, but different hashes, as a single batch
44484448 load_id = _run_with_items (
44494449 [{"id" : 5 , "value" : "5.1" }, {"id" : 5 , "value" : "5.2" }, {"id" : 5 , "value" : "5.3" }], True
44504450 )
44514451 _assert_custom_metrics (load_id , 8 , 5 , 1 , 3 , 3 )
44524452
4453- # 5. run with the same values as batch from previous run, but with no boundary deduplication
4453+ # 5. reset incremental with no boundary deduplication (primary_key=()) and run with the same values
4454+ # from previous run, should be loaded as a single batch with 3 items
44544455 resource_with_metrics .apply_hints (
44554456 incremental = dlt .sources .incremental (cursor_path = "id" , initial_value = - 1 , primary_key = ())
44564457 )
44574458 load_id = _run_with_items (
44584459 [{"id" : 5 , "value" : "5.1" }, {"id" : 5 , "value" : "5.2" }, {"id" : 5 , "value" : "5.3" }], True
44594460 )
4460- _assert_custom_metrics (load_id , 3 , 1 , 3 , 0 , 3 )
4461+ _assert_custom_metrics (load_id , 3 , 1 , 0 , 0 , 3 )
44614462
4462- # 6. run with two new items as a single batch
4463- load_id = _run_with_items ([{"id" : 6 , "value" : "6.1" }, {"id" : 6 , "value" : "6.2" }], True )
4464- _assert_custom_metrics (load_id , 5 , 2 , 3 , 0 , 2 )
4463+ # 6. run with one old and one new item as a single batch (still no boundary deduplication)
4464+ # should be loaded as a single batch with 2 items
4465+ load_id = _run_with_items ([{"id" : 5 , "value" : "5.1" }, {"id" : 6 , "value" : "6.1" }], True )
4466+ _assert_custom_metrics (load_id , 5 , 2 , 0 , 0 , 2 )
44654467
4466- # 7. run with two new items as a single batch, with boundary deduplication
4468+ # 7. enable boundary deduplication and run with one old and one new item as a single batch
4469+ # should be loaded as a single batch with 2 items
44674470 resource_with_metrics .incremental .primary_key = "id"
4468- load_id = _run_with_items ({"id" : 7 , "value" : "7" }, True )
4469- _assert_custom_metrics (load_id , 6 , 3 , 3 , 1 , 1 )
4471+ load_id = _run_with_items ([ {"id" : 6 , "value" : "6.1" }, { "id" : 7 , "value" : "7" }] , True )
4472+ _assert_custom_metrics (load_id , 7 , 3 , 0 , 1 , 2 )
44704473
4471- # 8. run with None within a batch -> should increment unfiltered_items_count
4472- load_id = _run_with_items ([None , {"id" : 8 , "value" : "8" }], True )
4473- _assert_custom_metrics (load_id , 8 , 4 , 1 , 1 , 1 )
4474-
4475- # 9. run with None as a single batch -> should not increment unfiltered_items_count
4476- load_id = _run_with_items ([None , {"id" : 9 , "value" : "9" }], False )
4474+ # 8. run with one old and one new item, not as a single batch (as_batch=False)
4475+ # only the new item should be loaded
4476+ load_id = _run_with_items ([{"id" : 7 , "value" : "7" }, {"id" : 8 , "value" : "8" }], False )
44774477 _assert_custom_metrics (load_id , 9 , 5 , 1 , 1 , 1 )
4478+
4479+ # 9. run with None items and one new item as a single batch
4480+ # None items should increment unfiltered_items_count
4481+ load_id = _run_with_items ([None , None , {"id" : 9 , "value" : "9" }], True )
4482+ _assert_custom_metrics (load_id , 12 , 6 , 1 , 1 , 1 )
0 commit comments