11import os
22from datetime import datetime # noqa: I251
3- from typing import Generic , ClassVar , Any , Optional , Type , Dict , Union , Literal , Tuple
3+ from typing import (
4+ Generic ,
5+ ClassVar ,
6+ Any ,
7+ Optional ,
8+ Type ,
9+ Dict ,
10+ Union ,
11+ Literal ,
12+ Tuple ,
13+ )
414
515import inspect
616from functools import wraps
2030 is_optional_type ,
2131 is_subclass ,
2232 TColumnNames ,
33+ TypedDict ,
2334)
2435from dlt .common .configuration import configspec , ConfigurationValueError
2536from dlt .common .configuration .specs import BaseConfiguration
6374 pandas = None
6475
6576
class IncrementalCustomMetrics(TypedDict, total=False):
    """Custom metrics collected by `Incremental` while it processes data items.

    Declared with `total=False`, so every key is optional and an empty dict
    is a valid value of this type.
    """

    # running total: `Incremental.__call__` adds `count_rows_in_items(rows)` before it obtains a transformer
    unfiltered_items_count: int
    # running total: incremented by one alongside `unfiltered_items_count`
    unfiltered_batches_count: int
    # length of the cached state's "unique_hashes" list (0 when missing or empty),
    # taken after `last_value` is written back to the cached state
    initial_unique_hashes_count: int
    # length of "unique_hashes" after newly computed hashes are merged in
    # NOTE(review): appears to be written only on the boundary-deduplication path - confirm
    final_unique_hashes_count: int
82+
83+
6684@configspec
67- class Incremental (ItemTransform [TDataItem ], BaseConfiguration , Generic [TCursorValue ]):
85+ class Incremental (
86+ ItemTransform [TDataItem , IncrementalCustomMetrics ], BaseConfiguration , Generic [TCursorValue ]
87+ ):
6888 """Adds incremental extraction for a resource by storing a cursor value in persistent state.
6989
7090 The cursor could for example be a timestamp for when the record was created and you can use this to load only
@@ -191,8 +211,12 @@ def __init__(
191211 """Bound pipe"""
192212 self .range_start = range_start
193213 self .range_end = range_end
194- # Initialize custom metrics
195- BaseItemTransform .__init__ (self )
214+ self ._custom_metrics : IncrementalCustomMetrics = {
215+ "unfiltered_items_count" : 0 ,
216+ "unfiltered_batches_count" : 0 ,
217+ "initial_unique_hashes_count" : 0 ,
218+ "final_unique_hashes_count" : 0 ,
219+ }
196220
197221 @property
198222 def primary_key (self ) -> Optional [TTableHintTemplate [TColumnNames ]]:
@@ -570,13 +594,8 @@ def __call__(self, rows: TDataItems, meta: Any = None) -> Optional[TDataItems]:
570594 return rows
571595
572596 # collect metrics
573- self .custom_metrics ["unfiltered_items_count" ] = self .custom_metrics .get (
574- "unfiltered_items_count" , 0
575- ) + count_rows_in_items (rows )
576- self .custom_metrics ["unfiltered_batches_count" ] = (
577- self .custom_metrics .get ("unfiltered_batches_count" , 0 ) + 1
578- )
579- self .custom_metrics ["unique_hashes_count" ] = len (self .get_state ().get ("unique_hashes" , []))
597+ self .custom_metrics ["unfiltered_items_count" ] += count_rows_in_items (rows )
598+ self .custom_metrics ["unfiltered_batches_count" ] += 1
580599
581600 transformer = self ._get_transform (rows )
582601 if isinstance (rows , list ):
@@ -599,6 +618,10 @@ def __call__(self, rows: TDataItems, meta: Any = None) -> Optional[TDataItems]:
599618 # writing back state
600619 self ._cached_state ["last_value" ] = transformer .last_value
601620
621+ initial_hash_list = self ._cached_state .get ("unique_hashes" )
622+ initial_hash_count = len (initial_hash_list ) if initial_hash_list else 0
623+ self .custom_metrics ["initial_unique_hashes_count" ] = initial_hash_count
624+
602625 if transformer .boundary_deduplication :
603626 # compute hashes for new last rows
604627 # NOTE: object transform uses last_rows to pass rows to dedup, arrow computes
@@ -607,11 +630,11 @@ def __call__(self, rows: TDataItems, meta: Any = None) -> Optional[TDataItems]:
607630 transformer .compute_unique_value (row , self .primary_key )
608631 for row in transformer .last_rows
609632 )
610- initial_hash_count = len (self ._cached_state .get ("unique_hashes" , []))
611633 # add directly computed hashes
612634 unique_hashes .update (transformer .unique_hashes )
613635 self ._cached_state ["unique_hashes" ] = list (unique_hashes )
614636 final_hash_count = len (self ._cached_state ["unique_hashes" ])
637+ self .custom_metrics ["final_unique_hashes_count" ] = final_hash_count
615638
616639 self ._check_duplicate_cursor_threshold (initial_hash_count , final_hash_count )
617640 return rows
@@ -636,7 +659,7 @@ def _check_duplicate_cursor_threshold(
636659TIncrementalConfig = Union [Incremental [Any ], IncrementalArgs ]
637660
638661
639- class IncrementalResourceWrapper (ItemTransform [TDataItem ]):
662+ class IncrementalResourceWrapper (ItemTransform [TDataItem , IncrementalCustomMetrics ]):
640663 placement_affinity : ClassVar [float ] = 1 # stick to end
641664
642665 _incremental : Optional [Incremental [Any ]] = None
@@ -798,8 +821,8 @@ def allow_external_schedulers(self, value: bool) -> None:
798821 self ._incremental .allow_external_schedulers = value
799822
@property
def custom_metrics(self) -> IncrementalCustomMetrics:
    """Custom metrics of the wrapped Incremental, or an empty dict when none is set."""
    incremental = self._incremental
    if not incremental:
        # no (truthy) incremental is attached to this wrapper, so there is nothing to report
        return {}
    return incremental.custom_metrics
0 commit comments