4141from dlt .common .schema .typing import TColumnNames , TColumnSchema , TWriteDisposition , TSchemaContract
4242from dlt .common .source import get_current_pipe_name
4343from dlt .common .storages .load_storage import LoadPackageInfo
44+ from dlt .common .time import ensure_pendulum_datetime , precise_time
4445from dlt .common .typing import DictStrAny , REPattern , StrAny , SupportsHumanize
4546from dlt .common .jsonpath import delete_matches , TAnyJsonPath
4647from dlt .common .data_writers .writers import DataWriterMetrics , TLoaderFileFormat
@@ -53,24 +54,61 @@ class _StepInfo(NamedTuple):
5354 """ids of the loaded packages"""
5455 load_packages : List [LoadPackageInfo ]
5556 """Information on loaded packages"""
56- started_at : datetime .datetime
5757 first_run : bool
58+ started_at : datetime .datetime
59+ finished_at : datetime .datetime
60+
61+
class StepMetrics(TypedDict):
    """Timing metrics recorded for one package handled by one pipeline step"""

    started_at: datetime.datetime
    """Moment at which processing of the package began"""
    finished_at: datetime.datetime
    """Moment at which processing of the package ended"""
69+
5870
71+ TStepMetricsCo = TypeVar ("TStepMetricsCo" , bound = StepMetrics , covariant = True )
5972
60- class StepInfo (SupportsHumanize ):
73+
74+ class StepInfo (SupportsHumanize , Generic [TStepMetricsCo ]):
6175 pipeline : "SupportsPipeline"
76+ metrics : Dict [str , List [TStepMetricsCo ]]
77+ """Metrics per load id. If many sources with the same name were extracted, there will be more than 1 element in the list"""
6278 loads_ids : List [str ]
6379 """ids of the loaded packages"""
6480 load_packages : List [LoadPackageInfo ]
6581 """Information on loaded packages"""
66- started_at : datetime .datetime
6782 first_run : bool
6883
@property
def started_at(self) -> datetime.datetime:
    """Returns the earliest start date of all collected metrics

    Looks at the `started_at` value of every metrics entry of every load id. Returns None
    when `metrics` is empty or unset, or when none of the per-load lists holds an entry.
    """
    per_load_metrics = (self.metrics or {}).values()
    # `default=None` covers the case where every per-load list is empty, so there is no
    # need to catch ValueError (which would also hide unrelated errors)
    return min((m["started_at"] for l_m in per_load_metrics for m in l_m), default=None)
93+
@property
def finished_at(self) -> datetime.datetime:
    """Returns the latest end date of all collected metrics

    Looks at the `finished_at` value of every metrics entry of every load id. Returns None
    when `metrics` is empty or unset, or when none of the per-load lists holds an entry.
    """
    per_load_metrics = (self.metrics or {}).values()
    # `default=None` covers the case where every per-load list is empty, so there is no
    # need to catch ValueError (which would also hide unrelated errors)
    return max((m["finished_at"] for l_m in per_load_metrics for m in l_m), default=None)
103+
def asdict(self) -> DictStrAny:
    """Convert the step info into a plain dictionary

    The NamedTuple fields form the base of the result. The pipeline is replaced by a dict
    holding only its name and every load package by its own `asdict()` form. `started_at`
    and `finished_at` are added only when metrics are present.
    """
    # `_asdict` comes from the NamedTuple this class is mixed with
    as_dict: DictStrAny = self._asdict()  # type: ignore
    as_dict["pipeline"] = {"pipeline_name": self.pipeline.pipeline_name}
    as_dict["load_packages"] = [pkg.asdict() for pkg in self.load_packages]
    if not self.metrics:
        return as_dict
    as_dict["started_at"] = self.started_at
    as_dict["finished_at"] = self.finished_at
    return as_dict
75113
76114 def __str__ (self ) -> str :
@@ -91,7 +129,12 @@ def job_metrics_asdict(
91129
def _astuple(self) -> _StepInfo:
    """Pack the common step info fields into a `_StepInfo` tuple"""
    # values are passed positionally, so this order must follow the field order of `_StepInfo`
    values = (
        self.pipeline,
        self.loads_ids,
        self.load_packages,
        self.first_run,
        self.started_at,
        self.finished_at,
    )
    return _StepInfo(*values)
96139
97140
@@ -100,7 +143,7 @@ class ExtractDataInfo(TypedDict):
100143 data_type : str
101144
102145
103- class ExtractMetrics (TypedDict ):
146+ class ExtractMetrics (StepMetrics ):
104147 schema_name : str
105148 job_metrics : Dict [str , DataWriterMetrics ]
106149 """Metrics collected per job id during writing of job file"""
@@ -115,19 +158,19 @@ class ExtractMetrics(TypedDict):
115158
116159
class _ExtractInfo(NamedTuple):
    """Tuple fields of `ExtractInfo`

    A NamedTuple cannot be part of the derivation chain, so all fields are declared again
    here and the tuple is later used as a mixin.
    """

    pipeline: "SupportsPipeline"
    metrics: Dict[str, List[ExtractMetrics]]
    extract_data_info: List[ExtractDataInfo]
    loads_ids: List[str]
    """Identifiers of the loaded packages"""
    load_packages: List[LoadPackageInfo]
    """Details of the loaded packages"""
    first_run: bool
128171
129172
130- class ExtractInfo (StepInfo , _ExtractInfo ):
173+ class ExtractInfo (StepInfo [ ExtractMetrics ] , _ExtractInfo ): # type: ignore[misc]
131174 """A tuple holding information on extracted data items. Returned by pipeline `extract` method."""
132175
133176 def asdict (self ) -> DictStrAny :
@@ -178,7 +221,10 @@ def asstr(self, verbosity: int = 0) -> str:
178221 return ""
179222
180223
181- class NormalizeMetrics (TypedDict ):
224+ # reveal_type(ExtractInfo)
225+
226+
227+ class NormalizeMetrics (StepMetrics ):
182228 job_metrics : Dict [str , DataWriterMetrics ]
183229 """Metrics collected per job id during writing of job file"""
184230 table_metrics : Dict [str , DataWriterMetrics ]
@@ -192,11 +238,10 @@ class _NormalizeInfo(NamedTuple):
192238 """ids of the loaded packages"""
193239 load_packages : List [LoadPackageInfo ]
194240 """Information on loaded packages"""
195- started_at : datetime .datetime
196241 first_run : bool
197242
198243
199- class NormalizeInfo (StepInfo , _NormalizeInfo ):
244+ class NormalizeInfo (StepInfo [ NormalizeMetrics ] , _NormalizeInfo ): # type: ignore[misc]
200245 """A tuple holding information on normalized data items. Returned by pipeline `normalize` method."""
201246
202247 @property
@@ -244,8 +289,13 @@ def asstr(self, verbosity: int = 0) -> str:
244289 return msg
245290
246291
class LoadMetrics(StepMetrics):
    """Metrics of the load step; declares no fields beyond those of `StepMetrics`"""
294+
295+
247296class _LoadInfo (NamedTuple ):
248297 pipeline : "SupportsPipeline"
298+ metrics : Dict [str , List [LoadMetrics ]]
249299 destination_type : str
250300 destination_displayable_credentials : str
251301 destination_name : str
@@ -259,11 +309,10 @@ class _LoadInfo(NamedTuple):
259309 """ids of the loaded packages"""
260310 load_packages : List [LoadPackageInfo ]
261311 """Information on loaded packages"""
262- started_at : datetime .datetime
263312 first_run : bool
264313
265314
266- class LoadInfo (StepInfo , _LoadInfo ):
315+ class LoadInfo (StepInfo [ LoadMetrics ] , _LoadInfo ): # type: ignore[misc]
267316 """A tuple holding the information on recently loaded packages. Returned by pipeline `run` and `load` methods"""
268317
269318 def asdict (self ) -> DictStrAny :
@@ -329,32 +378,38 @@ def __str__(self) -> str:
329378 return self .asstr (verbosity = 1 )
330379
331380
332- TStepMetrics = TypeVar ("TStepMetrics" )
333- TStepInfo = TypeVar ("TStepInfo" , bound = StepInfo )
381+ TStepMetrics = TypeVar ("TStepMetrics" , bound = StepMetrics , covariant = False )
382+ TStepInfo = TypeVar ("TStepInfo" , bound = StepInfo [ StepMetrics ] )
334383
335384
336385class WithStepInfo (ABC , Generic [TStepMetrics , TStepInfo ]):
337386 """Implemented by classes that generate StepInfo with metrics and package infos"""
338387
_current_load_id: str
"""Load id currently being processed, None when no load id is in progress"""
_load_id_metrics: Dict[str, List[TStepMetrics]]
"""Completed load ids metrics"""
_current_load_started: float
"""Value of `precise_time()` taken when the current load id was started, None when no load
id is in progress"""

def __init__(self) -> None:
    """Start with no collected metrics and no load id in progress"""
    self._load_id_metrics = {}
    self._current_load_id = None
    self._current_load_started = None
346397
def _step_info_start_load_id(self, load_id: str) -> None:
    """Mark `load_id` as the load id in progress and record when its processing started"""
    self._current_load_id, self._current_load_started = load_id, precise_time()
    # make sure the load id has a metrics list, keeping any list that already exists
    if load_id not in self._load_id_metrics:
        self._load_id_metrics[load_id] = []
350402
def _step_info_complete_load_id(self, load_id: str, metrics: TStepMetrics) -> None:
    """Stamp `metrics` with start and finish times and store them under `load_id`

    `load_id` must be the load id currently in progress. `metrics` is modified in place:
    its `started_at` and `finished_at` keys are set before it is appended to the list kept
    for `load_id`. Afterwards no load id is in progress.
    """
    current_load_id = self._current_load_id
    assert current_load_id == load_id, (
        f"Current load id mismatch {current_load_id} != {load_id} when completing step"
        " info"
    )
    started_at = ensure_pendulum_datetime(self._current_load_started)
    metrics["started_at"] = started_at
    # the finish time is taken at the moment of completion
    finished_at = ensure_pendulum_datetime(precise_time())
    metrics["finished_at"] = finished_at
    self._load_id_metrics[load_id].append(metrics)
    # reset the in-progress state
    self._current_load_id = self._current_load_started = None
358413
def _step_info_metrics(self, load_id: str) -> List[TStepMetrics]:
    """Return the list of metrics collected for `load_id`; raises KeyError for an unknown id"""
    collected = self._load_id_metrics
    return collected[load_id]
@@ -368,8 +423,6 @@ def current_load_id(self) -> str:
def get_step_info(
    self,
    pipeline: "SupportsPipeline",
) -> TStepInfo:
    """Returns an instance of StepInfo with metrics and package infos

    The body here is empty and yields None; classes deriving from `WithStepInfo` are
    expected to supply the actual implementation.
    """
    pass
0 commit comments