4141from dlt .common .schema .typing import TColumnNames , TColumnSchema , TWriteDisposition , TSchemaContract
4242from dlt .common .source import get_current_pipe_name
4343from dlt .common .storages .load_storage import LoadPackageInfo
44- from dlt .common .typing import DictStrAny , REPattern , SupportsHumanize
44+ from dlt .common .typing import DictStrAny , REPattern , StrAny , SupportsHumanize
4545from dlt .common .jsonpath import delete_matches , TAnyJsonPath
4646from dlt .common .data_writers .writers import DataWriterMetrics , TLoaderFileFormat
4747from dlt .common .utils import RowCounts , merge_row_counts
4848
4949
class _StepInfo(NamedTuple):
    """Plain tuple of the five fields that `StepInfo._astuple` packs together."""

    # pipeline object; `StepInfo.asdict` reads its `pipeline_name`
    pipeline: "SupportsPipeline"
    loads_ids: List[str]
    """ids of the loaded packages"""
    load_packages: List[LoadPackageInfo]
    """Information on loaded packages"""
    started_at: datetime.datetime
    first_run: bool
58+
59+
5060class StepInfo (SupportsHumanize ):
5161 pipeline : "SupportsPipeline"
5262 loads_ids : List [str ]
@@ -56,9 +66,34 @@ class StepInfo(SupportsHumanize):
5666 started_at : datetime .datetime
5767 first_run : bool
5868
def asdict(self) -> DictStrAny:
    """Return the tuple fields as a dictionary with nested objects converted.

    Intended for subclasses that also derive from a NamedTuple, which is what
    provides `_asdict`. The `pipeline` entry is replaced with a dict holding only
    `pipeline_name`, and every load package is replaced with the result of its
    own `asdict()`.
    """
    # `_asdict` is not defined here; it comes from the NamedTuple mixed into subclasses
    info: DictStrAny = self._asdict()  # type: ignore
    info.update(
        pipeline={"pipeline_name": self.pipeline.pipeline_name},
        load_packages=[pkg.asdict() for pkg in self.load_packages],
    )
    return info
75+
def __str__(self) -> str:
    """Return the string produced by `asstr` at verbosity 0."""
    return self.asstr(verbosity=0)
6178
@staticmethod
def job_metrics_asdict(
    job_metrics: Dict[str, DataWriterMetrics], key_name: str = "job_id", extend: StrAny = None
) -> List[DictStrAny]:
    """Flatten a mapping of metrics into a list of dictionaries, one per entry.

    Each row starts from `metrics._asdict()`, is then updated with the optional
    `extend` mapping, and finally gets the mapping key stored under `key_name`.
    Later values win, so `key_name` overrides a same-named key coming from
    the metrics or from `extend`.
    """
    # a falsy `extend` (None or empty) contributes nothing to the rows
    extra = extend if extend else {}
    return [
        {**metrics._asdict(), **extra, key_name: ident}
        for ident, metrics in job_metrics.items()
    ]
91+
def _astuple(self) -> _StepInfo:
    """Pack the five common step attributes into a `_StepInfo` tuple."""
    return _StepInfo(
        pipeline=self.pipeline,
        loads_ids=self.loads_ids,
        load_packages=self.load_packages,
        started_at=self.started_at,
        first_run=self.first_run,
    )
96+
6297
6398class ExtractDataInfo (TypedDict ):
6499 name : str
@@ -82,6 +117,7 @@ class ExtractMetrics(TypedDict):
82117class _ExtractInfo (NamedTuple ):
83118 pipeline : "SupportsPipeline"
84119 metrics : Dict [str , List [ExtractMetrics ]]
120+ """Metrics per load id. If many sources with the same name were extracted, there will be more than 1 element in the list"""
85121 extract_data_info : List [ExtractDataInfo ]
86122 loads_ids : List [str ]
87123 """ids of the loaded packages"""
@@ -96,12 +132,46 @@ class ExtractInfo(StepInfo, _ExtractInfo):
96132
def asdict(self) -> DictStrAny:
    """A dictionary representation of ExtractInfo that can be loaded with `dlt`

    The nested `metrics` and the `extract_data_info` entries are dropped. In their
    place the result carries five flat lists (`job_metrics`, `table_metrics`,
    `resource_metrics`, `dag`, `hints`) whose rows are tagged with `load_id`
    and the position of the metrics element within that load id (`extract_idx`).
    """
    info = super().asdict()
    info.pop("extract_data_info")
    # the nested metrics are replaced below by flat row lists
    info.pop("metrics")

    job_rows: List[Any] = []
    table_rows: List[Any] = []
    resource_rows: List[Any] = []
    dag_rows: List[Any] = []
    hint_rows: List[Any] = []

    for load_id, metrics_list in self.metrics.items():
        for idx, metrics in enumerate(metrics_list):
            extend = {"load_id": load_id, "extract_idx": idx}
            job_rows += self.job_metrics_asdict(metrics["job_metrics"], extend=extend)
            table_rows += self.job_metrics_asdict(
                metrics["table_metrics"], key_name="table_name", extend=extend
            )
            resource_rows += self.job_metrics_asdict(
                metrics["resource_metrics"], key_name="resource_name", extend=extend
            )
            # each dag element is indexed as a (parent, resource) pair
            for edge in metrics["dag"]:
                dag_rows.append({**extend, "parent_name": edge[0], "resource_name": edge[1]})
            # hint keys are unpacked last, so they win over the tagging keys
            for name, hints in metrics["hints"].items():
                hint_rows.append({**extend, "resource_name": name, **hints})

    info.update(
        job_metrics=job_rows,
        table_metrics=table_rows,
        resource_metrics=resource_rows,
        dag=dag_rows,
        hints=hint_rows,
    )
    return info
106176
107177 def asstr (self , verbosity : int = 0 ) -> str :
@@ -143,19 +213,25 @@ def row_counts(self) -> RowCounts:
143213
def asdict(self) -> DictStrAny:
    """A dictionary representation of NormalizeInfo that can be loaded with `dlt`

    The nested `metrics` entry is dropped and replaced by two flat lists,
    `job_metrics` and `table_metrics`, whose rows are tagged with `load_id` and
    the position of the metrics element within that load id (`extract_idx`).
    """
    info = super().asdict()
    # the nested metrics are replaced below by flat row lists
    info.pop("metrics")

    job_rows: List[Any] = []
    table_rows: List[Any] = []

    for load_id, metrics_list in self.metrics.items():
        for idx, metrics in enumerate(metrics_list):
            extend = {"load_id": load_id, "extract_idx": idx}
            job_rows += self.job_metrics_asdict(metrics["job_metrics"], extend=extend)
            table_rows += self.job_metrics_asdict(
                metrics["table_metrics"], key_name="table_name", extend=extend
            )

    info.update(job_metrics=job_rows, table_metrics=table_rows)
    return info
160236
161237 def asstr (self , verbosity : int = 0 ) -> str :
@@ -192,10 +268,7 @@ class LoadInfo(StepInfo, _LoadInfo):
192268
def asdict(self) -> DictStrAny:
    """A dictionary representation of LoadInfo that can be loaded with `dlt`"""
    # no LoadInfo-specific changes: the parent's dictionary is returned as is
    info: DictStrAny = super().asdict()
    return info
199272
200273 def asstr (self , verbosity : int = 0 ) -> str :
201274 msg = f"Pipeline { self .pipeline .pipeline_name } completed in "
@@ -273,7 +346,7 @@ def __init__(self) -> None:
273346
def _step_info_start_load_id(self, load_id: str) -> None:
    """Make `load_id` the current load id and ensure it has a metrics list.

    A metrics list already stored for `load_id` is kept, not reset.
    """
    self._current_load_id = load_id
    if load_id not in self._load_id_metrics:
        self._load_id_metrics[load_id] = []
277350
278351 def _step_info_complete_load_id (self , load_id : str , metrics : TStepMetrics ) -> None :
279352 assert self ._current_load_id == load_id , (
0 commit comments