@@ -1,8 +1,9 @@
-from typing import Optional, Set
+from typing import cast, Set, List, Dict, Any
 import os
 import tempfile
 from datetime import datetime
 from pathlib import Path
+import re
 
 import marimo as mo
 import pyarrow
@@ -45,6 +46,9 @@
     get_example_query_for_dataset,
     _get_steps_data_and_status,
     _get_migrations_count,
+    build_pipeline_execution_visualization,
+    _collect_load_packages_from_trace,
+    load_package_status_labels,
     TPipelineRunStatus,
     TVisualPipelineStep,
 )
@@ -53,6 +57,7 @@
     SUCCESS_PIPELINE_DUCKDB,
     SUCCESS_PIPELINE_FILESYSTEM,
     EXTRACT_EXCEPTION_PIPELINE,
+    NORMALIZE_EXCEPTION_PIPELINE,
     NEVER_RAN_PIPELINE,
     LOAD_EXCEPTION_PIPELINE,
     NO_DESTINATION_PIPELINE,
@@ -233,7 +238,7 @@ def test_pipeline_details(pipeline, temp_pipelines_dir):
     assert isinstance(result, list)
     if pipeline.pipeline_name in PIPELINES_WITH_LOAD:
         assert len(result) == 9
-    elif pipeline.pipeline_name == LOAD_EXCEPTION_PIPELINE:
+    elif pipeline.pipeline_name in [LOAD_EXCEPTION_PIPELINE, NORMALIZE_EXCEPTION_PIPELINE]:
         # custom destination does not support remote data info
         assert len(result) == 8
     else:
@@ -253,10 +258,10 @@ def test_pipeline_details(pipeline, temp_pipelines_dir):
     else:
         assert details_dict["destination"] == "duckdb (dlt.destinations.duckdb)"
     assert details_dict["dataset_name"] == pipeline.dataset_name
-    if (
-        pipeline.pipeline_name in PIPELINES_WITH_LOAD
-        or pipeline.pipeline_name == LOAD_EXCEPTION_PIPELINE
-    ):
+    if pipeline.pipeline_name in PIPELINES_WITH_LOAD or pipeline.pipeline_name in [
+        LOAD_EXCEPTION_PIPELINE,
+        NORMALIZE_EXCEPTION_PIPELINE,
+    ]:
         assert details_dict["schemas"].startswith("fruitshop")
     else:
         assert "schemas" not in details_dict
@@ -502,6 +507,10 @@ def test_trace(pipeline: dlt.Pipeline):
     if pipeline.pipeline_name == EXTRACT_EXCEPTION_PIPELINE:
         assert len(result) == 1
         assert result[0]["step"] == "extract"
+    elif pipeline.pipeline_name == NORMALIZE_EXCEPTION_PIPELINE:
+        assert len(result) == 2
+        assert result[0]["step"] == "extract"
+        assert result[1]["step"] == "normalize"
     else:
         assert len(result) == 3
         assert result[0]["step"] == "extract"
@@ -781,10 +790,7 @@ def test_get_steps_data_and_status(
 
 @pytest.mark.parametrize(
     "pipeline",
-    [
-        SUCCESS_PIPELINE_DUCKDB,
-        SUCCESS_PIPELINE_FILESYSTEM,
-    ],
+    PIPELINES_WITH_LOAD,
     indirect=True,
 )
 def test_get_migrations_count(pipeline: dlt.Pipeline) -> None:
@@ -794,15 +800,122 @@ def test_get_migrations_count(pipeline: dlt.Pipeline) -> None:
     assert migrations_count == 1
 
     # Trigger multiple migrations
-    pipeline.extract([{"id": 1, "name": "test"}], table_name="migration_table")
-    pipeline.extract(
-        [{"id": 2, "name": "test2", "new_column": "value"}], table_name="migration_table"
-    )
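+    # three extracts with a widening schema should register as three migrations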
+    pipeline.extract([{"id": 1, "name": "test"}], table_name="my_table")
+    pipeline.extract([{"id": 2, "name": "test2", "new_column": "value"}], table_name="my_table")
     pipeline.extract(
         [{"id": 3, "name": "test3", "new_column": "value", "another_column": 100}],
-        table_name="migration_table",
+        table_name="my_table",
     )
     pipeline.normalize()
     pipeline.load()
     migrations_count = _get_migrations_count(pipeline.last_trace.last_load_info)
     assert migrations_count == 3
+
+
+@pytest.mark.parametrize(
+    "pipeline, expected_steps, expected_status",
+    [
+        (SUCCESS_PIPELINE_DUCKDB, {"extract", "normalize", "load"}, "succeeded"),
+        (SUCCESS_PIPELINE_FILESYSTEM, {"extract", "normalize", "load"}, "succeeded"),
+        (EXTRACT_EXCEPTION_PIPELINE, {"extract"}, "failed"),
+        (LOAD_EXCEPTION_PIPELINE, {"extract", "normalize", "load"}, "failed"),
+    ],
+    indirect=["pipeline"],
+)
+def test_build_pipeline_execution_visualization(
+    pipeline: dlt.Pipeline,
+    expected_steps: Set[TVisualPipelineStep],
+    expected_status: TPipelineRunStatus,
+) -> None:
+    """Test overall pipeline execution visualization logic"""
+
+    trace = pipeline.last_trace
+
+    html = build_pipeline_execution_visualization(trace)
+    html_str = str(html.text)
+
+    assert f"Last execution ID: <strong>{trace.transaction_id[:8]}</strong>" in html_str
+    total_time_match = re.search(
+        r"<div>Total time: <strong>([\d.]+)(ms|s)?</strong></div>", html_str
+    )
+    assert total_time_match is not None
+
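+    # the status badge (green on success, red on failure) must appear verbatim in the HTML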
+    status_badge = f"""
+    <div style="
+        background-color: var(--{'green' if expected_status == "succeeded" else 'red'}-bg);
+        color: var(--{'green' if expected_status == "succeeded" else 'red'}-text);
+        padding: 6px 16px;
+        border-radius: 6px;
+    ">
+        <strong>{expected_status}</strong>
+    </div>
+    """
+    assert status_badge in html_str
+
+    migrations_count = _get_migrations_count(trace.last_load_info) if trace.last_load_info else 0
+    migration_badge = f"""
+    <div style="
+        background-color: var(--yellow-bg);
+        color: var(--yellow-text);
+        padding: 6px 16px;
+        border-radius: 6px;
+    ">
+        <strong>{migrations_count} dataset migration(s)</strong>
+    </div>"""
+    if migrations_count != 0:
+        assert migration_badge in html_str
+    else:
+        assert migration_badge not in html_str
+
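+    # every executed step should be rendered with a numeric duration (unit optional)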
+    steps_data, _ = _get_steps_data_and_status(trace.steps)
+    for step_data in steps_data:
+        duration_pattern = re.search(rf"{step_data.step.capitalize()}\s+([\d.]+)(ms|s)?", html_str)
+        assert duration_pattern is not None
+
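+    # each step that ran gets its accent color (lime=extract, aqua=normalize, pink=load)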
+    if "extract" in expected_steps:
+        assert "var(--dlt-color-lime)" in html_str
+    if "normalize" in expected_steps:
+        assert "var(--dlt-color-aqua)" in html_str
+    if "load" in expected_steps:
+        assert "var(--dlt-color-pink)" in html_str
+
+
+@pytest.mark.parametrize(
+    "pipeline",
+    [
+        SUCCESS_PIPELINE_DUCKDB,
+        SUCCESS_PIPELINE_FILESYSTEM,
+        EXTRACT_EXCEPTION_PIPELINE,
+        NORMALIZE_EXCEPTION_PIPELINE,
+        LOAD_EXCEPTION_PIPELINE,
+    ],
+    indirect=["pipeline"],
+)
+def test_collect_load_packages_from_trace(
+    pipeline: dlt.Pipeline,
+) -> None:
+    """Test getting load package status labels from trace"""
+
+    trace = pipeline.last_trace
+    table = load_package_status_labels(trace)
+
+    list_of_load_package_info = cast(List[Dict[str, Any]], table.data)
+
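+    # loaded on success; new, pending or aborted when extract, normalize or load fails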
+    if pipeline.pipeline_name in ["success_pipeline_duckdb", "success_pipeline_filesystem"]:
+        assert len(list_of_load_package_info) == 2
+        assert all(
+            "loaded" in str(load_package_info["status"].text)
+            for load_package_info in list_of_load_package_info
+        )
+
+    elif pipeline.pipeline_name == "extract_exception_pipeline":
+        assert len(list_of_load_package_info) == 1
+        assert "new" in str(list_of_load_package_info[0]["status"].text)
+
+    elif pipeline.pipeline_name == "load_exception_pipeline":
+        assert len(list_of_load_package_info) == 1
+        assert "aborted" in str(list_of_load_package_info[0]["status"].text)
+
+    elif pipeline.pipeline_name == "normalize_exception_pipeline":
+        assert len(list_of_load_package_info) == 1
+        assert "pending" in str(list_of_load_package_info[0]["status"].text)