Skip to content

Commit 3be0857

Browse files
authored
feat: dlt.Schema.to_dot() graphviz export (#2959)
* graphviz renderer added * dlt.Schema._repr_html_ added * updated docs * update CLI docs * updated linting rule * added tests for formatting kwargs * added utility to validate dot
1 parent 6348115 commit 3be0857

File tree

11 files changed

+1445
-7
lines changed

11 files changed

+1445
-7
lines changed

dlt/cli/command_wrappers.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,8 @@ def schema_command_wrapper(file_path: str, format_: str, remove_defaults: bool)
145145
schema_str = s.to_pretty_yaml(remove_defaults=remove_defaults)
146146
elif format_ == "dbml":
147147
schema_str = s.to_dbml()
148+
elif format_ == "dot":
149+
schema_str = s.to_dot()
148150
else:
149151
schema_str = s.to_pretty_yaml(remove_defaults=remove_defaults)
150152

dlt/cli/pipeline_command.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,8 @@ def _display_pending_packages() -> Tuple[Sequence[str], Sequence[str]]:
354354
schema_str = s.to_pretty_yaml(remove_defaults=remove_defaults_)
355355
elif format_ == "dbml":
356356
schema_str = s.to_dbml()
357+
elif format_ == "dot":
358+
schema_str = s.to_dot()
357359
else:
358360
schema_str = s.to_pretty_yaml(remove_defaults=remove_defaults_)
359361

dlt/common/schema/schema.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -688,6 +688,18 @@ def __repr__(self) -> str:
688688
}
689689
return simple_repr("dlt.Schema", **without_none(kwargs))
690690

691+
def _repr_html_(self, **kwargs: Any) -> str:
692+
"""Render the Schema as a graphviz graph and display it using HTML
693+
694+
This method is automatically called by notebook renderers (IPython, marimo, etc.)
695+
ref: https://ipython.readthedocs.io/en/stable/config/integrating.html
696+
697+
`dlt.helpers.graphviz._render_dot_with_html()` has no external Python or system dependencies.
698+
"""
699+
from dlt.helpers.graphviz import _render_dot_with_html
700+
701+
return _render_dot_with_html(self.to_dot(**kwargs))
702+
691703
def to_dict(
692704
self,
693705
remove_defaults: bool = False,
@@ -768,6 +780,49 @@ def to_dbml(
768780
)
769781
return str(dbml_schema.dbml)
770782

783+
def to_dot(
784+
self,
785+
remove_processing_hints: bool = False,
786+
include_dlt_tables: bool = True,
787+
include_internal_dlt_ref: bool = True,
788+
include_parent_child_ref: bool = True,
789+
include_root_child_ref: bool = True,
790+
group_by_resource: bool = False,
791+
) -> str:
792+
"""Convert schema to a Graphviz DOT string.
793+
794+
Args:
795+
remove_processing_hints: If True, remove hints used for data processing and redundant information.
796+
This reduces the size of the schema and improves readability.
797+
include_dlt_tables: If True, include data tables and internal dlt tables. This will influence table
798+
references and groups produced.
799+
include_internal_dlt_ref: If True, include references between tables `_dlt_version`, `_dlt_loads` and `_dlt_pipeline_state`
800+
include_parent_child_ref: If True, include references from `child._dlt_parent_id` to `parent._dlt_id`
801+
include_root_child_ref: If True, include references from `child._dlt_root_id` to `root._dlt_id`
802+
group_by_resource: If True, group tables by resource and create subclusters.
803+
804+
Returns:
805+
A DOT string of the schema
806+
"""
807+
from dlt.helpers.graphviz import schema_to_graphviz
808+
809+
stored_schema = self.to_dict(
810+
# setting this to `True` removes `name` fields that are used in `schema_to_graphviz()`
811+
# if required, we can refactor `dlt.helpers.graphviz` to support this
812+
remove_defaults=False,
813+
remove_processing_hints=remove_processing_hints,
814+
)
815+
816+
dot = schema_to_graphviz(
817+
stored_schema,
818+
include_dlt_tables=include_dlt_tables,
819+
include_internal_dlt_ref=include_internal_dlt_ref,
820+
include_parent_child_ref=include_parent_child_ref,
821+
include_root_child_ref=include_root_child_ref,
822+
group_by_resource=group_by_resource,
823+
)
824+
return dot
825+
771826
def clone(
772827
self,
773828
with_name: str = None,

dlt/common/storages/configuration.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from dlt.common.utils import digest128
2323

2424

25-
TSchemaFileFormat = Literal["json", "yaml", "dbml"]
25+
TSchemaFileFormat = Literal["json", "yaml", "dbml", "dot"]
2626
SCHEMA_FILES_EXTENSIONS = get_args(TSchemaFileFormat)
2727

2828

dlt/common/storages/schema_storage.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,9 @@ def _parse_schema_str(schema_str: str, extension: TSchemaFileFormat) -> DictStrA
281281
elif extension == "yaml":
282282
imported_schema = yaml.safe_load(schema_str)
283283
elif extension == "dbml":
284-
raise ValueError(extension, "Schema parser for dbml not yet implemented")
284+
raise ValueError(extension, "Schema parser for `dbml` not yet implemented")
285+
elif extension == "dot":
286+
raise ValueError(extension, "Schema parser for `dot` not yet implemented")
285287
else:
286288
raise ValueError(extension)
287289
return imported_schema

0 commit comments

Comments
 (0)