Skip to content

Commit 382eb6b

Browse files
ziltoheaven00
andauthored
feat: Schema.to_mermaid() (#3364)
* Add dlt.Schema.to_mermaid() method --------- Co-authored-by: jayant <[email protected]>
1 parent 661c6c1 commit 382eb6b

File tree

9 files changed

+777
-6
lines changed

9 files changed

+777
-6
lines changed

dlt/_workspace/cli/_pipeline_command.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,8 @@ def _display_pending_packages() -> Tuple[Sequence[str], Sequence[str]]:
378378
schema_str = s.to_dbml()
379379
elif format_ == "dot":
380380
schema_str = s.to_dot()
381+
elif format_ == "mermaid":
382+
schema_str = s.to_mermaid()
381383
else:
382384
schema_str = s.to_pretty_yaml(remove_defaults=remove_defaults_)
383385

dlt/_workspace/cli/commands.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,8 @@ def schema_command_wrapper(file_path: str, format_: str, remove_defaults: bool)
489489
schema_str = s.to_dbml()
490490
elif format_ == "dot":
491491
schema_str = s.to_dot()
492+
elif format == "mermaid":
493+
schema_str = s.to_mermaid()
492494
else:
493495
schema_str = s.to_pretty_yaml(remove_defaults=remove_defaults)
494496

dlt/common/schema/schema.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -795,6 +795,42 @@ def to_dot(
795795
)
796796
return dot
797797

798+
def to_mermaid(
    self,
    remove_processing_hints: bool = False,
    hide_columns: bool = False,
    hide_descriptions: bool = False,
    include_dlt_tables: bool = True,
) -> str:
    """Render this schema as a Mermaid entity-relationship diagram.

    Args:
        remove_processing_hints: Passed through to `to_dict()`. If True, hints used for
            data processing and redundant information are removed, which reduces the
            size of the schema and improves readability.
        hide_columns: If True, column details are left out of the diagram. This helps
            readability of large diagrams.
        hide_descriptions: If True, column descriptions are left out of the diagram.
        include_dlt_tables: If True (the default), internal dlt tables (`_dlt_version`,
            `_dlt_loads`, `_dlt_pipeline_state`) are part of the diagram. If False,
            tables whose name starts with `_dlt` and references touching them are omitted.

    Returns:
        A string containing a Mermaid `erDiagram` of the schema.
    """
    from dlt.helpers.mermaid import schema_to_mermaid

    return schema_to_mermaid(
        # Defaults are kept on purpose: the Mermaid helper reads the `name` entry of
        # every table and column, and per the original note `remove_defaults=True`
        # strips those `name` fields.
        self.to_dict(
            remove_defaults=False,
            remove_processing_hints=remove_processing_hints,
        ),
        references=self.references,
        hide_columns=hide_columns,
        hide_descriptions=hide_descriptions,
        include_dlt_tables=include_dlt_tables,
    )
833+
798834
def clone(
799835
self,
800836
with_name: str = None,

dlt/common/storages/configuration.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from dlt.common.utils import digest128
2323

2424

25-
TSchemaFileFormat = Literal["json", "yaml", "dbml", "dot"]
25+
TSchemaFileFormat = Literal["json", "yaml", "dbml", "dot", "mermaid"]
2626
SCHEMA_FILES_EXTENSIONS = get_args(TSchemaFileFormat)
2727

2828

dlt/common/storages/schema_storage.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,8 @@ def _parse_schema_str(schema_str: str, extension: TSchemaFileFormat) -> DictStrA
284284
raise ValueError(extension, "Schema parser for `dbml` not yet implemented")
285285
elif extension == "dot":
286286
raise ValueError(extension, "Schema parser for `dot` not yet implemented")
287+
elif extension == "mermaid":
288+
raise ValueError(extension, "Schema parser for `mermaid` not yet implemented")
287289
else:
288290
raise ValueError(extension)
289291
return imported_schema

dlt/helpers/mermaid.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
"""Build a mermaid graph representation using raw strings without additional dependencies"""
2+
from enum import Enum
3+
4+
from dlt.common.schema.typing import (
5+
TColumnSchema,
6+
TReferenceCardinality,
7+
TStoredSchema,
8+
TTableReferenceStandalone,
9+
TTableSchema,
10+
)
11+
12+
13+
INDENT = " "
14+
15+
16+
def schema_to_mermaid(
    schema: TStoredSchema,
    *,
    references: list[TTableReferenceStandalone],
    hide_columns: bool = False,
    hide_descriptions: bool = False,
    include_dlt_tables: bool = True,
) -> str:
    """Render a stored schema as a Mermaid `erDiagram` string.

    Args:
        schema: Stored schema dict; only its `tables` mapping is read here.
        references: Table references, rendered as relationship lines after all tables.
        hide_columns: Forwarded to the table renderer.
        hide_descriptions: Forwarded to the table renderer.
        include_dlt_tables: If False, tables whose name starts with `_dlt` are skipped,
            as are references whose `table` or `referenced_table` starts with `_dlt`.

    Returns:
        The diagram text: an `erDiagram` header line followed by the rendered tables
        and references, each prefixed with `INDENT`.
    """
    skip_dlt = not include_dlt_tables
    fragments = ["erDiagram\n"]

    for name, table in schema["tables"].items():
        if skip_dlt and name.startswith("_dlt"):
            continue
        rendered_table = _to_mermaid_table(
            table,
            hide_columns=hide_columns,
            hide_descriptions=hide_descriptions,
        )
        fragments.append(INDENT + rendered_table)

    for ref in references:
        # a reference is dropped as soon as either side is an internal dlt table
        if skip_dlt and (
            ref["table"].startswith("_dlt") or ref["referenced_table"].startswith("_dlt")
        ):
            continue
        fragments.append(INDENT + _to_mermaid_reference(ref))

    return "".join(fragments)
44+
45+
46+
def _to_mermaid_table(
47+
table: TTableSchema, hide_columns: bool = False, hide_descriptions: bool = False
48+
) -> str:
49+
mermaid_table: str = table["name"]
50+
mermaid_table += "{\n"
51+
52+
if hide_columns is False:
53+
for column in table["columns"].values():
54+
mermaid_table += INDENT + _to_mermaid_column(
55+
column,
56+
hide_descriptions=hide_descriptions,
57+
)
58+
59+
mermaid_table += "}\n"
60+
return mermaid_table
61+
62+
63+
# TODO add scale & precision to `data_type`
64+
def _to_mermaid_column(column: TColumnSchema, hide_descriptions: bool = False) -> str:
65+
mermaid_col = column["data_type"] + " " + column["name"]
66+
keys = []
67+
if column.get("primary_key"):
68+
keys.append("PK")
69+
70+
if column.get("unique"):
71+
keys.append("UK")
72+
73+
if keys:
74+
mermaid_col += " " + ",".join(keys)
75+
76+
if hide_descriptions is False:
77+
if description := column.get("description"):
78+
mermaid_col += f' "{description}"'
79+
80+
mermaid_col += "\n"
81+
return mermaid_col
82+
83+
84+
class TMermaidArrows(str, Enum):
85+
ONE_TO_MANY = "||--|{"
86+
MANY_TO_ONE = "}|--||"
87+
ZERO_TO_MANY = "|o--|{"
88+
MANY_TO_ZERO = "}|--o|"
89+
ONE_TO_MORE = "||--o{"
90+
MORE_TO_ONE = "}o--||"
91+
ONE_TO_ONE = "||--||"
92+
MANY_TO_MANY = "}|--|{"
93+
ZERO_TO_ONE = "|o--o|"
94+
95+
96+
_CARDINALITY_ARROW: dict[TReferenceCardinality, TMermaidArrows] = {
97+
"one_to_many": TMermaidArrows.ONE_TO_MANY,
98+
"many_to_one": TMermaidArrows.MANY_TO_ONE,
99+
"zero_to_many": TMermaidArrows.ZERO_TO_MANY,
100+
"many_to_zero": TMermaidArrows.MANY_TO_ZERO,
101+
"one_to_one": TMermaidArrows.ONE_TO_ONE,
102+
"many_to_many": TMermaidArrows.MANY_TO_MANY,
103+
"zero_to_one": TMermaidArrows.ZERO_TO_ONE,
104+
"one_to_zero": TMermaidArrows.ZERO_TO_ONE,
105+
}
106+
107+
108+
def _to_mermaid_reference(ref: TTableReferenceStandalone) -> str:
109+
"""Builds references in the following format using cardinality and label to describe
110+
the relationship
111+
112+
<left-entity> [<relationship> <right-entity> : <relationship-label>]
113+
"""
114+
left_table = ref.get("table")
115+
right_table = ref.get("referenced_table")
116+
cardinality = ref.get("cardinality", "one_to_many")
117+
label = ref.get("label", '""')
118+
arrow: str = _CARDINALITY_ARROW.get(cardinality).value
119+
120+
mermaid_reference = f"{left_table} {arrow} {right_table}"
121+
if label:
122+
mermaid_reference += f" : {label}"
123+
124+
mermaid_reference += "\n"
125+
return mermaid_reference

docs/website/docs/general-usage/dataset-access/view-dlt-schema.md

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -837,7 +837,7 @@ TableGroup "_dlt" {
837837

838838

839839
## Export to Graphviz
840-
[Graphviz](https://www.graphviz.org/) is an open soruce graph visualization engine which uses the [DOT language](https://graphviz.org/doc/info/lang.html). dlt allows you to export your `dlt.Schema` as DOT string, which can be rendered using the Python `graphviz` library, lightweight JS libraries (e.g., [d3-graphviz](https://github.com/magjac/d3-graphviz)), or IDE extensions.
840+
[Graphviz](https://www.graphviz.org/) is an open source graph visualization engine which uses the [DOT language](https://graphviz.org/doc/info/lang.html). dlt allows you to export your `dlt.Schema` as DOT string, which can be rendered using the Python `graphviz` library, lightweight JS libraries (e.g., [d3-graphviz](https://github.com/magjac/d3-graphviz)), or IDE extensions.
841841

842842
Note that the conversion is lossy. You can't fully recreate `dlt.Schema` from a DOT string.
843843

@@ -1278,3 +1278,74 @@ _dlt_version:f4:_ -> _dlt_loads:f2:_ [dir=both, penwidth=1, color="#1c1c34", arr
12781278
</details>
12791279

12801280
![graphviz dot render](https://storage.googleapis.com/dlt-blog-images/schema_dot_export.svg)
1281+
1282+
1283+
## Export to Mermaid
1284+
1285+
[Mermaid](https://www.mermaidchart.com/) is a widely-supported diagramming language. dlt allows you to export your `dlt.Schema` as a Mermaid string. This can be natively rendered by many tools (GitHub markdown, Notion, marimo notebooks).
1286+
1287+
Note that the conversion is lossy. You can't fully recreate `dlt.Schema` from a Mermaid string.
1288+
1289+
```py
1290+
schema_mermaid = pipeline.default_schema.to_mermaid()
1291+
```
1292+
1293+
```sh
1294+
# `chess_pipeline` is the name of the pipeline
1295+
dlt pipeline chess_pipeline schema --format mermaid
1296+
```
1297+
1298+
<details>
1299+
<summary>See Mermaid</summary>
1300+
1301+
```mermaid
1302+
erDiagram
1303+
_dlt_version{
1304+
bigint version
1305+
bigint engine_version
1306+
timestamp inserted_at
1307+
text schema_name
1308+
text version_hash
1309+
text schema
1310+
}
1311+
_dlt_loads{
1312+
text load_id
1313+
text schema_name
1314+
bigint status
1315+
timestamp inserted_at
1316+
text schema_version_hash
1317+
}
1318+
customers{
1319+
bigint id PK
1320+
text name
1321+
text city
1322+
text _dlt_load_id
1323+
text _dlt_id UK
1324+
}
1325+
purchases{
1326+
bigint id PK
1327+
bigint customer_id
1328+
bigint inventory_id
1329+
bigint quantity
1330+
text date
1331+
text _dlt_load_id
1332+
text _dlt_id UK
1333+
}
1334+
_dlt_pipeline_state{
1335+
bigint version
1336+
bigint engine_version
1337+
text pipeline_name
1338+
text state
1339+
timestamp created_at
1340+
text version_hash
1341+
text _dlt_load_id
1342+
text _dlt_id UK
1343+
}
1344+
customers }|--|| _dlt_loads : _dlt_load
1345+
purchases }|--|| _dlt_loads : _dlt_load
1346+
purchases ||--|{ customers : ""
1347+
_dlt_pipeline_state }|--|| _dlt_loads : _dlt_load
1348+
```
1349+
</details>
1350+
1351+
![mermaid render](https://storage.googleapis.com/dlt-blog-images/schema_mermaid_export.png)

docs/website/docs/reference/command-line-interface.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ Shows, converts and upgrades schemas.
8989

9090
**Usage**
9191
```sh
92-
dlt schema [-h] [--format {json,yaml,dbml,dot}] [--remove-defaults] file
92+
dlt schema [-h] [--format {json,yaml,dbml,dot,mermaid}] [--remove-defaults] file
9393
```
9494

9595
**Description**
@@ -107,7 +107,7 @@ Inherits arguments from [`dlt`](#dlt).
107107

108108
**Options**
109109
* `-h, --help` - Show this help message and exit
110-
* `--format {json,yaml,dbml,dot}` - Display schema in this format
110+
* `--format {json,yaml,dbml,dot,mermaid}` - Display schema in this format
111111
* `--remove-defaults` - Does not show default hint values
112112

113113
</details>
@@ -334,7 +334,7 @@ Displays default schema.
334334

335335
**Usage**
336336
```sh
337-
dlt pipeline [pipeline_name] schema [-h] [--format {json,yaml,dbml,dot}]
337+
dlt pipeline [pipeline_name] schema [-h] [--format {json,yaml,dbml,dot,mermaid}]
338338
[--remove-defaults]
339339
```
340340

@@ -350,7 +350,7 @@ Inherits arguments from [`dlt pipeline`](#dlt-pipeline).
350350

351351
**Options**
352352
* `-h, --help` - Show this help message and exit
353-
* `--format {json,yaml,dbml,dot}` - Display schema in this format
353+
* `--format {json,yaml,dbml,dot,mermaid}` - Display schema in this format
354354
* `--remove-defaults` - Does not show default hint values
355355

356356
</details>

0 commit comments

Comments
 (0)