-
Notifications
You must be signed in to change notification settings - Fork 415
feat: extend TTableReference
#3093
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
8f43d48
fc94598
3a21ea8
c4df18c
113162c
8bfd479
5613df6
1ebc860
fc0dff8
9b41628
3fa5445
eb17f16
80f954e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,7 +13,7 @@ | |
| NewType, | ||
| Union, | ||
| ) | ||
| from typing_extensions import Never | ||
| from typing_extensions import Never, NotRequired, Required | ||
|
|
||
| from dlt.common.data_types import TDataType | ||
| from dlt.common.normalizers.typing import TNormalizersConfig | ||
|
|
@@ -276,14 +276,88 @@ class TScd2StrategyDict(TMergeDispositionDict, total=False): | |
| ] | ||
|
|
||
|
|
||
| class TTableReference(TypedDict): | ||
| TReferenceCardinality = Literal[ | ||
| "zero_to_one", | ||
| "one_to_zero", | ||
| "zero_to_many", | ||
| "many_to_zero", | ||
| "one_to_many", | ||
| "many_to_one", | ||
| "one_to_one", | ||
| "many_to_many", | ||
| ] | ||
| """Represents cardinality between `column` (left) and `referenced_column` (right) | ||
|
|
||
| Note that cardinality is not symmetric. For example: | ||
| - `Author, 0 to many, Book` an author can have 0 to many books | ||
| - `Book, 1 to 1, Author` a book must have exactly 1 author | ||
|
|
||
| The statement (Author, 0 to many, Book) doesn't imply (Book, many to 0, Author). | ||
| """ | ||
|
|
||
|
|
||
| class TTableReference(TypedDict, total=False): | ||
| """Describes a reference to another table's columns. | ||
| `columns` corresponds to the `referenced_columns` in the referenced table and their order should match. | ||
| """ | ||
|
|
||
| label: Optional[str] | ||
| """Text providing semantic information about the reference. | ||
|
|
||
| For example, the label "liked" describes the relationship between `user` and `post` (user.id, "liked", post.id) | ||
| """ | ||
|
|
||
| cardinality: Optional[TReferenceCardinality] | ||
| """Cardinality of the relationship between `table.column` (left) and `referenced_table.referenced_column` (right).""" | ||
|
|
||
| table: Optional[str] | ||
| """Name of the table. | ||
| When `TTableReference` is defined on a `TTableSchema` (i.e., "inline reference"), the `table` | ||
| value is determined by `TTableSchema["name"]` | ||
| """ | ||
|
|
||
| columns: Sequence[str] | ||
| """Name of the column(s) from `table`""" | ||
|
|
||
| referenced_table: str | ||
| """Name of the referenced table""" | ||
|
|
||
| referenced_columns: Sequence[str] | ||
| """Name of the column(s) from `referenced_table`""" | ||
|
|
||
|
|
||
| TInlineTableReference = TTableReference | ||
|
|
||
|
|
||
| # Compared to `TTableReference` or `TInlineTableReference`, `table` is required | ||
| class TStandaloneTableReference(TypedDict, total=False): | ||
|
||
| """Describes a reference to another table's columns. | ||
| `columns` corresponds to the `referenced_columns` in the referenced table and their order should match. | ||
| """ | ||
|
|
||
| label: Optional[str] | ||
| """Text providing semantic information about the reference. | ||
|
|
||
| For example, the label "liked" describes the relationship between `user` and `post` (user.id, "liked", post.id) | ||
| """ | ||
|
|
||
| cardinality: Optional[TReferenceCardinality] | ||
| """Cardinality of the relationship between `table.column` (left) and `referenced_table.referenced_column` (right).""" | ||
|
|
||
| table: str | ||
| """Name of the table. | ||
| When `TTableReference` is defined on a `TTableSchema` (i.e., "inline reference"), the `table` | ||
| value is determined by `TTableSchema["name"]` | ||
| """ | ||
|
|
||
| columns: Sequence[str] | ||
| """Name of the column(s) from `table`""" | ||
|
|
||
| referenced_table: str | ||
| """Name of the referenced table""" | ||
|
|
||
| referenced_columns: Sequence[str] | ||
| """Name of the column(s) from `referenced_table`""" | ||
|
|
||
|
|
||
| TTableReferenceParam = Sequence[TTableReference] | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -980,12 +980,35 @@ def create_root_child_reference(tables: TSchemaTables, table_name: str) -> TTabl | |
| root_row_key: str = get_first_column_name_with_prop(root_table, "row_key") | ||
|
|
||
| return TTableReference( | ||
| label="_dlt_root", | ||
| cardinality="many_to_one", | ||
| table=table_name, | ||
| columns=[child_root_key], | ||
| referenced_table=root_table["name"], | ||
| referenced_columns=[root_row_key], | ||
| ) | ||
|
|
||
|
|
||
| def get_all_root_child_references_from_root( | ||
| tables: TSchemaTables, table_name: str | ||
| ) -> list[TTableReference]: | ||
| root_table = tables.get(table_name) | ||
| if is_nested_table(root_table) is True: | ||
| raise ValueError(f"Table `{table_name}` is not a root table.") | ||
|
|
||
| children_refs = [] | ||
| # skip the first table in chain, which is the root; i.e., the current one | ||
| for child_table in get_nested_tables(tables, table_name)[1:]: | ||
| # try/except because a root table may or may not have child with `root_key` enabled | ||
| try: | ||
| child_ref = create_root_child_reference(tables, child_table["name"]) | ||
| children_refs.append(child_ref) | ||
| except ValueError: | ||
| pass | ||
|
|
||
| return children_refs | ||
|
|
||
|
|
||
| def create_parent_child_reference(tables: TSchemaTables, table_name: str) -> TTableReference: | ||
| """Create a Reference between `{table}.{parent_key}` and `{parent}.{row_key}`""" | ||
| child_table = tables.get(table_name) | ||
|
|
@@ -996,25 +1019,53 @@ def create_parent_child_reference(tables: TSchemaTables, table_name: str) -> TTa | |
|
|
||
| parent_table_name = child_table.get("parent") | ||
| if parent_table_name is None: | ||
| raise ValueError(f"No parent table found for `{table_name=:}`") | ||
| raise ValueError(f"Table `{table_name}` is a root table and has no parent.") | ||
| parent_table = tables.get(parent_table_name) | ||
|
|
||
| child_parent_key: str = get_first_column_name_with_prop(child_table, "parent_key") | ||
| parent_row_key: str = get_first_column_name_with_prop(parent_table, "row_key") | ||
|
|
||
| return TTableReference( | ||
| label="_dlt_parent", | ||
| cardinality="many_to_one", | ||
| table=table_name, | ||
| columns=[child_parent_key], | ||
| referenced_table=parent_table_name, | ||
| referenced_columns=[parent_row_key], | ||
| ) | ||
|
|
||
|
|
||
| def get_all_parent_child_references_from_root( | ||
| tables: TSchemaTables, table_name: str | ||
| ) -> list[TTableReference]: | ||
| root_table = tables.get(table_name) | ||
| if is_nested_table(root_table) is True: | ||
| raise ValueError(f"Table `{table_name}` is not a root table.") | ||
|
|
||
| children_refs = [] | ||
| # skip the first table in chain, which is the root; i.e., the current one | ||
| for child_table in get_nested_tables(tables, table_name)[1:]: | ||
| # try/except because a root table may or may not have child with `parent_key` enabled | ||
| try: | ||
| child_ref = create_parent_child_reference(tables, child_table["name"]) | ||
| children_refs.append(child_ref) | ||
| except ValueError: | ||
| pass | ||
|
|
||
| return children_refs | ||
|
|
||
|
|
||
| def create_load_table_reference(table: TTableSchema) -> TTableReference: | ||
| """Create a Reference between `{table}._dlt_oad_id` and `_dlt_loads.load_id`""" | ||
| """Create a Reference between `{table}._dlt_load_id` and `_dlt_loads.load_id`""" | ||
| if table["columns"].get(C_DLT_LOAD_ID) is None: | ||
| raise ValueError(f"Column `{C_DLT_LOAD_ID}` not found for `table_name={table['name']}`") | ||
| raise ValueError( | ||
| f"Table `{table['name']}` is not a root table and has no `{C_DLT_LOAD_ID}` column." | ||
| ) | ||
|
|
||
| return TTableReference( | ||
| label="_dlt_load", | ||
| cardinality="zero_to_many", | ||
| table=table["name"], | ||
| columns=[C_DLT_LOAD_ID], | ||
|
||
| referenced_table=LOADS_TABLE_NAME, | ||
| referenced_columns=[C_DLT_LOADS_TABLE_LOAD_ID], | ||
|
|
@@ -1031,6 +1082,9 @@ def create_version_and_loads_hash_reference(tables: TSchemaTables) -> TTableRefe | |
| raise ValueError(f"Table `{LOADS_TABLE_NAME}` not found in tables: `{list(tables.keys())}`") | ||
|
|
||
| return TTableReference( | ||
| label="_dlt_schema_version", | ||
| cardinality="one_to_many", | ||
| table=VERSION_TABLE_NAME, | ||
| columns=["version_hash"], | ||
| referenced_table=LOADS_TABLE_NAME, | ||
| referenced_columns=["schema_version_hash"], | ||
|
|
@@ -1046,12 +1100,14 @@ def create_version_and_loads_schema_name_reference(tables: TSchemaTables) -> TTa | |
| if LOADS_TABLE_NAME not in tables: | ||
| raise ValueError(f"Table `{LOADS_TABLE_NAME}` not found in tables: `{list(tables.keys())}`") | ||
|
|
||
| loads_and_version_hash_schema_name_ref = TTableReference( | ||
| return TTableReference( | ||
| label="_dlt_schema_name", | ||
| cardinality="many_to_many", | ||
| table=VERSION_TABLE_NAME, | ||
| columns=["schema_name"], | ||
| referenced_table=LOADS_TABLE_NAME, | ||
| referenced_columns=["schema_name"], | ||
| ) | ||
| return loads_and_version_hash_schema_name_ref | ||
|
|
||
|
|
||
| def migrate_complex_types(table: TTableSchema, warn: bool = False) -> None: | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.