Commit d4340d8

steinitzu, oonyoontong, and rudolfix authored
Fix databricks pandas error (#1443)
* update dependencies for databricks/dbt
* use kwargs if args not defined, fix typing
* Revert to use inline params to keep support for 13.x cluster
* Typing fix
* adds dbt support for mssql
* converts dbt deps from extra to group, allows databricks client >2.9.3
* fixes dict to env util
* limits dbt version to <1.8 in destination tests
* skips chess dbt package for mssql

Co-authored-by: Oon Tong Tan <[email protected]>
Co-authored-by: Marcin Rudolf <[email protected]>
1 parent a9021fe · commit d4340d8

File tree

15 files changed: +262 -252


.github/workflows/lint.yml

Lines changed: 1 addition & 1 deletion

@@ -59,7 +59,7 @@ jobs:
 
       - name: Install dependencies
         # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
-        run: poetry install --all-extras --with airflow,providers,pipeline,sentry-sdk
+        run: poetry install --all-extras --with airflow,providers,pipeline,sentry-sdk,dbt
 
       - name: Run make lint
         run: |

.github/workflows/test_dbt_runner.yml

Lines changed: 1 addition & 1 deletion

@@ -60,7 +60,7 @@ jobs:
 
       - name: Install dependencies
         # install dlt with postgres support
-        run: poetry install --no-interaction -E postgres -E dbt --with sentry-sdk
+        run: poetry install --no-interaction -E postgres --with sentry-sdk,dbt
 
       - name: create secrets.toml
         run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml

Makefile

Lines changed: 1 addition & 1 deletion

@@ -44,7 +44,7 @@ has-poetry:
 	poetry --version
 
 dev: has-poetry
-	poetry install --all-extras --with airflow --with docs --with providers --with pipeline --with sentry-sdk
+	poetry install --all-extras --with airflow,docs,providers,pipeline,sentry-sdk,dbt
 
 lint:
 	./tools/check-package.sh

dlt/common/configuration/utils.py

Lines changed: 11 additions & 1 deletion

@@ -178,7 +178,10 @@ def add_config_to_env(config: BaseConfiguration, sections: Tuple[str, ...] = ())
 
 
 def add_config_dict_to_env(
-    dict_: Mapping[str, Any], sections: Tuple[str, ...] = (), overwrite_keys: bool = False
+    dict_: Mapping[str, Any],
+    sections: Tuple[str, ...] = (),
+    overwrite_keys: bool = False,
+    destructure_dicts: bool = True,
 ) -> None:
     """Writes values in dict_ back into environment using the naming convention of EnvironProvider. Applies `sections` if specified. Does not overwrite existing keys by default"""
     for k, v in dict_.items():
@@ -193,5 +196,12 @@ def add_config_dict_to_env(
         if env_key not in os.environ or overwrite_keys:
             if v is None:
                 os.environ.pop(env_key, None)
+            elif isinstance(v, dict) and destructure_dicts:
+                add_config_dict_to_env(
+                    v,
+                    sections + (k,),
+                    overwrite_keys=overwrite_keys,
+                    destructure_dicts=destructure_dicts,
+                )
             else:
                 os.environ[env_key] = serialize_value(v)
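
The new `destructure_dicts` flag (default `True`) makes nested dict values recurse into extra `__`-separated sections instead of being serialized as a single value. A minimal standalone sketch of the effect, assuming the `EnvironProvider` convention of upper-casing keys and joining sections with `__`:

import os
from typing import Any, Mapping, Tuple

def flatten_to_env(dict_: Mapping[str, Any], sections: Tuple[str, ...] = ()) -> None:
    # toy stand-in for add_config_dict_to_env(..., destructure_dicts=True)
    for k, v in dict_.items():
        if isinstance(v, dict):
            flatten_to_env(v, sections + (k,))  # the key becomes one more section
        else:
            os.environ["__".join(s.upper() for s in sections + (k,))] = str(v)

flatten_to_env({"credentials": {"host": "db.local", "query": {"encrypt": "yes"}}}, ("dlt",))
assert os.environ["DLT__CREDENTIALS__QUERY__ENCRYPT"] == "yes"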

dlt/destinations/impl/databricks/databricks.py

Lines changed: 1 addition & 1 deletion

@@ -264,7 +264,7 @@ def __init__(self, schema: Schema, config: DatabricksClientConfiguration) -> Non
         sql_client = DatabricksSqlClient(config.normalize_dataset_name(schema), config.credentials)
         super().__init__(schema, config, sql_client)
         self.config: DatabricksClientConfiguration = config
-        self.sql_client: DatabricksSqlClient = sql_client
+        self.sql_client: DatabricksSqlClient = sql_client  # type: ignore[assignment]
         self.type_mapper = DatabricksTypeMapper(self.capabilities)
 
     def start_file_load(self, table: TTableSchema, file_path: str, load_id: str) -> LoadJob:

dlt/destinations/impl/databricks/sql_client.py

Lines changed: 13 additions & 13 deletions

@@ -1,6 +1,7 @@
 from contextlib import contextmanager, suppress
 from typing import Any, AnyStr, ClassVar, Iterator, Optional, Sequence, List, Union, Dict
 
+
 from databricks import sql as databricks_lib
 from databricks.sql.client import (
     Connection as DatabricksSqlConnection,

@@ -37,7 +38,9 @@ def __init__(self, dataset_name: str, credentials: DatabricksCredentials) -> Non
 
     def open_connection(self) -> DatabricksSqlConnection:
         conn_params = self.credentials.to_connector_params()
-        self._conn = databricks_lib.connect(**conn_params, schema=self.dataset_name)
+        self._conn = databricks_lib.connect(
+            **conn_params, schema=self.dataset_name, use_inline_params="silent"
+        )
         return self._conn
 
     @raise_open_connection_error

@@ -87,12 +90,14 @@ def execute_sql(
     @contextmanager
     @raise_database_error
     def execute_query(self, query: AnyStr, *args: Any, **kwargs: Any) -> Iterator[DBApiCursor]:
-        curr: DBApiCursor = None
-        # TODO: databricks connector 3.0.0 will use :named paramstyle only
+        curr: DBApiCursor
+        # TODO: Inline param support will be dropped in future databricks driver, switch to :named paramstyle
+        # This will drop support for cluster runtime v13.x
+        # db_args: Optional[Dict[str, Any]]
         # if args:
         #     keys = [f"arg{i}" for i in range(len(args))]
         #     # Replace position arguments (%s) with named arguments (:arg0, :arg1, ...)
-        #     # query = query % tuple(f":{key}" for key in keys)
+        #     query = query % tuple(f":{key}" for key in keys)
         #     db_args = {}
         #     for key, db_arg in zip(keys, args):
         #         # Databricks connector doesn't accept pendulum objects

@@ -102,15 +107,10 @@ def execute_query(self, query: AnyStr, *args: Any, **kwargs: Any) -> Iterator[DB
         #             db_arg = to_py_date(db_arg)
         #         db_args[key] = db_arg
         # else:
-        #     db_args = None
-        db_args: Optional[Union[Dict[str, Any], Sequence[Any]]]
-        if kwargs:
-            db_args = kwargs
-        elif args:
-            db_args = args
-        else:
-            db_args = None
-        with self._conn.cursor() as curr:
+        #     db_args = kwargs or None
+
+        db_args = args or kwargs or None
+        with self._conn.cursor() as curr:  # type: ignore[assignment]
             curr.execute(query, db_args)
             yield DBApiCursorImpl(curr)  # type: ignore[abstract]
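
The replaced branching reduces to `db_args = args or kwargs or None`: positional parameters take precedence, keyword parameters are the fallback, and `None` signals "no parameters" so the cursor executes the query without binding. A standalone sketch of that selection rule (not the dlt client itself):

from typing import Any, Dict, Optional, Sequence, Union

def pick_db_args(
    args: Sequence[Any], kwargs: Dict[str, Any]
) -> Optional[Union[Sequence[Any], Dict[str, Any]]]:
    # empty tuples/dicts are falsy, so args wins, then kwargs, then None
    return args or kwargs or None

assert pick_db_args((1, 2), {}) == (1, 2)      # positional (%s) parameters
assert pick_db_args((), {"a": 1}) == {"a": 1}  # named parameters
assert pick_db_args((), {}) is None            # no parameters at all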

dlt/extract/decorators.py

Lines changed: 0 additions & 1 deletion

@@ -509,7 +509,6 @@ def decorator(
         SPEC, resolvable_fields = spec_from_signature(
             f, inspect.signature(f), include_defaults=standalone
         )
-        print(SPEC, resolvable_fields, standalone)
         if is_inner_resource and not standalone:
             if len(resolvable_fields) > 0:
                 # prevent required arguments to inner functions that are not standalone

dlt/helpers/dbt/profiles.yml

Lines changed: 18 additions & 1 deletion

@@ -144,6 +144,23 @@ athena:
       work_group: "{{ env_var('DLT__ATHENA_WORK_GROUP', '') }}"
 
 
+mssql:
+  target: analytics
+  outputs:
+    analytics:
+      type: sqlserver
+      driver: "{{ env_var('DLT__CREDENTIALS__DRIVER') }}"
+      server: "{{ env_var('DLT__CREDENTIALS__HOST') }}"
+      port: "{{ env_var('DLT__CREDENTIALS__PORT') | as_number }}"
+      database: "{{ env_var('DLT__CREDENTIALS__DATABASE') }}"
+      schema: "{{ var('destination_dataset_name', var('source_dataset_name')) }}"
+      user: "{{ env_var('DLT__CREDENTIALS__USERNAME') }}"
+      password: "{{ env_var('DLT__CREDENTIALS__PASSWORD') }}"
+      login_timeout: "{{ env_var('DLT__CREDENTIALS__CONNECT_TIMEOUT', '0') | as_number }}"
+      encrypt: "{{ (env_var('DLT__CREDENTIALS__QUERY__ENCRYPT', 'No') == 'yes') | as_bool }}"
+      trust_cert: "{{ (env_var('DLT__CREDENTIALS__QUERY__TRUSTSERVERCERTIFICATE', 'yes') == 'yes') | as_bool }}"
+
+
 # commented out because dbt for Synapse isn't currently properly supported.
 # Leave config here for potential future use.
 # synapse:

(the only change in the second hunk removes trailing whitespace from the commented password line)

@@ -157,7 +174,7 @@
 #      database: "{{ env_var('DLT__CREDENTIALS__DATABASE') }}"
 #      schema: "{{ var('destination_dataset_name', var('source_dataset_name')) }}"
 #      user: "{{ env_var('DLT__CREDENTIALS__USERNAME') }}"
-#      password: "{{ env_var('DLT__CREDENTIALS__PASSWORD') }}" 
+#      password: "{{ env_var('DLT__CREDENTIALS__PASSWORD') }}"
 
 
 databricks:
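
These `DLT__CREDENTIALS__*` lookups are what the dict-to-env fix above feeds: with `destructure_dicts=True`, a nested section such as the credentials `query` dict lands on keys like `DLT__CREDENTIALS__QUERY__ENCRYPT`. A hedged usage sketch; the `("dlt",)` sections prefix and the credential values are assumptions, only the function signature comes from this commit:

from dlt.common.configuration.utils import add_config_dict_to_env

# hypothetical mssql credentials; the nested "query" dict is the case the fix targets
creds = {
    "credentials": {
        "host": "loader.example.windows.net",
        "port": 1433,
        "query": {"encrypt": "yes", "trustservercertificate": "yes"},
    }
}
add_config_dict_to_env(creds, sections=("dlt",), destructure_dicts=True)
# profiles.yml can then resolve env_var('DLT__CREDENTIALS__QUERY__ENCRYPT'), etc.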

docs/examples/chess/dbt_transform/models/load_ids.sql

Lines changed: 1 addition & 1 deletion

@@ -12,7 +12,7 @@
 {% else %}
     -- take only loads with status = 0 and no other records
     SELECT load_id, schema_name, schema_version_hash FROM {{ source('dlt', '_dlt_loads') }}
-    GROUP BY 1, 2, 3
+    GROUP BY load_id, schema_name, schema_version_hash
     -- note that it is a hack - we make sure no other statuses exist
     HAVING SUM(status) = 0
 {% endif %}

The ordinal `GROUP BY 1, 2, 3` is spelled out by column name, presumably for the new mssql/sqlserver target: T-SQL does not accept positional references in GROUP BY.

docs/website/docs/dlt-ecosystem/destinations/mssql.md

Lines changed: 1 addition & 1 deletion

@@ -141,7 +141,7 @@ destination.mssql.credentials="mssql://loader:<password>@loader.database.windows
 ```
 
 ### dbt support
-No dbt support yet.
+This destination [integrates with dbt](../transformations/dbt/dbt.md) via [dbt-sqlserver](https://github.com/dbt-msft/dbt-sqlserver).
 
 <!--@@@DLT_TUBA mssql-->
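
With dbt support enabled for mssql, the chess example's dbt package (whose load_ids.sql is fixed above) can be run through dlt's dbt runner. A sketch following the runner API described in dlt's docs; the pipeline settings and package path are placeholders:

import dlt

# any pipeline configured for the mssql destination works here
pipeline = dlt.pipeline(
    pipeline_name="chess_pipeline", destination="mssql", dataset_name="chess_data"
)
# wrap the dbt package and run it with credentials taken from the pipeline
dbt = dlt.dbt.package(pipeline, "docs/examples/chess/dbt_transform")
models = dbt.run_all()
for m in models:
    # run results expose model_name and status (per dlt docs)
    print(f"{m.model_name}: {m.status}")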
