Merged

Commits (37)
bed9771
adds databricks timestamp NTZ
rudolfix Apr 28, 2025
916e893
improves error messages in pyarrow tuples to arrow
rudolfix Apr 28, 2025
9ab3020
decreases timestamp precision to 6 for mssql
rudolfix Apr 28, 2025
d4dad26
adds naive datetime to all data types case, enables fallback when tes…
rudolfix Apr 28, 2025
8923bed
other test fixes
rudolfix Apr 28, 2025
266be34
Merge branch 'devel' into fix/2486-fixes-mssql-datetime-precision
rudolfix Jul 21, 2025
15687eb
always stores incremental state last value as present in the data, te…
rudolfix Jul 22, 2025
8f7451c
fixes ntz timestamp tests
rudolfix Jul 22, 2025
cab6f28
fixes sqlalchemy destination to work with mssql
rudolfix Jul 24, 2025
c99c251
adds func to current module to get current resource instance
rudolfix Jul 24, 2025
7421d04
generates LIMIT clause in sql_database when limit step is present
rudolfix Jul 24, 2025
a716e5e
adds basic tests for mssql in sql_database
rudolfix Jul 24, 2025
795665c
adds docs on tz-awareness in datetime columns in sql_database
rudolfix Jul 24, 2025
f166f83
Merge branch 'devel' into fix/2486-fixes-mssql-datetime-precision
rudolfix Aug 6, 2025
6680bbd
Merge branch 'devel' into fix/2486-fixes-mssql-datetime-precision
rudolfix Aug 16, 2025
6e32240
adds naive an tz aware datetimes to destination caps, implements for …
rudolfix Aug 17, 2025
d8800ed
caches dlt type to python type conversion
rudolfix Aug 17, 2025
1a90e6b
normalizes timezone handling in timestamp and time data types, fixes …
rudolfix Aug 17, 2025
bbaa824
fixes incremental and lag so they always follow the tz-awareness of t…
rudolfix Aug 17, 2025
4f591a0
moves schema inference and data coercion from Schema to item_normaliz…
rudolfix Aug 17, 2025
9d3f251
casts timezones in arrow table normalizations, datetime and time case…
rudolfix Aug 17, 2025
f88e390
tracks resource parent, along pipe parent, fixes resource cloning whe…
rudolfix Aug 17, 2025
176ef1f
updates dbapi sql client for dremio
rudolfix Aug 17, 2025
50c8f8c
adjust column schema inferred from arrow to destination caps in extra…
rudolfix Aug 17, 2025
19fe0e6
moves schema and data setup for all data types tests to common code
rudolfix Aug 17, 2025
2115b59
adds option to exclude columns in sql_table, uses LimitItem to genera…
rudolfix Aug 17, 2025
366f56c
tests sql_database on mssql for all data types and incremental cursor…
rudolfix Aug 17, 2025
950407e
improves tests for row tuples to arrow with cast to dlt schema, tests…
rudolfix Aug 17, 2025
2bb104b
improved test for timestamps and int with precision on duckdb
rudolfix Aug 17, 2025
840dbd8
disables Python 3.14 tests and dashboard test on mac
rudolfix Aug 17, 2025
845bd73
better maybe transaction in job client: takes into account ddl and re…
rudolfix Aug 17, 2025
6a12ba4
pyodbc py3.13 bump
rudolfix Aug 17, 2025
74f220b
timestamp docs WIP
rudolfix Aug 17, 2025
b423a73
fixes tests
rudolfix Aug 17, 2025
6a7a95d
review fixes
rudolfix Aug 31, 2025
a144eb4
Merge branch 'devel' into fix/2486-fixes-mssql-datetime-precision
rudolfix Aug 31, 2025
349ca9b
finalizes docs
rudolfix Aug 31, 2025
31 changes: 15 additions & 16 deletions dlt/common/libs/pyarrow.py
@@ -932,23 +932,29 @@ def convert_numpy_to_arrow(
                     data_type=dlt_data_type,
                     inferred_arrow_type=inferred_arrow_type,
                     details=(
-                        "Insufficient decimal precision. Consider setting `precision` and `scale`"
-                        " hints: https://dlthub.com/docs/general-usage/schema/#tables-and-columns"
+                        f"Insufficient decimal precision {error_msg}. Consider setting `precision`"
+                        " and `scale` hints:"
+                        " https://dlthub.com/docs/general-usage/schema/#tables-and-columns"
                     ),
                 ) from e

             elif (
-                "to utf8 using function cast_string" in error_msg
-                and dlt_data_type in ("json", "text")
-                and pa.types.is_string(inferred_arrow_type)
-            ):
+                ("to utf8 using function cast_string" in error_msg and dlt_data_type == "text")
+                or dlt_data_type == "json"
+            ) and pa.types.is_string(inferred_arrow_type):
                 # this is handled by fallback case 3
                 logger.warning(
                     f"Received `data_type='{dlt_data_type}'`, data requires serialization to"
                     " string, slowing extraction. Cast the JSON field to STRING in your database"
                     " system to improve performance. For example, create and extract data from an"
                     " SQL VIEW that SELECT with CAST."
                 )
+            else:
+                raise PyToArrowConversionException(
+                    data_type=dlt_data_type,
+                    inferred_arrow_type=inferred_arrow_type,
+                    details=f"This conversion is currently unsupported by dlt ({error_msg})",
+                )

     # case 2: encode Sequence and Mapping types (list, tuples, set, dict, etc.) to JSON strings
     # This logic needs to be before case 3, otherwise pyarrow might infer the deserialized JSON object as a `pyarrow.struct` instead of `pyarrow.string`
@@ -977,10 +983,10 @@ def convert_numpy_to_arrow(
     if arrow_array is None and dlt_data_type is None:
         try:
             arrow_array = pa.array(column_data)
-        except (pa.ArrowInvalid, pyarrow.ArrowTypeError):
+        except (pa.ArrowInvalid, pyarrow.ArrowTypeError) as e:
             logger.warning(
-                "Type can't be inferred by `pyarrow`. Values will be encoded as in a loop, slowing"
-                " extraction."
+                f"Type can't be inferred by `pyarrow` {e.args[0]}. Values will be encoded as in a"
+                " loop, slowing extraction."
             )
             encoded_values: list[Union[None, Mapping[Any, Any], Sequence[Any], str]] = []
             for value in column_data:
@@ -1007,13 +1013,6 @@ def convert_numpy_to_arrow(

             arrow_array = pa.array(encoded_values)

-    if arrow_array is None:
-        raise PyToArrowConversionException(
-            data_type=dlt_data_type,
-            inferred_arrow_type=inferred_arrow_type,
-            details="This data type seems currently unsupported by dlt. Please open a GitHub issue",
-        )
-
     return arrow_array

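The fallback referenced in the warning above serializes Python containers to JSON strings so that pyarrow stores a plain string column rather than failing or inferring a struct. A minimal sketch of the idea, not the dlt implementation itself:

import json
import pyarrow as pa

values = [{"a": 1}, [1, 2, 3], None]
# encoding each container as a JSON string keeps the arrow type a plain
# string; pa.array on the raw values would fail on the mixed types
encoded = [None if v is None else json.dumps(v) for v in values]
print(pa.array(encoded).type)  # string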
20 changes: 20 additions & 0 deletions dlt/destinations/impl/databricks/factory.py
@@ -1,5 +1,6 @@
 from typing import Any, Optional, Type, Union, Dict, TYPE_CHECKING, Sequence, Tuple

+from dlt.common import logger
 from dlt.common.data_types.typing import TDataType
 from dlt.common.destination import Destination, DestinationCapabilitiesContext
 from dlt.common.data_writers.escape import escape_databricks_identifier, escape_databricks_literal
@@ -39,6 +40,7 @@ class DatabricksTypeMapper(TypeMapperImpl):
         "BOOLEAN": "bool",
         "DATE": "date",
         "TIMESTAMP": "timestamp",
+        "TIMESTAMP_NTZ": "timestamp",
         "BIGINT": "bigint",
         "INT": "bigint",
         "SMALLINT": "bigint",
Expand Down Expand Up @@ -83,6 +85,24 @@ def to_db_integer_type(self, column: TColumnSchema, table: PreparedTableSchema =
f"bigint with `{precision=:}` can't be mapped to Databricks integer type"
)

def to_db_datetime_type(
self,
column: TColumnSchema,
table: PreparedTableSchema = None,
) -> str:
column_name = column["name"]
table_name = table["name"]
timezone = column.get("timezone", True)
precision = column.get("precision")

if precision and precision != 6:
logger.warn(
f"Databricks does not support precision {precision} for column '{column_name}' in"
f" table '{table_name}'. Will default to 6."
)

return "TIMESTAMP" if timezone else "TIMESTAMP_NTZ"

def from_destination_type(
self, db_type: str, precision: Optional[int] = None, scale: Optional[int] = None
) -> TColumnType:
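With TIMESTAMP_NTZ in the mapping, a tz-naive timestamp column can be requested through dlt column hints. A hedged sketch using the `timezone` hint the mapper above reads (it defaults to True, i.e. TIMESTAMP):

import dlt

# timezone=False should make the Databricks mapper emit TIMESTAMP_NTZ
@dlt.resource(columns={"created_at": {"data_type": "timestamp", "timezone": False}})
def events():
    yield {"id": 1, "created_at": "2025-04-28T12:00:00"}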
3 changes: 2 additions & 1 deletion dlt/destinations/impl/mssql/factory.py
@@ -108,7 +108,8 @@ def _raw_capabilities(self) -> DestinationCapabilitiesContext:
         caps.supports_multiple_statements = True
         caps.supports_create_table_if_not_exists = False  # IF NOT EXISTS not supported
         caps.max_rows_per_insert = 1000
-        caps.timestamp_precision = 7
+        # NOTE: timestamp_precision is 7 in the database but there's no way to write it via Python
+        caps.timestamp_precision = 6
         caps.supported_merge_strategies = ["delete-insert", "upsert", "scd2"]
         caps.supported_replace_strategies = [
             "truncate-and-insert",
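The NOTE above is the core of the fix: Python's datetime resolves to microseconds, so precision 7 (100 ns steps, as in DATETIME2(7)) can never be produced from Python values:

from datetime import datetime

# six fractional digits is the most a Python datetime can carry
ts = datetime(2025, 4, 28, 12, 0, 0, 123456)
print(ts.microsecond)  # 123456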
11 changes: 10 additions & 1 deletion dlt/destinations/impl/sqlalchemy/alter_table.py
@@ -26,7 +26,16 @@ def flush(self) -> None:
 class MigrationMaker:
     def __init__(self, dialect: sa.engine.Dialect) -> None:
         self._buf = ListBuffer()
-        self.ctx = MigrationContext(dialect, None, {"as_sql": True, "output_buffer": self._buf})
+        self.ctx = MigrationContext(
+            dialect,
+            None,
+            {
+                "as_sql": True,
+                "output_buffer": self._buf,
+                "mssql_batch_separator": None,
+                "oracle_batch_separator": None,
+            },
+        )
         self.ops = Operations(self.ctx)

     def add_column(self, table_name: str, column: sa.Column, schema: str) -> None:
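In alembic's offline (as_sql) mode the mssql and oracle dialects append batch separators (GO and /) after each statement; those are client-tool syntax and fail when the generated SQL is executed through a DBAPI cursor. A sketch of the effect, assuming the same MigrationContext options as above:

import io
import sqlalchemy as sa
from sqlalchemy.dialects import mssql
from alembic.migration import MigrationContext
from alembic.operations import Operations

buf = io.StringIO()
ctx = MigrationContext(
    mssql.dialect(),
    None,
    # without mssql_batch_separator=None the buffer would end with "GO",
    # which pyodbc cannot execute
    {"as_sql": True, "output_buffer": buf, "mssql_batch_separator": None},
)
Operations(ctx).add_column("items", sa.Column("note", sa.Text()), schema="dbo")
print(buf.getvalue())  # plain ALTER TABLE statement, no trailing GO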
4 changes: 2 additions & 2 deletions dlt/destinations/impl/sqlalchemy/merge_job.py
@@ -301,9 +301,9 @@ def _get_hard_delete_col_and_cond(  # type: ignore[override]
             cond = col.isnot(None)
             if table["columns"][col_name]["data_type"] == "bool":
                 if invert:
-                    cond = sa.or_(cond, col.is_(False))
+                    cond = sa.or_(cond, col.eq_(False))
                 else:
-                    cond = col.is_(True)
+                    cond = col.eq_(True)
             return col_name, cond

     @classmethod
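T-SQL's BIT type has no IS TRUE / IS FALSE operators, so the identity test col.is_(True) that other dialects accept breaks on mssql, while an equality comparison compiles everywhere. Illustrated with plain SQLAlchemy equality (the eq_ call above is quoted verbatim from the diff):

import sqlalchemy as sa
from sqlalchemy.dialects import mssql, postgresql

t = sa.table("items", sa.column("deleted", sa.Boolean()))

# equality compiles portably; IS TRUE would be rejected by SQL Server
cond = t.c.deleted == sa.true()
print(cond.compile(dialect=postgresql.dialect()))  # items.deleted = true
print(cond.compile(dialect=mssql.dialect()))       # items.deleted = 1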
5 changes: 3 additions & 2 deletions dlt/destinations/impl/sqlalchemy/type_mapper.py
@@ -92,8 +92,9 @@ def to_destination_type(  # type: ignore[override]
             if length is None and column.get("unique"):
                 length = 128
             if length is None:
-                return sa.Text()
-            return sa.String(length=length)
+                return sa.Text().with_variant(sa.UnicodeText(), "mssql")  # type: ignore[no-any-return]
+            else:
+                return sa.String(length=length).with_variant(sa.Unicode(length=length), "mssql")  # type: ignore[no-any-return]
         elif sc_t == "double":
             return self._create_double_type()
         elif sc_t == "bool":
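On mssql, sa.Text renders VARCHAR(MAX) but sa.UnicodeText renders NVARCHAR(MAX), which stores arbitrary Unicode; other dialects keep the plain type. A quick check of how the variant compiles:

import sqlalchemy as sa
from sqlalchemy.dialects import mssql, sqlite

text_type = sa.Text().with_variant(sa.UnicodeText(), "mssql")
print(text_type.compile(dialect=mssql.dialect()))   # NVARCHAR(max)
print(text_type.compile(dialect=sqlite.dialect()))  # TEXT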
5 changes: 3 additions & 2 deletions dlt/destinations/impl/synapse/factory.py
@@ -96,9 +96,10 @@ def _raw_capabilities(self) -> DestinationCapabilitiesContext:
         # 10.000 records is a "safe" amount that always seems to work.
         caps.max_rows_per_insert = 10000

-        # datetimeoffset can store 7 digits for fractional seconds
+        # NOTE: datetimeoffset can store 7 digits for fractional seconds, maybe you could use it with parquet in ns
+        # precision. you can pass synapse(timestamp_precision=7) to override
         # https://learn.microsoft.com/en-us/sql/t-sql/data-types/datetimeoffset-transact-sql?view=sql-server-ver16
-        caps.timestamp_precision = 7
+        caps.timestamp_precision = 6

         caps.supported_merge_strategies = ["delete-insert", "scd2"]
         caps.supported_replace_strategies = ["truncate-and-insert", "insert-from-staging"]
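As the comment says, the column type itself still holds 7 digits, so the old behavior can be restored per pipeline by overriding the capability on the factory. A sketch following that comment:

import dlt
from dlt.destinations import synapse

# restore 7-digit fractional seconds, e.g. for parquet with ns timestamps
pipeline = dlt.pipeline(
    pipeline_name="events",
    destination=synapse(timestamp_precision=7),
)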
10 changes: 10 additions & 0 deletions dlt/extract/decorators.py
@@ -63,6 +63,7 @@
 )

 from dlt.extract.hints import TResourceNestedHints, make_hints
+from dlt.extract.state import get_current_pipe_name
 from dlt.extract.utils import dynstr
 from dlt.extract.exceptions import (
     CurrentSourceNotAvailable,
@@ -1032,6 +1033,15 @@ def get_source() -> DltSource:
     raise CurrentSourceNotAvailable()


+def get_resource() -> DltResource:
+    """Should be executed from inside the function decorated with @dlt.resource
+
+    Returns:
+        DltResource: The resource object to which the currently executing pipe belongs
+    """
+    return Container()[SourceInjectableContext].source.resources[get_current_pipe_name()]
+
+
 TBoundItems = TypeVar("TBoundItems", bound=TDataItems)
 TDeferred = Callable[[], TBoundItems]
 TDeferredFunParams = ParamSpec("TDeferredFunParams")
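Per the commit message, this helper is also exposed on the current module. A hedged usage sketch, assuming it is importable as dlt.current.get_resource (the export itself is not shown in this diff):

import dlt

@dlt.resource
def numbers():
    # the injected source context is available while the pipe executes,
    # e.g. during pipeline.run()
    me = dlt.current.get_resource()
    print(me.name)  # "numbers"
    yield from range(3)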
65 changes: 33 additions & 32 deletions dlt/extract/incremental/__init__.py
@@ -267,28 +267,36 @@ def on_resolved(self) -> None:
                 "Incremental `end_value` was specified without `initial_value`."
                 "`initial_value` is required when using `end_value`."
             )
-        self._cursor_datetime_check(self.initial_value, "initial_value")
-        self._cursor_datetime_check(self.initial_value, "end_value")
         # Ensure end value is "higher" than initial value
-        if (
-            self.end_value is not None
-            and self.last_value_func([self.end_value, self.initial_value]) != self.end_value
-        ):
-            if self.last_value_func in (min, max):
-                adject = "higher" if self.last_value_func is max else "lower"
-                msg = (
-                    f"Incremental `initial_value={self.initial_value}` is {adject} than"
-                    f" `end_value={self.end_value}`. 'end_value' must be {adject} than"
-                    " `initial_value`."
-                )
-            else:
-                msg = (
-                    f"Incremental `initial_value={self.initial_value}` is greater than"
-                    f" `end_value={self.end_value}` as determined by the custom `last_value_func`."
-                    f" The result of `{self.last_value_func.__name__}([end_value,"
-                    " initial_value])` must equal `end_value`"
-                )
-            raise ConfigurationValueError(msg)
+        try:
+            if (
+                self.end_value is not None
+                and self.last_value_func([self.end_value, self.initial_value]) != self.end_value
+            ):
+                if self.last_value_func in (min, max):
+                    adject = "higher" if self.last_value_func is max else "lower"
+                    msg = (
+                        f"Incremental `initial_value={self.initial_value}` is {adject} than"
+                        f" `end_value={self.end_value}`. 'end_value' must be {adject} than"
+                        " `initial_value`."
+                    )
+                else:
+                    msg = (
+                        f"Incremental `initial_value={self.initial_value}` is greater than"
+                        f" `end_value={self.end_value}` as determined by the custom"
+                        " `last_value_func`. The result of"
+                        f" `{self.last_value_func.__name__}([end_value, initial_value])` must equal"
+                        " `end_value`"
+                    )
+                raise ConfigurationValueError(msg)
+        except ConfigurationValueError:
+            raise
+        except Exception as exc:
+            raise ConfigurationValueError(
+                f"Incremental `initial_value={self.initial_value}` and `end_value={self.end_value}`"
+                " are not comparable. Make sure they are of the same type and tz-awareness: "
+                + str(exc)
+            ) from exc

     def parse_native_representation(self, native_value: Any) -> None:
         if isinstance(native_value, Incremental):
@@ -344,6 +352,9 @@ def get_state(self) -> IncrementalColumnState:
                     "unique_hashes": [],
                 }
             )
+        else:
+            # update initial value in existing state
+            self._cached_state["initial_value"] = self.initial_value
         return self._cached_state

     @staticmethod
@@ -354,16 +365,6 @@ def _get_state(resource_name: str, cursor_path: str) -> IncrementalColumnState:
         # if state params is empty
         return state

-    @staticmethod
-    def _cursor_datetime_check(value: Any, arg_name: str) -> None:
-        if value and isinstance(value, datetime) and value.tzinfo is None:
-            logger.warning(
-                f"The {arg_name} argument {value} is a datetime without timezone. This may result"
-                " in an error when such values are compared by Incremental class. Note that `dlt`"
-                " stores datetimes in timezone-aware types so the UTC timezone will be added by"
-                " the destination"
-            )
-
     @property
     def last_value(self) -> Optional[TCursorValue]:
         s = self.get_state()
@@ -382,7 +383,7 @@ def last_value(self) -> Optional[TCursorValue]:
             )
         elif last_value is not None:
             last_value = apply_lag(
-                self.lag, s["initial_value"], last_value, self.last_value_func
+                self.lag, self.initial_value, last_value, self.last_value_func
             )

         return last_value
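The try/except added to on_resolved guards exactly this failure mode: Python refuses to order datetimes of mixed tz-awareness, so a builtin or custom last_value_func raises instead of returning a value:

from datetime import datetime, timezone

naive = datetime(2025, 1, 1)
aware = datetime(2025, 1, 1, tzinfo=timezone.utc)
try:
    # what last_value_func([end_value, initial_value]) does for max
    max([naive, aware])
except TypeError as exc:
    print(exc)  # can't compare offset-naive and offset-aware datetimes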