Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 30 additions & 3 deletions dlt/common/configuration/specs/sftp_credentials.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,29 @@
from typing import Any, Dict, Optional, Annotated

from dlt.common.typing import TSecretStrValue, DictStrAny, SocketLike
from typing import Any, Dict, Optional, Annotated, TYPE_CHECKING, List
from typing_extensions import TypeAlias, Callable
import socket
from dlt.common.typing import TSecretStrValue, SocketLike
from dlt.common.configuration.specs.base_configuration import (
CredentialsConfiguration,
configspec,
NotResolved,
)

# The paramiko types below are needed only for annotations. They are imported
# solely under TYPE_CHECKING so this module never imports paramiko at runtime;
# if paramiko is not importable while type checking, or at runtime, the names
# fall back to `Any`.
if TYPE_CHECKING:
    try:
        from paramiko import PKey
        from paramiko.auth_strategy import AuthStrategy
        from paramiko import Transport
    except ImportError:
        PKey = Any  # type: ignore[misc, assignment]
        AuthStrategy = Any  # type: ignore[misc, assignment]
        Transport = Any  # type: ignore[misc, assignment]
else:
    # Runtime path: the names must still exist because they appear in
    # annotations that are evaluated when the class body runs.
    PKey = Any
    AuthStrategy = Any
    Transport = Any

# A callable that receives the connection socket and returns the Transport to use.
# NOTE(review): the factory used in the SFTP tests is declared as
# `factory(sock, **kwargs)`, which suggests paramiko passes extra keyword
# arguments to the factory - confirm against the paramiko docs and consider
# whether this alias should accept them (e.g. `Callable[..., Transport]`).
SFTPTransportFactory: TypeAlias = Callable[[socket.socket], Transport]


@configspec
class SFTPCredentials(CredentialsConfiguration):
Expand All @@ -32,6 +49,8 @@ class SFTPCredentials(CredentialsConfiguration):
sftp_port: Optional[int] = 22
sftp_username: Optional[str] = None
sftp_password: Optional[TSecretStrValue] = None
# Runtime-only pkey; cannot be loaded from env var, skip configspec.
sftp_pkey: Annotated[Optional[PKey], NotResolved()] = None
sftp_key_filename: Optional[str] = None
sftp_key_passphrase: Optional[TSecretStrValue] = None
sftp_timeout: Optional[float] = None
Expand All @@ -48,6 +67,10 @@ class SFTPCredentials(CredentialsConfiguration):
sftp_gss_deleg_creds: Optional[bool] = True
sftp_gss_host: Optional[str] = None
sftp_gss_trust_dns: Optional[bool] = True
# Runtime-only vars below; cannot be loaded from env var, skip configspec.
sftp_disabled_algorithms: Annotated[Optional[Dict[str, List[str]]], NotResolved()] = None
sftp_transport_factory: Annotated[Optional[SFTPTransportFactory], NotResolved()] = None
sftp_auth_strategy: Annotated[Optional[AuthStrategy], NotResolved()] = None

def to_fsspec_credentials(self) -> Dict[str, Any]:
"""Return a dict that can be passed to fsspec SFTP/SSHClient.connect method."""
Expand All @@ -56,6 +79,7 @@ def to_fsspec_credentials(self) -> Dict[str, Any]:
"port": self.sftp_port,
"username": self.sftp_username,
"password": self.sftp_password,
"pkey": self.sftp_pkey,
"key_filename": self.sftp_key_filename,
"passphrase": self.sftp_key_passphrase,
"timeout": self.sftp_timeout,
Expand All @@ -71,6 +95,9 @@ def to_fsspec_credentials(self) -> Dict[str, Any]:
"gss_deleg_creds": self.sftp_gss_deleg_creds,
"gss_host": self.sftp_gss_host,
"gss_trust_dns": self.sftp_gss_trust_dns,
"disabled_algorithms": self.sftp_disabled_algorithms,
"transport_factory": self.sftp_transport_factory,
"auth_strategy": self.sftp_auth_strategy,
}

return credentials
42 changes: 25 additions & 17 deletions docs/website/docs/dlt-ecosystem/destinations/filesystem.md
Original file line number Diff line number Diff line change
Expand Up @@ -328,24 +328,32 @@ Configure your SFTP credentials by editing the `.dlt/secrets.toml` file. By defa
Below are the possible fields for SFTP credentials configuration:

```text
sftp_port # The port for SFTP, defaults to 22 (standard for SSH/SFTP)
sftp_username # Your SFTP username, defaults to None
sftp_password # Your SFTP password (if using password-based auth), defaults to None
sftp_key_filename # Path to your private key file for key-based authentication, defaults to None
sftp_key_passphrase # Passphrase for your private key (if applicable), defaults to None
sftp_timeout # Timeout for establishing a connection, defaults to None
sftp_banner_timeout # Timeout for receiving the banner during authentication, defaults to None
sftp_auth_timeout # Authentication timeout, defaults to None
sftp_channel_timeout # Channel timeout for SFTP operations, defaults to None
sftp_allow_agent # Use SSH agent for key management (if available), defaults to True
sftp_look_for_keys # Search for SSH keys in the default SSH directory (~/.ssh/), defaults to True
sftp_compress # Enable compression (can improve performance over slow networks), defaults to False
sftp_gss_auth # Use GSS-API for authentication, defaults to False
sftp_gss_kex # Use GSS-API for key exchange, defaults to False
sftp_gss_deleg_creds # Delegate credentials with GSS-API, defaults to True
sftp_gss_host # Host for GSS-API, defaults to None
sftp_gss_trust_dns # Trust DNS for GSS-API, defaults to True
sftp_port # The port for SFTP, defaults to 22 (standard for SSH/SFTP)
sftp_username # Your SFTP username, defaults to None
sftp_password # Your SFTP password (if using password-based auth), defaults to None
*sftp_pkey* # An in-memory private key object (paramiko `PKey`) for key-based authentication, defaults to None
sftp_key_filename # Path to your private key file for key-based authentication, defaults to None
sftp_key_passphrase # Passphrase for your private key (if applicable), defaults to None
sftp_timeout # Timeout for establishing a connection, defaults to None
sftp_banner_timeout # Timeout for receiving the banner during authentication, defaults to None
sftp_auth_timeout # Authentication timeout, defaults to None
sftp_channel_timeout # Channel timeout for SFTP operations, defaults to None
sftp_allow_agent # Use SSH agent for key management (if available), defaults to True
sftp_look_for_keys # Search for SSH keys in the default SSH directory (~/.ssh/), defaults to True
sftp_compress # Enable compression (can improve performance over slow networks), defaults to False
*sftp_sock* # Custom socket to use for communication to target host, defaults to None
sftp_gss_auth # Use GSS-API for authentication, defaults to False
sftp_gss_kex # Use GSS-API for key exchange, defaults to False
sftp_gss_deleg_creds # Delegate credentials with GSS-API, defaults to True
sftp_gss_host # Host for GSS-API, defaults to None
sftp_gss_trust_dns # Trust DNS for GSS-API, defaults to True
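*sftp_disabled_algorithms* # Mapping of algorithm type to the list of algorithm names to disable, e.g. {"pubkeys": ["ssh-rsa"]}, defaults to None
*sftp_transport_factory* # Callable that receives the socket and returns the paramiko Transport to use, defaults to None
*sftp_auth_strategy* # paramiko authentication strategy object used instead of the default authentication flow, defaults to None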
```
:::note
Parameters marked with `*` are runtime-only: they cannot be set through `.dlt/secrets.toml` or environment variables and must be assigned in code on the credentials object.
:::

:::info
For more information about credentials parameters: https://docs.paramiko.org/en/3.3/api/client.html#paramiko.client.SSHClient.connect
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ dev = [
"flake8-print>=5.0.0,<6",
"pip>=24.0.0",
"pydoclint>=0.6.5,<0.7",
"types-paramiko>=3.5.0.20250708",
]

# NOTE: those dependencies are used to test built in sources
Expand Down
86 changes: 84 additions & 2 deletions tests/load/filesystem_sftp/test_filesystem_sftp.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import fsspec
import socket
import dlt
from dlt.common.configuration.specs import SFTPCredentials

from dlt.common.json import json
from dlt.common.configuration.inject import with_config
Expand All @@ -12,9 +11,14 @@

from tests.load.utils import ALL_FILESYSTEM_DRIVERS


if "sftp" not in ALL_FILESYSTEM_DRIVERS:
pytest.skip("sftp filesystem driver not configured", allow_module_level=True)

from paramiko.auth_strategy import Password
from paramiko import RSAKey, Transport
from paramiko.ssh_exception import SSHException


@with_config(spec=FilesystemConfiguration, sections=("sources", "filesystem"))
def get_config(config: FilesystemConfiguration = None) -> FilesystemConfiguration:
Expand Down Expand Up @@ -126,7 +130,17 @@ def states():
assert sorted(result_states) == sorted(expected_states)


def run_sftp_auth(user, password=None, key=None, passphrase=None, sock=None):
def run_sftp_auth(
user,
password=None,
pkey=None,
key=None,
passphrase=None,
sock=None,
disabled_algorithms=None,
transport_factory=None,
auth_strategy=None,
):
env_vars = {
"SOURCES__FILESYSTEM__BUCKET_URL": "sftp://localhost",
"SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT": "2222",
Expand All @@ -144,6 +158,14 @@ def run_sftp_auth(user, password=None, key=None, passphrase=None, sock=None):

config = get_config()

if disabled_algorithms:
config.credentials.sftp_disabled_algorithms = disabled_algorithms # type: ignore[union-attr]
if transport_factory:
config.credentials.sftp_transport_factory = transport_factory # type: ignore[union-attr]
if auth_strategy:
config.credentials.sftp_auth_strategy = auth_strategy # type: ignore[union-attr]
if pkey:
config.credentials.sftp_pkey = pkey # type: ignore[union-attr]
if sock:
config.credentials.sftp_sock = sock # type: ignore[union-attr]

Expand All @@ -163,6 +185,17 @@ def test_filesystem_sftp_auth_private_key_protected():
run_sftp_auth("bobby", key=get_key_path("bobby"), passphrase="passphrase123")


def test_filesystem_sftp_auth_pkey():
    """Authenticate user `foo` with an in-memory RSA key object instead of a key file path."""
    foo_private_key = RSAKey.from_private_key_file(get_key_path("foo"))
    run_sftp_auth("foo", pkey=foo_private_key)


def test_filesystem_sftp_pkey_auth_pkey_protected():
    """Authenticate user `bobby` with a passphrase-protected RSA key loaded into memory."""
    # The passphrase is consumed here, while loading the key; only the
    # resulting key object is handed to the credentials.
    bobby_private_key = RSAKey.from_private_key_file(
        filename=get_key_path("bobby"),
        password="passphrase123",
    )
    run_sftp_auth("bobby", pkey=bobby_private_key)


# Test requires - ssh_agent with user's bobby key loaded. The commands and file names required are:
# eval "$(ssh-agent -s)"
# cp /path/to/tests/load/filesystem_sftp/bobby_rsa* ~/.ssh/id_rsa
Expand Down Expand Up @@ -190,3 +223,52 @@ def test_filesystem_sftp_with_socket():
sock.close()
with pytest.raises(OSError):
run_sftp_auth("billy", key=get_key_path("billy"), sock=sock)


class TaggedTransport(Transport):
    """Transport subclass that stamps each instance with a marker attribute.

    Tests look for `factory_tag` to tell that a transport was built by a
    custom factory rather than by the default one.
    """

    def __init__(self, sock, **kwargs):
        # Let the base Transport do all real setup with the arguments as given.
        super().__init__(sock, **kwargs)
        # Marker checked by the tests; it has no effect on the connection itself.
        self.factory_tag = "used-tagged-transport"


def test_filesystem_sftp_with_tagged_transport():
    """Check that a custom `transport_factory` is used to build the connection.

    The factory records every transport it creates, so the test can verify
    that exactly one `TaggedTransport` was built and that it is still usable
    after authentication.
    """
    created = []

    def factory(sock, **kwargs):
        # Accept and forward whatever keyword arguments the caller supplies.
        t = TaggedTransport(sock, **kwargs)
        created.append(t)
        return t

    run_sftp_auth("foo", key=get_key_path("foo"), transport_factory=factory)

    # Verify it was used
    assert len(created) == 1, "Custom transport factory never ran"
    transport = created[0]
    assert isinstance(transport, TaggedTransport)
    assert transport.factory_tag == "used-tagged-transport"

    # And ensure it's still functional. The context manager closes the SFTP
    # client even if the assertion fails, so a failing test does not leak
    # the channel.
    with transport.open_sftp_client() as sftp:
        assert hasattr(sftp, "listdir")


def test_filesystem_sftp_disabled_algorithms():
    """Check that `disabled_algorithms` reaches the SSH layer.

    The `foo` account is known to use RSA keys, so with every RSA public-key
    algorithm disabled the connection attempt is expected to raise
    `SSHException`.
    """
    rsa_pubkey_algorithms = ["ssh-rsa", "rsa-sha2-256", "rsa-sha2-512"]
    disabled = {"pubkeys": rsa_pubkey_algorithms}

    with pytest.raises(SSHException):
        run_sftp_auth("foo", key=get_key_path("foo"), disabled_algorithms=disabled)


def test_filesystem_sftp_auth_strategy():
    """Check that an explicitly supplied `auth_strategy` makes it through the config."""
    # No password or key is passed to run_sftp_auth, so authentication has to
    # come from the object given as auth_strategy.
    # NOTE(review): `Password` looks like a single auth source rather than a
    # full AuthStrategy; presumably it works because it exposes a compatible
    # `authenticate` method - confirm against the paramiko docs.
    password_auth = Password("foo", lambda: "pass")
    run_sftp_auth("foo", auth_strategy=password_auth)
14 changes: 14 additions & 0 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading