Skip to content

Commit f17e981

Browse files
AyushPatel101 and Ayush Patel
authored
Add remaining paramiko connect params to SFTP filesystem (#2823)
* Add pkey, disabled_algorithms, transport_factory and auth_strategy parameters to paramiko.client.connect. Also update filesystem docs for SFTP creds * Move paramiko imports after the pytest.skip --------- Co-authored-by: Ayush Patel <[email protected]>
1 parent 440a7a3 commit f17e981

File tree

5 files changed

+154
-22
lines changed

5 files changed

+154
-22
lines changed

dlt/common/configuration/specs/sftp_credentials.py

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,29 @@
1-
from typing import Any, Dict, Optional, Annotated
2-
3-
from dlt.common.typing import TSecretStrValue, DictStrAny, SocketLike
1+
from typing import Any, Dict, Optional, Annotated, TYPE_CHECKING, List
2+
from typing_extensions import TypeAlias, Callable
3+
import socket
4+
from dlt.common.typing import TSecretStrValue, SocketLike
45
from dlt.common.configuration.specs.base_configuration import (
56
CredentialsConfiguration,
67
configspec,
78
NotResolved,
89
)
910

11+
if TYPE_CHECKING:
12+
try:
13+
from paramiko import PKey
14+
from paramiko.auth_strategy import AuthStrategy
15+
from paramiko import Transport
16+
except ImportError:
17+
PKey = Any # type: ignore[misc, assignment]
18+
AuthStrategy = Any # type: ignore[misc, assignment]
19+
Transport = Any # type: ignore[misc, assignment]
20+
else:
21+
PKey = Any
22+
AuthStrategy = Any
23+
Transport = Any
24+
25+
SFTPTransportFactory: TypeAlias = Callable[[socket.socket], Transport]
26+
1027

1128
@configspec
1229
class SFTPCredentials(CredentialsConfiguration):
@@ -32,6 +49,8 @@ class SFTPCredentials(CredentialsConfiguration):
3249
sftp_port: Optional[int] = 22
3350
sftp_username: Optional[str] = None
3451
sftp_password: Optional[TSecretStrValue] = None
52+
# Runtime-only private key object: it cannot be loaded from env vars or config files, so it is marked NotResolved and excluded from config resolution.
53+
sftp_pkey: Annotated[Optional[PKey], NotResolved()] = None
3554
sftp_key_filename: Optional[str] = None
3655
sftp_key_passphrase: Optional[TSecretStrValue] = None
3756
sftp_timeout: Optional[float] = None
@@ -48,6 +67,10 @@ class SFTPCredentials(CredentialsConfiguration):
4867
sftp_gss_deleg_creds: Optional[bool] = True
4968
sftp_gss_host: Optional[str] = None
5069
sftp_gss_trust_dns: Optional[bool] = True
70+
# Runtime-only fields below: they cannot be loaded from env vars or config files, so they are marked NotResolved and excluded from config resolution.
71+
sftp_disabled_algorithms: Annotated[Optional[Dict[str, List[str]]], NotResolved()] = None
72+
sftp_transport_factory: Annotated[Optional[SFTPTransportFactory], NotResolved()] = None
73+
sftp_auth_strategy: Annotated[Optional[AuthStrategy], NotResolved()] = None
5174

5275
def to_fsspec_credentials(self) -> Dict[str, Any]:
5376
"""Return a dict that can be passed to fsspec SFTP/SSHClient.connect method."""
@@ -56,6 +79,7 @@ def to_fsspec_credentials(self) -> Dict[str, Any]:
5679
"port": self.sftp_port,
5780
"username": self.sftp_username,
5881
"password": self.sftp_password,
82+
"pkey": self.sftp_pkey,
5983
"key_filename": self.sftp_key_filename,
6084
"passphrase": self.sftp_key_passphrase,
6185
"timeout": self.sftp_timeout,
@@ -71,6 +95,9 @@ def to_fsspec_credentials(self) -> Dict[str, Any]:
7195
"gss_deleg_creds": self.sftp_gss_deleg_creds,
7296
"gss_host": self.sftp_gss_host,
7397
"gss_trust_dns": self.sftp_gss_trust_dns,
98+
"disabled_algorithms": self.sftp_disabled_algorithms,
99+
"transport_factory": self.sftp_transport_factory,
100+
"auth_strategy": self.sftp_auth_strategy,
74101
}
75102

76103
return credentials

docs/website/docs/dlt-ecosystem/destinations/filesystem.md

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -328,24 +328,32 @@ Configure your SFTP credentials by editing the `.dlt/secrets.toml` file. By defa
328328
Below are the possible fields for SFTP credentials configuration:
329329

330330
```text
331-
sftp_port # The port for SFTP, defaults to 22 (standard for SSH/SFTP)
332-
sftp_username # Your SFTP username, defaults to None
333-
sftp_password # Your SFTP password (if using password-based auth), defaults to None
334-
sftp_key_filename # Path to your private key file for key-based authentication, defaults to None
335-
sftp_key_passphrase # Passphrase for your private key (if applicable), defaults to None
336-
sftp_timeout # Timeout for establishing a connection, defaults to None
337-
sftp_banner_timeout # Timeout for receiving the banner during authentication, defaults to None
338-
sftp_auth_timeout # Authentication timeout, defaults to None
339-
sftp_channel_timeout # Channel timeout for SFTP operations, defaults to None
340-
sftp_allow_agent # Use SSH agent for key management (if available), defaults to True
341-
sftp_look_for_keys # Search for SSH keys in the default SSH directory (~/.ssh/), defaults to True
342-
sftp_compress # Enable compression (can improve performance over slow networks), defaults to False
343-
sftp_gss_auth # Use GSS-API for authentication, defaults to False
344-
sftp_gss_kex # Use GSS-API for key exchange, defaults to False
345-
sftp_gss_deleg_creds # Delegate credentials with GSS-API, defaults to True
346-
sftp_gss_host # Host for GSS-API, defaults to None
347-
sftp_gss_trust_dns # Trust DNS for GSS-API, defaults to True
331+
sftp_port # The port for SFTP, defaults to 22 (standard for SSH/SFTP)
332+
sftp_username # Your SFTP username, defaults to None
333+
sftp_password # Your SFTP password (if using password-based auth), defaults to None
334+
*sftp_pkey* # Private key object (paramiko `PKey`) for key-based authentication, defaults to None
335+
sftp_key_filename # Path to your private key file for key-based authentication, defaults to None
336+
sftp_key_passphrase # Passphrase for your private key (if applicable), defaults to None
337+
sftp_timeout # Timeout for establishing a connection, defaults to None
338+
sftp_banner_timeout # Timeout for receiving the banner during authentication, defaults to None
339+
sftp_auth_timeout # Authentication timeout, defaults to None
340+
sftp_channel_timeout # Channel timeout for SFTP operations, defaults to None
341+
sftp_allow_agent # Use SSH agent for key management (if available), defaults to True
342+
sftp_look_for_keys # Search for SSH keys in the default SSH directory (~/.ssh/), defaults to True
343+
sftp_compress # Enable compression (can improve performance over slow networks), defaults to False
344+
*sftp_sock* # Custom socket to use for communication to target host, defaults to None
345+
sftp_gss_auth # Use GSS-API for authentication, defaults to False
346+
sftp_gss_kex # Use GSS-API for key exchange, defaults to False
347+
sftp_gss_deleg_creds # Delegate credentials with GSS-API, defaults to True
348+
sftp_gss_host # Host for GSS-API, defaults to None
349+
sftp_gss_trust_dns # Trust DNS for GSS-API, defaults to True
350+
*sftp_disabled_algorithms* # Algorithms to disable, as a dict mapping algorithm type to a list of names (e.g. {"pubkeys": ["ssh-rsa"]}), defaults to None
351+
*sftp_transport_factory* # Custom transport factory, defaults to None
352+
*sftp_auth_strategy* # Authentication strategy, defaults to None
348353
```
354+
:::note
355+
Fields wrapped in `*` (for example, `*sftp_pkey*`) cannot be set through `.dlt/secrets.toml`; they must be assigned in code on the credentials object.
356+
:::
349357

350358
:::info
351359
For more information about credentials parameters: https://docs.paramiko.org/en/3.3/api/client.html#paramiko.client.SSHClient.connect

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ dev = [
234234
"flake8-print>=5.0.0,<6",
235235
"pip>=24.0.0",
236236
"pydoclint>=0.6.5,<0.7",
237+
"types-paramiko>=3.5.0.20250708",
237238
]
238239

239240
# NOTE: those dependencies are used to test built in sources

tests/load/filesystem_sftp/test_filesystem_sftp.py

Lines changed: 84 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import fsspec
44
import socket
55
import dlt
6-
from dlt.common.configuration.specs import SFTPCredentials
76

87
from dlt.common.json import json
98
from dlt.common.configuration.inject import with_config
@@ -12,9 +11,14 @@
1211

1312
from tests.load.utils import ALL_FILESYSTEM_DRIVERS
1413

14+
1515
if "sftp" not in ALL_FILESYSTEM_DRIVERS:
1616
pytest.skip("sftp filesystem driver not configured", allow_module_level=True)
1717

18+
from paramiko.auth_strategy import Password
19+
from paramiko import RSAKey, Transport
20+
from paramiko.ssh_exception import SSHException
21+
1822

1923
@with_config(spec=FilesystemConfiguration, sections=("sources", "filesystem"))
2024
def get_config(config: FilesystemConfiguration = None) -> FilesystemConfiguration:
@@ -126,7 +130,17 @@ def states():
126130
assert sorted(result_states) == sorted(expected_states)
127131

128132

129-
def run_sftp_auth(user, password=None, key=None, passphrase=None, sock=None):
133+
def run_sftp_auth(
134+
user,
135+
password=None,
136+
pkey=None,
137+
key=None,
138+
passphrase=None,
139+
sock=None,
140+
disabled_algorithms=None,
141+
transport_factory=None,
142+
auth_strategy=None,
143+
):
130144
env_vars = {
131145
"SOURCES__FILESYSTEM__BUCKET_URL": "sftp://localhost",
132146
"SOURCES__FILESYSTEM__CREDENTIALS__SFTP_PORT": "2222",
@@ -144,6 +158,14 @@ def run_sftp_auth(user, password=None, key=None, passphrase=None, sock=None):
144158

145159
config = get_config()
146160

161+
if disabled_algorithms:
162+
config.credentials.sftp_disabled_algorithms = disabled_algorithms # type: ignore[union-attr]
163+
if transport_factory:
164+
config.credentials.sftp_transport_factory = transport_factory # type: ignore[union-attr]
165+
if auth_strategy:
166+
config.credentials.sftp_auth_strategy = auth_strategy # type: ignore[union-attr]
167+
if pkey:
168+
config.credentials.sftp_pkey = pkey # type: ignore[union-attr]
147169
if sock:
148170
config.credentials.sftp_sock = sock # type: ignore[union-attr]
149171

@@ -163,6 +185,17 @@ def test_filesystem_sftp_auth_private_key_protected():
163185
run_sftp_auth("bobby", key=get_key_path("bobby"), passphrase="passphrase123")
164186

165187

188+
def test_filesystem_sftp_auth_pkey():
189+
run_sftp_auth("foo", pkey=RSAKey.from_private_key_file(get_key_path("foo")))
190+
191+
192+
def test_filesystem_sftp_pkey_auth_pkey_protected():
193+
run_sftp_auth(
194+
"bobby",
195+
pkey=RSAKey.from_private_key_file(filename=get_key_path("bobby"), password="passphrase123"),
196+
)
197+
198+
166199
# Test requires - ssh_agent with user's bobby key loaded. The commands and file names required are:
167200
# eval "$(ssh-agent -s)"
168201
# cp /path/to/tests/load/filesystem_sftp/bobby_rsa* ~/.ssh/id_rsa
@@ -190,3 +223,52 @@ def test_filesystem_sftp_with_socket():
190223
sock.close()
191224
with pytest.raises(OSError):
192225
run_sftp_auth("billy", key=get_key_path("billy"), sock=sock)
226+
227+
228+
class TaggedTransport(Transport):
229+
"""A Transport class that tags itself so we can detect it in tests."""
230+
231+
def __init__(self, sock, **kwargs):
232+
super().__init__(sock, **kwargs)
233+
# Marker attribute that the test asserts on to confirm this transport class was actually used:
234+
self.factory_tag = "used-tagged-transport"
235+
236+
237+
def test_filesystem_sftp_with_tagged_transport():
238+
created = []
239+
240+
def factory(sock, **kwargs):
241+
t = TaggedTransport(sock, **kwargs)
242+
created.append(t)
243+
return t
244+
245+
run_sftp_auth("foo", key=get_key_path("foo"), transport_factory=factory)
246+
247+
# Verify it was used
248+
assert len(created) == 1, "Custom transport factory never ran"
249+
transport = created[0]
250+
assert isinstance(transport, TaggedTransport)
251+
assert transport.factory_tag == "used-tagged-transport"
252+
253+
# And ensure it's still functional
254+
sftp = transport.open_sftp_client()
255+
assert hasattr(sftp, "listdir")
256+
sftp.close()
257+
258+
259+
def test_filesystem_sftp_disabled_algorithms():
260+
# foo authenticates with an RSA key, so disabling every RSA pubkey algorithm must make the connection fail
261+
with pytest.raises(SSHException):
262+
run_sftp_auth(
263+
"foo",
264+
key=get_key_path("foo"),
265+
disabled_algorithms={"pubkeys": ["ssh-rsa", "rsa-sha2-256", "rsa-sha2-512"]},
266+
)
267+
268+
269+
def test_filesystem_sftp_auth_strategy():
270+
# Verify that passing an alternate auth_strategy makes it through config.
271+
run_sftp_auth(
272+
"foo",
273+
auth_strategy=Password("foo", lambda: "pass"),
274+
)

uv.lock

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)