Skip to content

Commit 6e018c8

Browse files
Fix prefix match for ADLS_SAS_TOKEN (#2461)
The current logic for constructing an ADLS file system incorrectly matches keys prefixed with [`adls.sas-token-expires-at-ms.`](https://github.com/apache/iceberg/blob/d1771207c9040f1c8b6886665b56d1a972fe402a/azure/src/main/java/org/apache/iceberg/azure/AzureProperties.java#L45). If such a key is provided and encountered before a key prefixed with [`adls.sas-token.`](https://github.com/apache/iceberg/blob/d1771207c9040f1c8b6886665b56d1a972fe402a/azure/src/main/java/org/apache/iceberg/azure/AzureProperties.java#L44), `ADLS_ACCOUNT_NAME` and `ADLS_SAS_TOKEN` are determined incorrectly.

For example, with the following properties:

```
{
    'adls.sas-token-expires-at-ms.testaccount.dfs.core.windows.net': '1757597218121',
    'adls.sas-token.testaccount.dfs.core.windows.net': '<redacted>'
}
```

**Before**

```
{
    'adls.sas-token-expires-at-ms.testaccount.dfs.core.windows.net': '1757597218121',
    'adls.sas-token.testaccount.dfs.core.windows.net': '<redacted>',
    'adls.account-name': 'adls',
    'adls.sas-token': '1757597218121'
}
```

**After**

```
{
    'adls.sas-token-expires-at-ms.testaccount.dfs.core.windows.net': '1757597218121',
    'adls.sas-token.testaccount.dfs.core.windows.net': '<redacted>',
    'adls.account-name': 'testaccount',
    'adls.sas-token': '<redacted>'
}
```

---------

Co-authored-by: Thomas Powell <[email protected]>
1 parent a9897de commit 6e018c8

File tree

2 files changed

+29
-1
lines changed

2 files changed

+29
-1
lines changed

pyiceberg/io/fsspec.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ def _adls(properties: Properties) -> AbstractFileSystem:
207207
from azure.core.credentials_async import AsyncTokenCredential
208208

209209
for key, sas_token in {
210-
key.replace(f"{ADLS_SAS_TOKEN}.", ""): value for key, value in properties.items() if key.startswith(ADLS_SAS_TOKEN)
210+
key.replace(f"{ADLS_SAS_TOKEN}.", ""): value for key, value in properties.items() if key.startswith(f"{ADLS_SAS_TOKEN}.")
211211
}.items():
212212
if ADLS_ACCOUNT_NAME not in properties:
213213
properties[ADLS_ACCOUNT_NAME] = key.split(".")[0]

tests/io/test_fsspec.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,34 @@ def test_fsspec_pickle_round_trip_aldfs(adls_fsspec_fileio: FsspecFileIO) -> Non
508508
_test_fsspec_pickle_round_trip(adls_fsspec_fileio, "abfss://tests/foo.txt")
509509

510510

511+
@pytest.mark.adls
512+
def test_adls_account_name_sas_token_extraction() -> None:
513+
session_properties: Properties = {
514+
"adls.tenant-id": "test-tenant-id",
515+
"adls.account-host": "testaccount.dfs.core.windows.net",
516+
"adls.sas-token.testaccount.dfs.core.windows.net": "test-sas-token",
517+
"adls.sas-token-expires-at-ms.testaccount.dfs.core.windows.net": "1757597218121",
518+
}
519+
520+
with mock.patch("adlfs.AzureBlobFileSystem") as mock_adlfs:
521+
adls_fileio = FsspecFileIO(properties=session_properties)
522+
filename = str(uuid.uuid4())
523+
524+
adls_fileio.new_input(location=f"abfss://tests/{filename}")
525+
526+
mock_adlfs.assert_called_with(
527+
connection_string=None,
528+
credential=None,
529+
account_name="testaccount",
530+
account_key=None,
531+
sas_token="test-sas-token",
532+
tenant_id="test-tenant-id",
533+
client_id=None,
534+
client_secret=None,
535+
account_host="testaccount.dfs.core.windows.net",
536+
)
537+
538+
511539
@pytest.mark.gcs
512540
def test_fsspec_new_input_file_gcs(fsspec_fileio_gcs: FsspecFileIO) -> None:
513541
"""Test creating a new input file from a fsspec file-io"""

0 commit comments

Comments
 (0)