Skip to content

Commit bef3add

Browse files
committed
Relevant notes and docstring improvements
1 parent e94bc93 commit bef3add

File tree

2 files changed

+6
-0
lines changed

2 files changed

+6
-0
lines changed

dlt/common/utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,10 @@ def digest256_tar_stream(stream: BinaryIO, chunk_size: int = 8192) -> str:
137137
Hashes only filenames and file contents, ignoring timestamps and other metadata.
138138
This ensures identical file contents produce identical hashes regardless of when
139139
the tar was created.
140+
141+
Note: This function operates entirely in memory using tar.extractfile(), which reads
142+
from the archive stream. No files are written to disk, preventing leakage of sensitive
143+
data that may be contained in the archive.
140144
"""
141145
stream.seek(0)
142146
hash_obj = hashlib.sha3_256()

tests/workspace/deployment/test_package_builder.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ def test_build_package() -> None:
7575
assert str(package_path).startswith(f"{ctx.data_dir}{os.sep}deployment-")
7676
assert len(content_hash) == 44 # sha3_256 base64 string
7777

78+
# NOTE: Sleep ensures tarballs have different timestamps in their metadata, proving
79+
# digest256_tar_stream produces identical hashes despite different creation times
7880
time.sleep(0.2)
7981

8082
package_path_2, content_hash_2 = builder.build_package(selector)

0 commit comments

Comments
 (0)