11package de .tum .in .www1 .hephaestus .agent .sandbox .docker ;
22
33import de .tum .in .www1 .hephaestus .agent .sandbox .spi .SandboxException ;
4+ import java .io .BufferedInputStream ;
5+ import java .io .BufferedOutputStream ;
46import java .io .ByteArrayInputStream ;
57import java .io .ByteArrayOutputStream ;
68import java .io .IOException ;
79import java .io .InputStream ;
10+ import java .io .OutputStream ;
811import java .nio .file .Files ;
912import java .nio .file .Path ;
1013import java .util .HashMap ;
1114import java .util .Map ;
15+ import java .util .stream .Stream ;
1216import org .apache .commons .compress .archivers .tar .TarArchiveEntry ;
1317import org .apache .commons .compress .archivers .tar .TarArchiveInputStream ;
1418import org .apache .commons .compress .archivers .tar .TarArchiveOutputStream ;
@@ -34,19 +38,38 @@ public class SandboxWorkspaceManager {
3438 /** Maximum total size of injected input files (50 MB). */
3539 static final long MAX_INPUT_BYTES = 50L * 1024 * 1024 ;
3640
41+ /** Default cap on the summed size of regular files in a directory injected via tar (1 GB); exceeding it aborts the injection. */
42+ static final long MAX_DIRECTORY_BYTES = 1024L * 1024 * 1024 ;
43+
44+ /** Default cap on the number of paths visited while walking an injected directory. Every visited path counts, including the root and paths that end up not being archived. */
45+ static final int MAX_DIRECTORY_ENTRIES = 500_000 ;
46+
47+ /** Maximum depth handed to Files.walk(); entries nested deeper are not visited and therefore not archived (no error is raised). */
48+ static final int MAX_WALK_DEPTH = 50 ;
49+
3750 private final DockerFileOperations fileOps ;
3851 private final long maxOutputBytes ;
3952 private final long maxSingleFileBytes ;
53+ private final long maxDirectoryBytes ; // per-injection cap on summed regular-file bytes
54+ private final int maxDirectoryEntries ; // per-injection cap on paths visited by the walk
4055
/**
 * Creates a workspace manager that applies the class-level default limits
 * ({@code MAX_OUTPUT_BYTES}, {@code MAX_SINGLE_FILE_BYTES}, {@code MAX_DIRECTORY_BYTES},
 * {@code MAX_DIRECTORY_ENTRIES}).
 *
 * @param fileOps Docker file operations used to move data into and out of containers
 */
public SandboxWorkspaceManager(DockerFileOperations fileOps) {
    this(
        fileOps,
        MAX_OUTPUT_BYTES,
        MAX_SINGLE_FILE_BYTES,
        MAX_DIRECTORY_BYTES,
        MAX_DIRECTORY_ENTRIES
    );
}
4459
/**
 * Package-private constructor for testing with custom limits.
 *
 * @param fileOps Docker file operations used to move data into and out of containers
 * @param maxOutputBytes value stored as the output size limit
 * @param maxSingleFileBytes value stored as the single-file size limit
 * @param maxDirectoryBytes cap on the summed size of regular files in one directory injection
 * @param maxDirectoryEntries cap on the number of paths visited in one directory injection
 */
SandboxWorkspaceManager(
    DockerFileOperations fileOps,
    long maxOutputBytes,
    long maxSingleFileBytes,
    long maxDirectoryBytes,
    int maxDirectoryEntries
) {
    // Collaborator first, then the limits grouped by what they constrain.
    this.fileOps = fileOps;

    this.maxDirectoryEntries = maxDirectoryEntries;
    this.maxDirectoryBytes = maxDirectoryBytes;

    this.maxSingleFileBytes = maxSingleFileBytes;
    this.maxOutputBytes = maxOutputBytes;
}
5174
5275 /**
@@ -93,29 +116,78 @@ public void injectDirectories(String containerId, Map<String, String> directoryM
93116 }
94117
95118 /**
96- * Walk a host directory, create a tar archive, and copy it into the container.
97- * The tar entries are prefixed with the final path component so that extracting at
98- * the parent of containerPath produces the correct layout.
119+ * Walk a host directory, create a tar archive on a temp file, and stream it into the container.
120+ *
121+ * <p>Uses a temporary file instead of {@link ByteArrayOutputStream} to avoid loading the entire
122+ * archive into JVM heap. Memory usage is O(buffer_size) regardless of directory size, since each
123+ * file is streamed through a fixed buffer. The docker-java transport streams the tar lazily via
124+ * chunked transfer encoding — no additional buffering occurs downstream.
125+ *
126+ * <p>The tar entries are prefixed with the final path component so that extracting at the parent
127+ * of containerPath produces the correct layout.
99128 */
100129 private void injectDirectoryViaTar (String containerId , String hostPath , String containerPath ) {
101130 Path hostDir = Path .of (hostPath );
102- // Container path parent is where we extract; the tar has the dir name as prefix
103131 Path containerParent = Path .of (containerPath ).getParent ();
104132 String dirName = Path .of (containerPath ).getFileName ().toString ();
105133 if (containerParent == null ) {
106134 containerParent = Path .of ("/" );
107135 }
108136
137+ Path tempTar = null ;
138+ try {
139+ tempTar = Files .createTempFile ("hephaestus-inject-" , ".tar" );
140+
141+ // Phase 1: Walk directory and write tar to temp file.
142+ // Memory: O(COPY_BUFFER_SIZE) — each file is streamed, never loaded whole.
143+ writeTarToFile (tempTar , hostDir , dirName , hostPath );
144+
145+ // Phase 2: Stream tar from disk to Docker daemon.
146+ // docker-java wraps this in InputStreamEntity (chunked transfer) — no heap copy.
147+ try (InputStream tarStream = new BufferedInputStream (Files .newInputStream (tempTar ))) {
148+ fileOps .copyArchiveToContainer (containerId , containerParent .toString (), tarStream );
149+ }
150+ } catch (IOException e ) {
151+ throw new SandboxException ("Failed to inject directory " + hostPath + " into container " + containerId , e );
152+ } finally {
153+ if (tempTar != null ) {
154+ try {
155+ Files .deleteIfExists (tempTar );
156+ } catch (IOException e ) {
157+ log .warn ("Failed to delete temp tar file {}: {}" , tempTar , e .getMessage ());
158+ }
159+ }
160+ }
161+ }
162+
163+ /** Size of the BufferedOutputStream through which the tar archive is written to the temp file (64 KB). */
164+ private static final int COPY_BUFFER_SIZE = 64 * 1024 ;
165+
166+ /**
167+ * Write a tar archive of the given directory to a file on disk. Files are streamed through a
168+ * fixed-size buffer rather than loaded entirely into memory.
169+ */
170+ private void writeTarToFile (Path tarFile , Path hostDir , String dirName , String hostPath ) throws IOException {
171+ long [] totalBytes = { 0 };
172+ int [] entryCount = { 0 };
173+
109174 try (
110- ByteArrayOutputStream baos = new ByteArrayOutputStream ();
111- TarArchiveOutputStream tar = new TarArchiveOutputStream (baos )
175+ OutputStream fileOut = new BufferedOutputStream (Files .newOutputStream (tarFile ), COPY_BUFFER_SIZE );
176+ TarArchiveOutputStream tar = new TarArchiveOutputStream (fileOut );
177+ Stream <Path > paths = Files .walk (hostDir , MAX_WALK_DEPTH )
112178 ) {
113179 tar .setLongFileMode (TarArchiveOutputStream .LONGFILE_POSIX );
114180 tar .setBigNumberMode (TarArchiveOutputStream .BIGNUMBER_POSIX );
115181
116- // Walk the directory tree and add each file/directory
117- Files .walk (hostDir ).forEach (path -> {
182+ paths .forEach (path -> {
118183 try {
184+ entryCount [0 ]++;
185+ if (entryCount [0 ] > maxDirectoryEntries ) {
186+ throw new SandboxException (
187+ "Directory injection exceeds entry count limit (" + maxDirectoryEntries + "): " + hostPath
188+ );
189+ }
190+
119191 String relativePath = hostDir .relativize (path ).toString ();
120192 String entryName = relativePath .isEmpty () ? dirName : dirName + "/" + relativePath ;
121193
@@ -125,27 +197,33 @@ private void injectDirectoryViaTar(String containerId, String hostPath, String c
125197 tar .putArchiveEntry (dirEntry );
126198 tar .closeArchiveEntry ();
127199 } else if (Files .isRegularFile (path )) {
128- byte [] content = Files .readAllBytes (path );
200+ long fileSize = Files .size (path );
201+ totalBytes [0 ] += fileSize ;
202+ if (totalBytes [0 ] > maxDirectoryBytes ) {
203+ throw new SandboxException (
204+ "Directory injection exceeds size limit (" + maxDirectoryBytes + " bytes): " + hostPath
205+ );
206+ }
207+
129208 TarArchiveEntry fileEntry = new TarArchiveEntry (entryName );
130- fileEntry .setSize (content . length );
209+ fileEntry .setSize (fileSize );
131210 fileEntry .setModTime (Files .getLastModifiedTime (path ).toMillis ());
132211 tar .putArchiveEntry (fileEntry );
133- tar .write (content );
212+
213+ // Stream file through fixed buffer — not Files.readAllBytes()
214+ try (InputStream fileIn = Files .newInputStream (path )) {
215+ fileIn .transferTo (tar );
216+ }
134217 tar .closeArchiveEntry ();
135218 }
136- // Skip symlinks for security (already validated above)
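219+ // NOTE(review): symlinks are NOT reliably skipped here. Files.walk() does not descend into linked directories by default,
220+ // but Files.isRegularFile(path) without LinkOption.NOFOLLOW_LINKS follows links, so a link to a regular file is archived unless rejected earlier.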
137221 } catch (IOException e ) {
138222 throw new SandboxException ("Failed to add file to tar: " + path , e );
139223 }
140224 });
141225
142226 tar .finish ();
143-
144- try (InputStream tarStream = new ByteArrayInputStream (baos .toByteArray ())) {
145- fileOps .copyArchiveToContainer (containerId , containerParent .toString (), tarStream );
146- }
147- } catch (IOException e ) {
148- throw new SandboxException ("Failed to inject directory " + hostPath + " into container " + containerId , e );
149227 }
150228 }
151229
0 commit comments