52
52
import java .util .concurrent .atomic .AtomicBoolean ;
53
53
import javax .annotation .Nullable ;
54
54
55
- import software .amazon .awssdk .core .ResponseInputStream ;
56
55
import software .amazon .awssdk .core .exception .SdkException ;
57
56
import software .amazon .awssdk .services .s3 .S3Client ;
58
57
import software .amazon .awssdk .services .s3 .model .CompleteMultipartUploadRequest ;
59
58
import software .amazon .awssdk .services .s3 .model .CompleteMultipartUploadResponse ;
60
59
import software .amazon .awssdk .services .s3 .model .GetBucketLocationRequest ;
61
- import software .amazon .awssdk .services .s3 .model .GetObjectRequest ;
62
- import software .amazon .awssdk .services .s3 .model .GetObjectResponse ;
63
60
import software .amazon .awssdk .services .s3 .model .HeadBucketRequest ;
64
61
import software .amazon .awssdk .services .s3 .model .HeadBucketResponse ;
65
62
import software .amazon .awssdk .services .s3 .model .MultipartUpload ;
152
149
import org .apache .hadoop .fs .s3a .impl .StoreContextFactory ;
153
150
import org .apache .hadoop .fs .s3a .impl .UploadContentProviders ;
154
151
import org .apache .hadoop .fs .s3a .impl .CSEUtils ;
155
- import org .apache .hadoop .fs .s3a .prefetch .PrefetchingInputStreamFactory ;
156
- import org .apache .hadoop .fs .s3a .impl .ClassicObjectInputStreamFactory ;
157
- import org .apache .hadoop .fs .s3a .impl .model .ObjectReadParameters ;
158
- import org .apache .hadoop .fs .s3a .impl .model .ObjectInputStreamFactory ;
159
- import org .apache .hadoop .fs .s3a .impl .model .ObjectInputStreamCallbacks ;
152
+ import org .apache .hadoop .fs .s3a .impl .streams .ObjectInputStreamFactory ;
153
+ import org .apache .hadoop .fs .s3a .impl .streams .ObjectReadParameters ;
154
+ import org .apache .hadoop .fs .s3a .impl .streams .ObjectInputStreamCallbacks ;
160
155
import org .apache .hadoop .fs .s3a .tools .MarkerToolOperations ;
161
156
import org .apache .hadoop .fs .s3a .tools .MarkerToolOperationsImpl ;
162
157
import org .apache .hadoop .fs .statistics .DurationTracker ;
172
167
import org .apache .hadoop .fs .store .audit .ActiveThreadSpanSource ;
173
168
import org .apache .hadoop .fs .store .audit .AuditSpan ;
174
169
import org .apache .hadoop .fs .store .audit .AuditSpanSource ;
175
- import org .apache .hadoop .io .IOUtils ;
176
170
import org .apache .hadoop .io .Text ;
177
171
import org .apache .hadoop .security .AccessControlException ;
178
172
import org .apache .hadoop .security .token .DelegationTokenIssuer ;
@@ -341,18 +335,11 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
341
335
private ExecutorService boundedThreadPool ;
342
336
private ThreadPoolExecutor unboundedThreadPool ;
343
337
344
- // S3 reads are prefetched asynchronously using this future pool.
338
+ /**
339
+ * Future pool built on the bounded thread pool.
340
+ */
345
341
private ExecutorServiceFuturePool futurePool ;
346
342
347
- // If true, the prefetching input stream is used for reads.
348
- private boolean prefetchEnabled ;
349
-
350
- // Size in bytes of a single prefetch block.
351
- private int prefetchBlockSize ;
352
-
353
- // Size of prefetch queue (in number of blocks).
354
- private int prefetchBlockCount ;
355
-
356
343
private int executorCapacity ;
357
344
private long multiPartThreshold ;
358
345
public static final Logger LOG = LoggerFactory .getLogger (S3AFileSystem .class );
@@ -673,22 +660,11 @@ public void initialize(URI name, Configuration originalConf)
673
660
dirOperationsPurgeUploads = conf .getBoolean (DIRECTORY_OPERATIONS_PURGE_UPLOADS ,
674
661
s3ExpressStore );
675
662
676
- this .prefetchEnabled = conf .getBoolean (PREFETCH_ENABLED_KEY , PREFETCH_ENABLED_DEFAULT );
677
- long prefetchBlockSizeLong =
678
- longBytesOption (conf , PREFETCH_BLOCK_SIZE_KEY , PREFETCH_BLOCK_DEFAULT_SIZE , 1 );
679
- if (prefetchBlockSizeLong > (long ) Integer .MAX_VALUE ) {
680
- throw new IOException ("S3A prefatch block size exceeds int limit" );
681
- }
682
- this .prefetchBlockSize = (int ) prefetchBlockSizeLong ;
683
- this .prefetchBlockCount =
684
- intOption (conf , PREFETCH_BLOCK_COUNT_KEY , PREFETCH_BLOCK_DEFAULT_COUNT , 1 );
685
663
this .isMultipartUploadEnabled = conf .getBoolean (MULTIPART_UPLOADS_ENABLED ,
686
664
DEFAULT_MULTIPART_UPLOAD_ENABLED );
687
665
// multipart copy and upload are the same; this just makes it explicit
688
666
this .isMultipartCopyEnabled = isMultipartUploadEnabled ;
689
667
690
- initThreadPools (conf );
691
-
692
668
int listVersion = conf .getInt (LIST_VERSION , DEFAULT_LIST_VERSION );
693
669
if (listVersion < 1 || listVersion > 2 ) {
694
670
LOG .warn ("Configured fs.s3a.list.version {} is invalid, forcing " +
@@ -811,6 +787,10 @@ public void initialize(URI name, Configuration originalConf)
811
787
// directly through the client manager.
812
788
// this is to aid mocking.
813
789
s3Client = getStore ().getOrCreateS3Client ();
790
+
791
+ // thread pool init requires store to be created
792
+ initThreadPools ();
793
+
814
794
// The filesystem is now ready to perform operations against
815
795
// S3
816
796
// This initiates a probe against S3 for the bucket existing.
@@ -959,12 +939,15 @@ public Statistics getInstanceStatistics() {
959
939
}
960
940
961
941
/**
962
- * Initialize the thread pool .
942
+ * Initialize the thread pools .
963
943
* This must be re-invoked after replacing the S3Client during test
964
944
* runs.
965
945
* @param conf configuration.
966
946
*/
967
- private void initThreadPools (Configuration conf ) {
947
+ private void initThreadPools () {
948
+
949
+ Configuration conf = getConf ();
950
+
968
951
final String name = "s3a-transfer-" + getBucket ();
969
952
int maxThreads = conf .getInt (MAX_THREADS , DEFAULT_MAX_THREADS );
970
953
if (maxThreads < 2 ) {
@@ -980,7 +963,9 @@ private void initThreadPools(Configuration conf) {
980
963
TimeUnit .SECONDS ,
981
964
Duration .ZERO ).getSeconds ();
982
965
983
- int numPrefetchThreads = this .prefetchEnabled ? this .prefetchBlockCount : 0 ;
966
+ final ObjectInputStreamFactory .ThreadOptions requirements =
967
+ getStore ().prefetchThreadRequirements ();
968
+ int numPrefetchThreads = requirements .sharedThreads ();
984
969
985
970
int activeTasksForBoundedThreadPool = maxThreads ;
986
971
int waitingTasksForBoundedThreadPool = maxThreads + totalTasks + numPrefetchThreads ;
@@ -998,7 +983,7 @@ private void initThreadPools(Configuration conf) {
998
983
unboundedThreadPool .allowCoreThreadTimeOut (true );
999
984
executorCapacity = intOption (conf ,
1000
985
EXECUTOR_CAPACITY , DEFAULT_EXECUTOR_CAPACITY , 1 );
1001
- if (prefetchEnabled ) {
986
+ if (requirements . createFuturePool () ) {
1002
987
final S3AInputStreamStatistics s3AInputStreamStatistics =
1003
988
statisticsContext .newInputStreamStatistics ();
1004
989
futurePool = new ExecutorServiceFuturePool (
@@ -1987,9 +1972,8 @@ protected S3AReadOpContext createReadContext(
1987
1972
fileStatus ,
1988
1973
vectoredIOContext ,
1989
1974
IOStatisticsContext .getCurrentIOStatisticsContext ().getAggregator (),
1990
- futurePool ,
1991
- prefetchBlockSize ,
1992
- prefetchBlockCount )
1975
+ futurePool
1976
+ )
1993
1977
.withAuditSpan (auditSpan );
1994
1978
openFileHelper .applyDefaultOptions (roc );
1995
1979
return roc .build ();
@@ -5519,7 +5503,7 @@ public boolean hasPathCapability(final Path path, final String capability)
5519
5503
5520
5504
// stream leak detection.
5521
5505
case StreamStatisticNames .STREAM_LEAKS :
5522
- return ! prefetchEnabled ;
5506
+ return true ;
5523
5507
5524
5508
default :
5525
5509
// is it a performance flag?
0 commit comments