
Commit e18d0a4

fuatbasik authored and ahmarsuhail committed
HADOOP-19348. Add initial support for Analytics Accelerator Library for Amazon S3 (#7192)
1 parent 053afb7 commit e18d0a4

33 files changed: +620 −32 lines

hadoop-tools/hadoop-aws/pom.xml

Lines changed: 11 additions & 0 deletions
@@ -472,6 +472,17 @@
       <artifactId>amazon-s3-encryption-client-java</artifactId>
       <scope>provided</scope>
     </dependency>
+    <dependency>
+      <groupId>software.amazon.s3.analyticsaccelerator</groupId>
+      <artifactId>analyticsaccelerator-s3</artifactId>
+      <version>0.0.2</version>
+      <scope>compile</scope>
+    </dependency>
+    <dependency>
+      <groupId>software.amazon.awssdk.crt</groupId>
+      <artifactId>aws-crt</artifactId>
+      <version>0.29.10</version>
+    </dependency>
     <dependency>
       <groupId>org.assertj</groupId>
       <artifactId>assertj-core</artifactId>

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java

Lines changed: 34 additions & 0 deletions
@@ -1780,4 +1780,38 @@ private Constants() {
    * Value: {@value}.
    */
   public static final String S3A_IO_RATE_LIMIT = "fs.s3a.io.rate.limit";
+
+
+  /**
+   * Prefix used to configure the Analytics Accelerator Library.
+   */
+  public static final String ANALYTICS_ACCELERATOR_CONFIGURATION_PREFIX =
+      "fs.s3a.analytics.accelerator";
+
+  /**
+   * Config key to enable the Analytics Accelerator Library for Amazon S3.
+   * https://github.com/awslabs/analytics-accelerator-s3
+   */
+  public static final String ANALYTICS_ACCELERATOR_ENABLED_KEY =
+      ANALYTICS_ACCELERATOR_CONFIGURATION_PREFIX + ".enabled";
+
+  /**
+   * Config key to enable use of the CRT client with the Analytics Accelerator Library.
+   * Default: true.
+   */
+  public static final String ANALYTICS_ACCELERATOR_CRT_ENABLED =
+      "fs.s3a.analytics.accelerator.crt.client";
+
+  /**
+   * Default value for {@link #ANALYTICS_ACCELERATOR_ENABLED_KEY}.
+   * Value: {@value}.
+   */
+  public static final boolean ANALYTICS_ACCELERATOR_ENABLED_DEFAULT = false;
+
+  /**
+   * Default value for {@link #ANALYTICS_ACCELERATOR_CRT_ENABLED}.
+   * Value: {@value}.
+   */
+  public static final boolean ANALYTICS_ACCELERATOR_CRT_ENABLED_DEFAULT = true;
+
 }
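
The two keys above are all a client application needs to opt in. The following is a minimal sketch, not part of this commit: the class name and the explicit string keys are illustrative, and the values simply mirror the constants defined above.

    import org.apache.hadoop.conf.Configuration;

    public class EnableAnalyticsAccelerator {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // ANALYTICS_ACCELERATOR_ENABLED_KEY: switch S3A reads over to the
        // Analytics Accelerator seekable stream (default is false).
        conf.setBoolean("fs.s3a.analytics.accelerator.enabled", true);
        // ANALYTICS_ACCELERATOR_CRT_ENABLED: keep the CRT-based async client,
        // which is the default; set to false to use the standard async client.
        conf.setBoolean("fs.s3a.analytics.accelerator.crt.client", true);
      }
    }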

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java

Lines changed: 72 additions & 2 deletions
@@ -53,7 +53,9 @@

 import software.amazon.awssdk.core.ResponseInputStream;
 import software.amazon.awssdk.core.exception.SdkException;
+import software.amazon.awssdk.services.s3.S3AsyncClient;
 import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.internal.crt.S3CrtAsyncClient;
 import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest;
 import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse;
 import software.amazon.awssdk.services.s3.model.GetBucketLocationRequest;
@@ -86,6 +88,11 @@
 import software.amazon.awssdk.transfer.s3.model.Copy;
 import software.amazon.awssdk.transfer.s3.model.CopyRequest;

+import software.amazon.s3.analyticsaccelerator.S3SdkObjectClient;
+import software.amazon.s3.analyticsaccelerator.S3SeekableInputStreamConfiguration;
+import software.amazon.s3.analyticsaccelerator.S3SeekableInputStreamFactory;
+import software.amazon.s3.analyticsaccelerator.common.ConnectorConfiguration;
+
 import org.apache.hadoop.fs.impl.prefetch.ExecutorServiceFuturePool;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -313,6 +320,13 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
    */
   private S3Client s3Client;

+  /**
+   * CRT-based S3 async client, created if the analytics accelerator library is
+   * enabled, and managed by the S3AStoreImpl. The analytics accelerator library
+   * can be enabled with {@link Constants#ANALYTICS_ACCELERATOR_ENABLED_KEY}.
+   */
+  private S3AsyncClient s3AsyncClient;
+
   // initial callback policy is fail-once; it's there just to assist
   // some mock tests and other codepaths trying to call the low level
   // APIs on an uninitialized filesystem.
@@ -340,6 +354,11 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
   // If true, the prefetching input stream is used for reads.
   private boolean prefetchEnabled;

+  // If true, S3SeekableInputStream from Analytics Accelerator for Amazon S3 will be used.
+  private boolean analyticsAcceleratorEnabled;
+
+  private boolean analyticsAcceleratorCRTEnabled;
+
   // Size in bytes of a single prefetch block.
   private int prefetchBlockSize;

@@ -515,6 +534,11 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
    */
   private boolean s3AccessGrantsEnabled;

+  /**
+   * Factory to create S3SeekableInputStream if {@link this#analyticsAcceleratorEnabled} is true.
+   */
+  private S3SeekableInputStreamFactory s3SeekableInputStreamFactory;
+
   /** Add any deprecated keys. */
   @SuppressWarnings("deprecation")
   private static void addDeprecatedKeys() {
@@ -670,8 +694,21 @@ public void initialize(URI name, Configuration originalConf)
     this.prefetchBlockSize = (int) prefetchBlockSizeLong;
     this.prefetchBlockCount =
         intOption(conf, PREFETCH_BLOCK_COUNT_KEY, PREFETCH_BLOCK_DEFAULT_COUNT, 1);
+
+    this.analyticsAcceleratorEnabled =
+        conf.getBoolean(ANALYTICS_ACCELERATOR_ENABLED_KEY, ANALYTICS_ACCELERATOR_ENABLED_DEFAULT);
+    this.analyticsAcceleratorCRTEnabled =
+        conf.getBoolean(ANALYTICS_ACCELERATOR_CRT_ENABLED,
+            ANALYTICS_ACCELERATOR_CRT_ENABLED_DEFAULT);
+
     this.isMultipartUploadEnabled = conf.getBoolean(MULTIPART_UPLOADS_ENABLED,
-        DEFAULT_MULTIPART_UPLOAD_ENABLED);
+        DEFAULT_MULTIPART_UPLOAD_ENABLED);
+
+    if (this.analyticsAcceleratorEnabled && !analyticsAcceleratorCRTEnabled) {
+      // Temp change: Analytics Accelerator with the S3AsyncClient does not support multipart upload.
+      this.isMultipartUploadEnabled = false;
+    }
+

     // multipart copy and upload are the same; this just makes it explicit
     this.isMultipartCopyEnabled = isMultipartUploadEnabled;

@@ -794,6 +831,27 @@ public void initialize(URI name, Configuration originalConf)
     // directly through the client manager.
     // this is to aid mocking.
     s3Client = store.getOrCreateS3Client();
+
+    if (this.analyticsAcceleratorEnabled) {
+      LOG.info("Using S3SeekableInputStream");
+      if (this.analyticsAcceleratorCRTEnabled) {
+        LOG.info("Using S3 CRT client for analytics accelerator S3");
+        this.s3AsyncClient = S3CrtAsyncClient.builder().maxConcurrency(600).build();
+      } else {
+        LOG.info("Using S3 async client for analytics accelerator S3");
+        this.s3AsyncClient = store.getOrCreateAsyncClient();
+      }
+
+      ConnectorConfiguration configuration = new ConnectorConfiguration(conf,
+          ANALYTICS_ACCELERATOR_CONFIGURATION_PREFIX);
+      S3SeekableInputStreamConfiguration seekableInputStreamConfiguration =
+          S3SeekableInputStreamConfiguration.fromConfiguration(configuration);
+      this.s3SeekableInputStreamFactory =
+          new S3SeekableInputStreamFactory(
+              new S3SdkObjectClient(this.s3AsyncClient),
+              seekableInputStreamConfiguration);
+    }
+
     // The filesystem is now ready to perform operations against
     // S3
     // This initiates a probe against S3 for the bucket existing.
@@ -1861,6 +1919,8 @@ private FSDataInputStream executeOpen(
       final Path path,
       final OpenFileSupport.OpenFileInformation fileInformation)
       throws IOException {
+
+
     // create the input stream statistics before opening
     // the file so that the time to prepare to open the file is included.
     S3AInputStreamStatistics inputStreamStats =
@@ -1877,6 +1937,14 @@ private FSDataInputStream executeOpen(
     fileInformation.applyOptions(readContext);
     LOG.debug("Opening '{}'", readContext);

+    if (this.analyticsAcceleratorEnabled) {
+      return new FSDataInputStream(
+          new S3ASeekableStream(
+              this.bucket,
+              pathToKey(path),
+              s3SeekableInputStreamFactory));
+    }
+
     if (this.prefetchEnabled) {
       Configuration configuration = getConf();
       initLocalDirAllocatorIfNotInitialized(configuration);
@@ -4354,9 +4422,11 @@ public void close() throws IOException {
   protected synchronized void stopAllServices() {
     try {
       trackDuration(getDurationTrackerFactory(), FILESYSTEM_CLOSE.getSymbol(), () -> {
-        closeAutocloseables(LOG, store);
+        closeAutocloseables(LOG, store, s3SeekableInputStreamFactory);
         store = null;
         s3Client = null;
+        s3AsyncClient = null;
+        s3SeekableInputStreamFactory = null;

         // At this point the S3A client is shut down,
         // now the executor pools are closed
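
With the feature enabled, executeOpen() hands callers an FSDataInputStream backed by the new S3ASeekableStream. The following end-to-end sketch is illustrative only and not part of the commit: the class name, bucket, object key, and seek offset are placeholders; everything else is the standard Hadoop FileSystem API.

    import java.io.IOException;
    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class AnalyticsAcceleratorReadExample {
      public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Enable the Analytics Accelerator read path added by this commit.
        conf.setBoolean("fs.s3a.analytics.accelerator.enabled", true);

        FileSystem fs = FileSystem.get(URI.create("s3a://example-bucket/"), conf);
        try (FSDataInputStream in = fs.open(new Path("s3a://example-bucket/data/file.parquet"))) {
          // seek() and read() are delegated to the library's S3SeekableInputStream.
          in.seek(1024);
          byte[] buffer = new byte[8192];
          int read = in.read(buffer, 0, buffer.length);
          System.out.println("Read " + read + " bytes through S3ASeekableStream");
        }
        fs.close();
      }
    }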
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ASeekableStream.java

Lines changed: 183 additions & 0 deletions
@@ -0,0 +1,183 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import java.io.EOFException;
+import java.io.IOException;
+
+import org.apache.hadoop.fs.FSExceptionMessages;
+import org.apache.hadoop.fs.StreamCapabilities;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.fs.FSInputStream;
+
+import software.amazon.s3.analyticsaccelerator.S3SeekableInputStream;
+import software.amazon.s3.analyticsaccelerator.S3SeekableInputStreamFactory;
+import software.amazon.s3.analyticsaccelerator.util.S3URI;
+
+public class S3ASeekableStream extends FSInputStream implements StreamCapabilities {
+
+  private S3SeekableInputStream inputStream;
+  private long lastReadCurrentPos = 0;
+  private final String key;
+  private volatile boolean closed;
+
+  public static final Logger LOG = LoggerFactory.getLogger(S3ASeekableStream.class);
+
+  public S3ASeekableStream(String bucket, String key,
+      S3SeekableInputStreamFactory s3SeekableInputStreamFactory) {
+    this.inputStream = s3SeekableInputStreamFactory.createStream(S3URI.of(bucket, key));
+    this.key = key;
+  }
+
+  /**
+   * Indicates whether the given {@code capability} is supported by this stream.
+   *
+   * @param capability the capability to check.
+   * @return true if the given {@code capability} is supported by this stream, false otherwise.
+   */
+  @Override
+  public boolean hasCapability(String capability) {
+    return false;
+  }
+
+  @Override
+  public int read() throws IOException {
+    throwIfClosed();
+    int bytesRead;
+    try {
+      bytesRead = inputStream.read();
+    } catch (IOException ioe) {
+      onReadFailure(ioe);
+      throw ioe;
+    }
+    return bytesRead;
+  }
+
+  @Override
+  public void seek(long pos) throws IOException {
+    throwIfClosed();
+    if (pos < 0) {
+      throw new EOFException(FSExceptionMessages.NEGATIVE_SEEK
+          + " " + pos);
+    }
+    inputStream.seek(pos);
+  }
+
+
+  @Override
+  public synchronized long getPos() {
+    if (!closed) {
+      lastReadCurrentPos = inputStream.getPos();
+    }
+    return lastReadCurrentPos;
+  }
+
+
+  /**
+   * Reads the last n bytes from the stream into a byte buffer. Blocks until end of stream is
+   * reached. Leaves the position of the stream unaltered.
+   *
+   * @param buf buffer to read data into
+   * @param off start position in buffer at which data is written
+   * @param len the number of bytes to read; the n-th byte should be the last byte of the stream.
+   * @return the total number of bytes read into the buffer
+   * @throws IOException if an I/O error occurs
+   */
+  public int readTail(byte[] buf, int off, int len) throws IOException {
+    throwIfClosed();
+    int bytesRead;
+    try {
+      bytesRead = inputStream.readTail(buf, off, len);
+    } catch (IOException ioe) {
+      onReadFailure(ioe);
+      throw ioe;
+    }
+    return bytesRead;
+  }
+
+  @Override
+  public int read(byte[] buf, int off, int len) throws IOException {
+    throwIfClosed();
+    int bytesRead;
+    try {
+      bytesRead = inputStream.read(buf, off, len);
+    } catch (IOException ioe) {
+      onReadFailure(ioe);
+      throw ioe;
+    }
+    return bytesRead;
+  }
+
+
+  @Override
+  public boolean seekToNewSource(long l) throws IOException {
+    return false;
+  }
+
+  @Override
+  public int available() throws IOException {
+    throwIfClosed();
+    return super.available();
+  }
+
+  @Override
+  public synchronized void close() throws IOException {
+    if (!closed) {
+      closed = true;
+      try {
+        inputStream.close();
+        inputStream = null;
+        super.close();
+      } catch (IOException ioe) {
+        LOG.debug("Failure closing stream {}: ", key);
+        throw ioe;
+      }
+    }
+  }
+
+  /**
+   * Close the stream on read failure.
+   * No attempt is made to recover from the failure.
+   *
+   * @param ioe exception caught.
+   */
+  @Retries.OnceTranslated
+  private void onReadFailure(IOException ioe) throws IOException {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Got exception while trying to read from stream {}, " +
+          "not trying to recover:",
+          key, ioe);
+    } else {
+      LOG.info("Got exception while trying to read from stream {}, " +
+          "not trying to recover:",
+          key, ioe);
+    }
+    this.close();
+  }
+
+
+  protected void throwIfClosed() throws IOException {
+    if (closed) {
+      throw new IOException(key + ": " + FSExceptionMessages.STREAM_IS_CLOSED);
+    }
+  }
+}
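
Beyond the FSInputStream overrides, readTail() is the one addition specific to this stream: it fetches the tail of the object without moving the read position, which suits footer-based formats. The test-style sketch below is not part of this commit; it assumes an already-constructed S3SeekableInputStreamFactory (such as the one S3AFileSystem builds in initialize()), and the bucket, key, and footer length are placeholders.

    import java.io.IOException;
    import org.apache.hadoop.fs.s3a.S3ASeekableStream;
    import software.amazon.s3.analyticsaccelerator.S3SeekableInputStreamFactory;

    public class ReadTailSketch {
      static byte[] readFooter(S3SeekableInputStreamFactory factory) throws IOException {
        byte[] footer = new byte[16];
        try (S3ASeekableStream stream =
            new S3ASeekableStream("example-bucket", "data/file.parquet", factory)) {
          // readTail() fills the buffer with the last len bytes of the object
          // and leaves the stream position unchanged.
          int read = stream.readTail(footer, 0, footer.length);
          if (read < footer.length) {
            throw new IOException("short read of object footer: " + read + " bytes");
          }
        }
        return footer;
      }
    }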
