Skip to content

Commit 48a4315

Browse files
committed
snowflake connector performance improvement s3 export
1 parent f946a9b commit 48a4315

17 files changed

+969
-1110
lines changed

athena-snowflake/athena-snowflake-connection.yaml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ Parameters:
2323
SpillBucket:
2424
Description: 'The name of the bucket where this function can spill data.'
2525
Type: String
26+
SnowflakeExportBucket:
27+
Description: "The bucket where the Snowflake Query results will be exported."
28+
Type: String
2629
GlueConnection:
2730
Description: "Name of glue connection storing connection details for Federated Data source."
2831
Type: String
@@ -87,6 +90,7 @@ Resources:
8790
Principal:
8891
Service:
8992
- lambda.amazonaws.com
93+
AWS: '*'
9094
Action:
9195
- "sts:AssumeRole"
9296

@@ -150,6 +154,27 @@ Resources:
150154
Resource:
151155
- !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:connection/${GlueConnection}'
152156
- !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:catalog'
157+
- Action:
158+
- s3:GetObject
159+
- s3:GetObjectVersion
160+
- s3:PutObject
161+
- s3:DeleteObject
162+
- s3:DeleteObjectVersion
163+
Effect: Allow
164+
Resource: !Sub 'arn:${AWS::Partition}:s3:::${SnowflakeExportBucket}/*'
165+
- Action:
166+
- s3:ListBucket
167+
- s3:GetBucketLocation
168+
Effect: Allow
169+
Resource: !Sub 'arn:${AWS::Partition}:s3:::${SnowflakeExportBucket}'
170+
Condition:
171+
StringLike:
172+
"s3:prefix":
173+
- !Sub "*"
174+
- Action:
175+
- lambda:GetFunction
176+
Effect: Allow
177+
Resource: !Sub 'arn:${AWS::Partition}:lambda:${AWS::Region}:${AWS::AccountId}:function:${LambdaFunctionName}'
153178

154179
FunctionKmsPolicy:
155180
Condition: CreateKmsPolicy

athena-snowflake/athena-snowflake.yaml

Lines changed: 100 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ Parameters:
3030
Description: 'The prefix within SpillBucket where this function can spill data.'
3131
Type: String
3232
Default: athena-spill
33+
SnowflakeExportBucket:
34+
Description: "The bucket where the Snowflake Query results will be exported."
35+
Type: String
3336
LambdaTimeout:
3437
Description: 'Maximum Lambda invocation runtime in seconds. (min 1 - 900 max)'
3538
Default: 900
@@ -54,12 +57,17 @@ Parameters:
5457
Description: "(Optional) An IAM policy ARN to use as the PermissionsBoundary for the created Lambda function's execution role"
5558
Default: ''
5659
Type: String
60+
LambdaRoleArn:
61+
Description: "(Optional) A custom role to be used by the Connector lambda"
62+
Type: String
63+
Default: ""
5764
Conditions:
5865
HasPermissionsBoundary: !Not [ !Equals [ !Ref PermissionsBoundaryARN, "" ] ]
5966
HasSecurityGroups: !Not [ !Equals [ !Join ["", !Ref SecurityGroupIds], "" ] ]
6067
HasSubnets: !Not [ !Equals [ !Join ["", !Ref SubnetIds], "" ] ]
6168
IsRegionBAH: !Equals [!Ref "AWS::Region", "me-south-1"]
6269
IsRegionHKG: !Equals [!Ref "AWS::Region", "ap-east-1"]
70+
NotHasLambdaRole: !Equals [ !Ref LambdaRoleArn, "" ]
6371
Resources:
6472
JdbcConnectorConfig:
6573
Type: 'AWS::Serverless::Function'
@@ -70,49 +78,107 @@ Resources:
7078
spill_bucket: !Ref SpillBucket
7179
spill_prefix: !Ref SpillPrefix
7280
default: !Ref DefaultConnectionString
81+
export_bucket: !Ref SnowflakeExportBucket
7382
FunctionName: !Ref LambdaFunctionName
7483
PackageType: "Image"
7584
ImageUri: !Sub
7685
- '${Account}.dkr.ecr.${AWS::Region}.amazonaws.com/athena-federation-repository-snowflake:2022.47.1'
7786
- Account: !If [IsRegionBAH, 084828588479, !If [IsRegionHKG, 183295418215, 292517598671]]
7887
ImageConfig:
79-
Command: [ "com.amazonaws.athena.connectors.snowflake.SnowflakeMuxCompositeHandler" ]
88+
Command: [ "com.amazonaws.athena.connectors.snowflake.SnowflakeCompositeHandler" ]
8089
Description: "Enables Amazon Athena to communicate with Snowflake using JDBC"
8190
Timeout: !Ref LambdaTimeout
8291
MemorySize: !Ref LambdaMemory
92+
Role: !If [ NotHasLambdaRole, !GetAtt FunctionRole.Arn, !Ref LambdaRoleArn ]
93+
VpcConfig:
94+
SecurityGroupIds: !If [ HasSecurityGroups, !Ref SecurityGroupIds, !Ref "AWS::NoValue" ]
95+
SubnetIds: !If [ HasSubnets, !Ref SubnetIds, !Ref "AWS::NoValue" ]
96+
97+
FunctionRole:
98+
Condition: NotHasLambdaRole
99+
Type: AWS::IAM::Role
100+
Properties:
83101
PermissionsBoundary: !If [ HasPermissionsBoundary, !Ref PermissionsBoundaryARN, !Ref "AWS::NoValue" ]
84-
Policies:
85-
- Statement:
86-
- Action:
87-
- secretsmanager:GetSecretValue
88-
Effect: Allow
89-
Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${SecretNamePrefix}*'
90-
Version: '2012-10-17'
91-
- Statement:
92-
- Action:
93-
- logs:CreateLogGroup
94-
Effect: Allow
95-
Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*'
96-
Version: '2012-10-17'
97-
- Statement:
98-
- Action:
99-
- logs:CreateLogStream
100-
- logs:PutLogEvents
101-
Effect: Allow
102-
Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*'
103-
Version: '2012-10-17'
104-
- Statement:
105-
- Action:
106-
- athena:GetQueryExecution
107-
Effect: Allow
108-
Resource: '*'
109-
Version: '2012-10-17'
102+
ManagedPolicyArns:
103+
- !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
104+
AssumeRolePolicyDocument:
105+
Version: 2012-10-17
106+
Statement:
107+
- Effect: Allow
108+
Principal:
109+
Service:
110+
- lambda.amazonaws.com
111+
AWS: '*'
112+
Action:
113+
- "sts:AssumeRole"
114+
115+
FunctionExecutionPolicy:
116+
Condition: NotHasLambdaRole
117+
Type: "AWS::IAM::Policy"
118+
Properties:
119+
PolicyName: FunctionExecutionPolicy
120+
PolicyDocument:
121+
Version: 2012-10-17
122+
Statement:
123+
- Action:
124+
- secretsmanager:GetSecretValue
125+
Effect: Allow
126+
Resource: !Sub 'arn:${AWS::Partition}:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${SecretNamePrefix}*'
127+
- Action:
128+
- logs:CreateLogGroup
129+
Effect: Allow
130+
Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:*'
131+
- Action:
132+
- logs:CreateLogStream
133+
- logs:PutLogEvents
134+
Effect: Allow
135+
Resource: !Sub 'arn:${AWS::Partition}:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${LambdaFunctionName}:*'
136+
- Action:
137+
- lambda:GetFunction
138+
Effect: Allow
139+
Resource: !Sub 'arn:${AWS::Partition}:lambda:${AWS::Region}:${AWS::AccountId}:function:${LambdaFunctionName}'
140+
- Action:
141+
- athena:GetQueryExecution
142+
Effect: Allow
143+
Resource: '*'
110144
#S3CrudPolicy allows our connector to spill large responses to S3. You can optionally replace this pre-made policy
111145
#with one that is more restrictive and can only 'put' but not read,delete, or overwrite files.
112-
- S3CrudPolicy:
113-
BucketName: !Ref SpillBucket
114-
#VPCAccessPolicy allows our connector to run in a VPC so that it can access your data source.
115-
- VPCAccessPolicy: {}
116-
VpcConfig:
117-
SecurityGroupIds: !If [ HasSecurityGroups, !Ref SecurityGroupIds, !Ref "AWS::NoValue" ]
118-
SubnetIds: !If [ HasSubnets, !Ref SubnetIds, !Ref "AWS::NoValue" ]
146+
- Action:
147+
- s3:GetObject
148+
- s3:ListBucket
149+
- s3:GetBucketLocation
150+
- s3:GetObjectVersion
151+
- s3:PutObject
152+
- s3:PutObjectAcl
153+
- s3:GetLifecycleConfiguration
154+
- s3:PutLifecycleConfiguration
155+
- s3:DeleteObject
156+
Effect: Allow
157+
Resource:
158+
- Fn::Sub:
159+
- arn:${AWS::Partition}:s3:::${bucketName}
160+
- bucketName:
161+
Ref: SpillBucket
162+
- Fn::Sub:
163+
- arn:${AWS::Partition}:s3:::${bucketName}/*
164+
- bucketName:
165+
Ref: SpillBucket
166+
- Action:
167+
- s3:GetObject
168+
- s3:GetObjectVersion
169+
- s3:PutObject
170+
- s3:DeleteObject
171+
- s3:DeleteObjectVersion
172+
Effect: Allow
173+
Resource: !Sub 'arn:${AWS::Partition}:s3:::${SnowflakeExportBucket}/*'
174+
- Action:
175+
- s3:ListBucket
176+
- s3:GetBucketLocation
177+
Effect: Allow
178+
Resource: !Sub 'arn:${AWS::Partition}:s3:::${SnowflakeExportBucket}'
179+
Condition:
180+
StringLike:
181+
"s3:prefix":
182+
- !Sub "*"
183+
Roles:
184+
- !Ref FunctionRole

athena-snowflake/pom.xml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,31 @@
2020
<artifactId>athena-jdbc</artifactId>
2121
<version>2022.47.1</version>
2222
</dependency>
23+
<dependency>
24+
<groupId>org.apache.arrow</groupId>
25+
<artifactId>arrow-dataset</artifactId>
26+
<version>${apache.arrow.version}</version>
27+
</dependency>
28+
<dependency>
29+
<groupId>org.apache.arrow</groupId>
30+
<artifactId>arrow-c-data</artifactId>
31+
<version>${apache.arrow.version}</version>
32+
</dependency>
33+
<dependency>
34+
<groupId>org.apache.arrow</groupId>
35+
<artifactId>arrow-memory-core</artifactId>
36+
<version>${apache.arrow.version}</version>
37+
</dependency>
38+
<dependency>
39+
<groupId>net.java.dev.jna</groupId>
40+
<artifactId>jna-platform</artifactId>
41+
<version>5.16.0</version>
42+
</dependency>
43+
<dependency>
44+
<groupId>net.java.dev.jna</groupId>
45+
<artifactId>jna</artifactId>
46+
<version>5.16.0</version>
47+
</dependency>
2348
<dependency>
2449
<groupId>com.amazonaws</groupId>
2550
<artifactId>athena-jdbc</artifactId>

athena-snowflake/src/main/java/com/amazonaws/athena/connectors/snowflake/SnowflakeCompositeHandler.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,17 +24,26 @@
2424

2525
import com.amazonaws.athena.connector.lambda.handlers.CompositeHandler;
2626

27+
import java.io.IOException;
28+
import java.security.KeyStoreException;
29+
import java.security.NoSuchAlgorithmException;
30+
import java.security.cert.CertificateEncodingException;
31+
32+
import static com.amazonaws.athena.connectors.snowflake.SnowflakeUtils.installCaCertificate;
33+
import static com.amazonaws.athena.connectors.snowflake.SnowflakeUtils.setupNativeEnvironmentVariables;
34+
2735
/**
2836
* Boilerplate composite handler that allows us to use a single Lambda function for both
2937
* Metadata and Data. In this case we just compose {@link SnowflakeMetadataHandler} and {@link SnowflakeRecordHandler}.
3038
*
31-
* Recommend using {@link SnowflakeMuxCompositeHandler} instead.
3239
*/
3340
public class SnowflakeCompositeHandler
3441
extends CompositeHandler
3542
{
36-
public SnowflakeCompositeHandler()
43+
public SnowflakeCompositeHandler() throws CertificateEncodingException, IOException, NoSuchAlgorithmException, KeyStoreException
3744
{
3845
super(new SnowflakeMetadataHandler(new SnowflakeEnvironmentProperties().createEnvironment()), new SnowflakeRecordHandler(new SnowflakeEnvironmentProperties().createEnvironment()));
46+
installCaCertificate();
47+
setupNativeEnvironmentVariables();
3948
}
4049
}

athena-snowflake/src/main/java/com/amazonaws/athena/connectors/snowflake/SnowflakeConstants.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,13 @@ public final class SnowflakeConstants
3535
*/
3636
public static final int SINGLE_SPLIT_LIMIT_COUNT = 10000;
3737
public static final String SNOWFLAKE_QUOTE_CHARACTER = "\"";
38+
/**
39+
* A ssl file location constant to store the SSL certificate
40+
* The file location is fixed at /tmp directory
41+
* to retrieve ssl certificate location
42+
*/
43+
public static final String SSL_CERT_FILE_LOCATION = "SSL_CERT_FILE";
44+
public static final String SSL_CERT_FILE_LOCATION_VALUE = "/tmp/cacert.pem";
3845

3946
private SnowflakeConstants() {}
4047
}

0 commit comments

Comments
 (0)