-
Notifications
You must be signed in to change notification settings - Fork 17
5091 Multi threaded state store committer #6189
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
5043caf
52033ae
baa7bd3
ef06f6c
22627b9
80d7435
b3f2794
5438582
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,9 +17,29 @@ | |
|
|
||
| import software.amazon.awscdk.Duration; | ||
| import software.amazon.awscdk.NestedStack; | ||
| import software.amazon.awscdk.services.autoscaling.AutoScalingGroup; | ||
| import software.amazon.awscdk.services.autoscaling.UpdatePolicy; | ||
| import software.amazon.awscdk.services.ec2.IVpc; | ||
| import software.amazon.awscdk.services.ec2.InstanceType; | ||
| import software.amazon.awscdk.services.ec2.LaunchTemplate; | ||
| import software.amazon.awscdk.services.ec2.UserData; | ||
| import software.amazon.awscdk.services.ec2.Vpc; | ||
| import software.amazon.awscdk.services.ec2.VpcLookupOptions; | ||
| import software.amazon.awscdk.services.ecr.IRepository; | ||
| import software.amazon.awscdk.services.ecr.Repository; | ||
| import software.amazon.awscdk.services.ecs.AmiHardwareType; | ||
| import software.amazon.awscdk.services.ecs.AsgCapacityProvider; | ||
| import software.amazon.awscdk.services.ecs.Cluster; | ||
| import software.amazon.awscdk.services.ecs.ContainerDefinitionOptions; | ||
| import software.amazon.awscdk.services.ecs.ContainerImage; | ||
| import software.amazon.awscdk.services.ecs.Ec2Service; | ||
| import software.amazon.awscdk.services.ecs.Ec2TaskDefinition; | ||
| import software.amazon.awscdk.services.ecs.EcsOptimizedImage; | ||
| import software.amazon.awscdk.services.iam.Effect; | ||
| import software.amazon.awscdk.services.iam.IGrantable; | ||
| import software.amazon.awscdk.services.iam.PolicyStatement; | ||
| import software.amazon.awscdk.services.iam.Role; | ||
| import software.amazon.awscdk.services.iam.ServicePrincipal; | ||
| import software.amazon.awscdk.services.lambda.IFunction; | ||
| import software.amazon.awscdk.services.lambda.eventsources.SqsEventSource; | ||
| import software.amazon.awscdk.services.logs.ILogGroup; | ||
|
|
@@ -37,18 +57,26 @@ | |
| import sleeper.cdk.stack.ingest.IngestTrackerResources; | ||
| import sleeper.cdk.util.TrackDeadLetters; | ||
| import sleeper.cdk.util.Utils; | ||
| import sleeper.core.deploy.DockerDeployment; | ||
| import sleeper.core.deploy.LambdaHandler; | ||
| import sleeper.core.properties.instance.InstanceProperties; | ||
| import sleeper.core.properties.instance.TableStateProperty; | ||
| import sleeper.core.properties.model.StateStoreCommitterPlatform; | ||
| import sleeper.core.util.EnvironmentUtils; | ||
|
|
||
| import java.util.List; | ||
| import java.util.Map; | ||
|
|
||
| import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.REGION; | ||
| import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.STATESTORE_COMMITTER_DLQ_ARN; | ||
| import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.STATESTORE_COMMITTER_DLQ_URL; | ||
| import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.STATESTORE_COMMITTER_EVENT_SOURCE_ID; | ||
| import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.STATESTORE_COMMITTER_LOG_GROUP; | ||
| import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.STATESTORE_COMMITTER_QUEUE_ARN; | ||
| import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.STATESTORE_COMMITTER_QUEUE_URL; | ||
| import static sleeper.core.properties.instance.CdkDefinedInstanceProperty.VERSION; | ||
| import static sleeper.core.properties.instance.CommonProperty.ID; | ||
| import static sleeper.core.properties.instance.CommonProperty.VPC_ID; | ||
| import static sleeper.core.properties.instance.TableStateProperty.STATESTORE_COMMITTER_BATCH_SIZE; | ||
| import static sleeper.core.properties.instance.TableStateProperty.STATESTORE_COMMITTER_LAMBDA_CONCURRENCY_MAXIMUM; | ||
| import static sleeper.core.properties.instance.TableStateProperty.STATESTORE_COMMITTER_LAMBDA_CONCURRENCY_RESERVED; | ||
|
|
@@ -79,10 +107,15 @@ public StateStoreCommitterStack( | |
| SleeperLambdaCode lambdaCode = jars.lambdaCode(jarsBucket); | ||
|
|
||
| commitQueue = sqsQueueForStateStoreCommitter(policiesStack, deadLetters); | ||
| lambdaToCommitStateStoreUpdates( | ||
|
|
||
| if (this.instanceProperties.getEnumValue(TableStateProperty.STATESTORE_COMMITTER_PLATFORM, StateStoreCommitterPlatform.class).equals(StateStoreCommitterPlatform.EC2)) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this might be a bit easier to read if you static import the instance property STATESTORE_COMMITTER_PLATFORM. There are also other references to instance properties that could be static imported in this file. |
||
| ecsTaskToCommitStateStoreUpdates(loggingStack, configBucketStack, tableIndexStack, stateStoreStacks, commitQueue); | ||
| } else { | ||
| lambdaToCommitStateStoreUpdates( | ||
| loggingStack, policiesStack, lambdaCode, | ||
| configBucketStack, tableIndexStack, stateStoreStacks, | ||
| compactionTracker, ingestTracker); | ||
| } | ||
| } | ||
|
|
||
| private Queue sqsQueueForStateStoreCommitter(ManagedPoliciesStack policiesStack, TrackDeadLetters deadLetters) { | ||
|
|
@@ -116,6 +149,91 @@ private Queue sqsQueueForStateStoreCommitter(ManagedPoliciesStack policiesStack, | |
| return queue; | ||
| } | ||
|
|
||
| private void ecsTaskToCommitStateStoreUpdates(LoggingStack loggingStack, ConfigBucketStack configBucketStack, TableIndexStack tableIndexStack, StateStoreStacks stateStoreStacks, Queue commitQueue) { | ||
| String instanceId = this.instanceProperties.get(ID); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are unnecessary uses of "this" in this file. Please see our coding conventions: |
||
|
|
||
| IVpc vpc = Vpc.fromLookup(this, "vpc", VpcLookupOptions.builder() | ||
| .vpcId(this.instanceProperties.get(VPC_ID)) | ||
| .build()); | ||
| String clusterName = String.join("-", "sleeper", | ||
| Utils.cleanInstanceId(this.instanceProperties), "statestore-commit-cluster"); | ||
| Cluster cluster = Cluster.Builder.create(this, "cluster") | ||
| .clusterName(clusterName) | ||
| .vpc(vpc) | ||
| .build(); | ||
|
|
||
| String ec2InstanceType = this.instanceProperties.get(TableStateProperty.STATESTORE_COMMITTER_EC2_INSTANCE_TYPE); | ||
|
|
||
| cluster.addAsgCapacityProvider(AsgCapacityProvider.Builder.create(this, "capacity-provider") | ||
| .autoScalingGroup(AutoScalingGroup.Builder.create(this, "asg") | ||
| .launchTemplate(LaunchTemplate.Builder.create(this, "instance-template") | ||
| .instanceType(new InstanceType(ec2InstanceType)) | ||
| .machineImage(EcsOptimizedImage.amazonLinux2023(AmiHardwareType.ARM)) | ||
| .requireImdsv2(true) | ||
| .userData(UserData.forLinux()) | ||
| .role(Role.Builder.create(this, "role") | ||
| .assumedBy(ServicePrincipal.fromStaticServicePrincipleName("ec2.amazonaws.com")) | ||
| .build()) | ||
| .build()) | ||
| .vpc(vpc) | ||
| .minCapacity(0) | ||
| .maxCapacity(1) | ||
| .desiredCapacity(1) | ||
| .minHealthyPercentage(0) | ||
| .maxHealthyPercentage(100) | ||
| .defaultInstanceWarmup(Duration.seconds(30)) | ||
| .newInstancesProtectedFromScaleIn(false) | ||
| .updatePolicy(UpdatePolicy.rollingUpdate()) | ||
| .build()) | ||
| .instanceWarmupPeriod(30) | ||
| .enableManagedTerminationProtection(false) | ||
| .build()); | ||
|
|
||
| IRepository repository = Repository.fromRepositoryName(this, "ecr", DockerDeployment.STATESTORE_COMMITTER.getEcrRepositoryName(this.instanceProperties)); | ||
| ContainerImage containerImage = ContainerImage.fromEcrRepository(repository, this.instanceProperties.get(VERSION)); | ||
|
|
||
| ILogGroup logGroup = loggingStack.getLogGroup(LogGroupRef.STATESTORE_COMMITTER); | ||
|
|
||
| Map<String, String> environmentVariables = EnvironmentUtils.createDefaultEnvironment(this.instanceProperties); | ||
| environmentVariables.put(Utils.AWS_REGION, this.instanceProperties.get(REGION)); | ||
|
|
||
| if (this.instanceProperties.getEnumValue(TableStateProperty.STATESTORE_COMMITTER_PLATFORM, StateStoreCommitterPlatform.class).equals(StateStoreCommitterPlatform.EC2)) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This duplicates the check in the constructor, so the else branch of this if statement will never be executed. Should this be removed? |
||
| Ec2TaskDefinition taskDefinition = Ec2TaskDefinition.Builder.create(this, "task-definition") | ||
| .family(String.join("-", Utils.cleanInstanceId(this.instanceProperties), "StateStoreCommitterOnEC2")) | ||
| .build(); | ||
|
|
||
| taskDefinition.addContainer("committer", ContainerDefinitionOptions.builder() | ||
| .containerName("committer") | ||
| .image(containerImage) | ||
| .command(List.of(instanceId, commitQueue.getQueueUrl())) | ||
| .environment(environmentVariables) | ||
| .memoryReservationMiB(1024) | ||
| .logging(Utils.createECSContainerLogDriver(logGroup)) | ||
| .build()); | ||
|
|
||
| commitQueue.grantConsumeMessages(taskDefinition.getTaskRole()); | ||
| configBucketStack.grantRead(taskDefinition.getTaskRole()); | ||
| tableIndexStack.grantRead(taskDefinition.getTaskRole()); | ||
| stateStoreStacks.grantReadWriteAllFilesAndPartitions(taskDefinition.getTaskRole()); | ||
|
|
||
| Ec2Service.Builder.create(this, "service") | ||
| .cluster(cluster) | ||
| .taskDefinition(taskDefinition) | ||
| .desiredCount(1) | ||
| .build(); | ||
|
|
||
| } else { | ||
| throw new IllegalArgumentException( | ||
| "Unknown value for " + | ||
| TableStateProperty.STATESTORE_COMMITTER_PLATFORM.getPropertyName() + ": " + | ||
| this.instanceProperties.getEnumValue( | ||
| TableStateProperty.STATESTORE_COMMITTER_PLATFORM, | ||
| StateStoreCommitterPlatform.class | ||
| ) | ||
| ); | ||
| } | ||
| } | ||
|
|
||
| private void lambdaToCommitStateStoreUpdates( | ||
| LoggingStack loggingStack, ManagedPoliciesStack policiesStack, SleeperLambdaCode lambdaCode, | ||
| ConfigBucketStack configBucketStack, TableIndexStack tableIndexStack, StateStoreStacks stateStoreStacks, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -113,7 +113,7 @@ public List<StackDockerImage> getAllImagesToUpload() { | |
|
|
||
| private Stream<StackDockerImage> dockerDeploymentImages(Collection<OptionalStack> stacks) { | ||
| return dockerDeployments.stream() | ||
| .filter(deployment -> stacks.contains(deployment.getOptionalStack())) | ||
| .filter(deployment -> deployment.getOptionalStack() == null || stacks.contains(deployment.getOptionalStack())) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is failing some tests in AdminClientPropertiesStoreIT, because the getImagesToUploadOnUpdate method assumes that the only images to be uploaded are for optional stacks. The intended behaviour of getImagesToUploadOnUpdate is to upload images for optional stacks that weren't in the instance before. We could handle optional stack images separately from core images, or potentially treat the new state store platform similarly to optional stacks, so that the image will only be uploaded when you choose that platform. In either case this needs unit test coverage, ideally in UploadDockerImagesToEcrTest. |
||
| .map(StackDockerImage::fromDockerDeployment); | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To ignore specific failures like this, we use a SuppressFBWarnings annotation. That's in the Maven dependency com.github.spotbugs:spotbugs-annotations.