diff --git a/internal/backend/remote-state/s3/backend.go b/internal/backend/remote-state/s3/backend.go index f04c6cdae051..415315605b5b 100644 --- a/internal/backend/remote-state/s3/backend.go +++ b/internal/backend/remote-state/s3/backend.go @@ -46,6 +46,7 @@ type Backend struct { acl string kmsKeyID string ddbTable string + useLockFile bool workspaceKeyPrefix string skipS3Checksum bool } @@ -152,6 +153,11 @@ func (b *Backend) ConfigSchema() *configschema.Block { Optional: true, Description: "DynamoDB table for state locking and consistency", }, + "use_lockfile": { + Type: cty.Bool, + Optional: true, + Description: "(Experimental) Whether to use a lockfile for locking the state file.", + }, "profile": { Type: cty.String, Optional: true, @@ -822,6 +828,7 @@ func (b *Backend) Configure(obj cty.Value) tfdiags.Diagnostics { b.serverSideEncryption = boolAttr(obj, "encrypt") b.kmsKeyID = stringAttr(obj, "kms_key_id") b.ddbTable = stringAttr(obj, "dynamodb_table") + b.useLockFile = boolAttr(obj, "use_lockfile") b.skipS3Checksum = boolAttr(obj, "skip_s3_checksum") if _, ok := stringAttrOk(obj, "kms_key_id"); ok { diff --git a/internal/backend/remote-state/s3/backend_state.go b/internal/backend/remote-state/s3/backend_state.go index 2ac9e0397d20..07726d245814 100644 --- a/internal/backend/remote-state/s3/backend_state.go +++ b/internal/backend/remote-state/s3/backend_state.go @@ -27,6 +27,8 @@ const ( // defaultWorkspaceKeyPrefix is the default prefix for workspace storage. // The colon is used to reduce the chance of name conflicts with existing objects. defaultWorkspaceKeyPrefix = "env:" + // lockFileSuffix defines the suffix for Terraform state lock files. 
+ lockFileSuffix = ".tflock" ) func (b *Backend) Workspaces() ([]string, error) { @@ -163,6 +165,8 @@ func (b *Backend) remoteClient(name string) (*RemoteClient, error) { kmsKeyID: b.kmsKeyID, ddbTable: b.ddbTable, skipS3Checksum: b.skipS3Checksum, + lockFilePath: b.getLockFilePath(name), + useLockFile: b.useLockFile, } return client, nil @@ -276,3 +280,9 @@ func newBucketRegionError(requestRegion, bucketRegion string) bucketRegionError func (err bucketRegionError) Error() string { return fmt.Sprintf("requested bucket from %q, actual location %q", err.requestRegion, err.bucketRegion) } + +// getLockFilePath returns the path to the lock file for the given Terraform state. +// For `default.tfstate`, the lock file is stored at `default.tfstate.tflock`. +func (b *Backend) getLockFilePath(name string) string { + return b.path(name) + lockFileSuffix +} diff --git a/internal/backend/remote-state/s3/backend_test.go b/internal/backend/remote-state/s3/backend_test.go index 037135484c71..72ed17d09712 100644 --- a/internal/backend/remote-state/s3/backend_test.go +++ b/internal/backend/remote-state/s3/backend_test.go @@ -38,6 +38,7 @@ import ( "github.com/hashicorp/terraform/internal/configs/hcl2shim" "github.com/hashicorp/terraform/internal/states" "github.com/hashicorp/terraform/internal/states/remote" + "github.com/hashicorp/terraform/internal/states/statemgr" "github.com/hashicorp/terraform/internal/tfdiags" ) @@ -157,6 +158,162 @@ func TestBackendConfig_original(t *testing.T) { } } +func TestBackendConfig_withLockfile(t *testing.T) { + testACC(t) + + ctx := context.TODO() + + region := "us-west-1" + + config := map[string]interface{}{ + "region": region, + "bucket": "tf-test", + "key": "state", + "encrypt": true, + "use_lockfile": true, + } + + b := backend.TestBackendConfig(t, New(), backend.TestWrapConfig(config)).(*Backend) + + if b.awsConfig.Region != region { + 
t.Fatalf("Incorrect region was populated") + } + if b.awsConfig.RetryMaxAttempts != 5 { + t.Fatalf("Default max_retries was not set") + } + if b.bucketName != "tf-test" { + t.Fatalf("Incorrect bucketName was populated") + } + if b.keyName != "state" { + t.Fatalf("Incorrect keyName was populated") + } + + if b.useLockFile != true { + t.Fatalf("Expected useLockFile to be true") + } + + credentials, err := b.awsConfig.Credentials.Retrieve(ctx) + if err != nil { + t.Fatalf("Error when requesting credentials") + } + if credentials.AccessKeyID == "" { + t.Fatalf("No Access Key Id was populated") + } + if credentials.SecretAccessKey == "" { + t.Fatalf("No Secret Access Key was populated") + } + + // Check S3 Endpoint + expectedS3Endpoint := defaultEndpointS3(region) + var s3Endpoint string + _, err = b.s3Client.ListBuckets(ctx, &s3.ListBucketsInput{}, + func(opts *s3.Options) { + opts.APIOptions = append(opts.APIOptions, + addRetrieveEndpointURLMiddleware(t, &s3Endpoint), + addCancelRequestMiddleware(), + ) + }, + ) + if err == nil { + t.Fatal("Checking S3 Endpoint: Expected an error, got none") + } else if !errors.Is(err, errCancelOperation) { + t.Fatalf("Checking S3 Endpoint: Unexpected error: %s", err) + } + + if s3Endpoint != expectedS3Endpoint { + t.Errorf("Checking S3 Endpoint: expected endpoint %q, got %q", expectedS3Endpoint, s3Endpoint) + } +} + +func TestBackendConfig_multiLock(t *testing.T) { + testACC(t) + + ctx := context.TODO() + + region := "us-west-1" + + config := map[string]interface{}{ + "region": region, + "bucket": "tf-test", + "key": "state", + "encrypt": true, + "use_lockfile": true, + "dynamodb_table": "dynamoTable", + } + + b := backend.TestBackendConfig(t, New(), backend.TestWrapConfig(config)).(*Backend) + + if b.awsConfig.Region != region { + t.Fatalf("Incorrect region was populated") + } + if b.awsConfig.RetryMaxAttempts != 5 { + t.Fatalf("Default max_retries was not set") + } + if b.bucketName != "tf-test" { + t.Fatalf("Incorrect bucketName 
was populated") + } + if b.keyName != "state" { + t.Fatalf("Incorrect keyName was populated") + } + + if b.useLockFile != true { + t.Fatalf("Expected useLockFile to be true") + } + + credentials, err := b.awsConfig.Credentials.Retrieve(ctx) + if err != nil { + t.Fatalf("Error when requesting credentials") + } + if credentials.AccessKeyID == "" { + t.Fatalf("No Access Key Id was populated") + } + if credentials.SecretAccessKey == "" { + t.Fatalf("No Secret Access Key was populated") + } + + // Check S3 Endpoint + expectedS3Endpoint := defaultEndpointS3(region) + var s3Endpoint string + _, err = b.s3Client.ListBuckets(ctx, &s3.ListBucketsInput{}, + func(opts *s3.Options) { + opts.APIOptions = append(opts.APIOptions, + addRetrieveEndpointURLMiddleware(t, &s3Endpoint), + addCancelRequestMiddleware(), + ) + }, + ) + if err == nil { + t.Fatal("Checking S3 Endpoint: Expected an error, got none") + } else if !errors.Is(err, errCancelOperation) { + t.Fatalf("Checking S3 Endpoint: Unexpected error: %s", err) + } + + if s3Endpoint != expectedS3Endpoint { + t.Errorf("Checking S3 Endpoint: expected endpoint %q, got %q", expectedS3Endpoint, s3Endpoint) + } + + // Check DynamoDB Endpoint + expectedDynamoDBEndpoint := defaultEndpointDynamo(region) + var dynamoDBEndpoint string + _, err = b.dynClient.ListTables(ctx, &dynamodb.ListTablesInput{}, + func(opts *dynamodb.Options) { + opts.APIOptions = append(opts.APIOptions, + addRetrieveEndpointURLMiddleware(t, &dynamoDBEndpoint), + addCancelRequestMiddleware(), + ) + }, + ) + if err == nil { + t.Fatal("Checking DynamoDB Endpoint: Expected an error, got none") + } else if !errors.Is(err, errCancelOperation) { + t.Fatalf("Checking DynamoDB Endpoint: Unexpected error: %s", err) + } + + if dynamoDBEndpoint != expectedDynamoDBEndpoint { + t.Errorf("Checking DynamoDB Endpoint: expected endpoint %q, got %q", expectedDynamoDBEndpoint, dynamoDBEndpoint) + } +} + func TestBackendConfig_InvalidRegion(t *testing.T) { testACC(t) @@ -1850,7 
+2007,253 @@ func TestBackendLocked(t *testing.T) { backend.TestBackendStateForceUnlock(t, b1, b2) } -func TestBackendKmsKeyId(t *testing.T) { +func TestBackendLockedWithFile(t *testing.T) { + testACC(t) + + ctx := context.TODO() + + bucketName := fmt.Sprintf("terraform-remote-s3-test-%x", time.Now().Unix()) + keyName := "test/state" + + b1 := backend.TestBackendConfig(t, New(), backend.TestWrapConfig(map[string]interface{}{ + "bucket": bucketName, + "key": keyName, + "encrypt": true, + "use_lockfile": true, + "region": "us-west-2", + })).(*Backend) + + b2 := backend.TestBackendConfig(t, New(), backend.TestWrapConfig(map[string]interface{}{ + "bucket": bucketName, + "key": keyName, + "encrypt": true, + "use_lockfile": true, + "region": "us-west-2", + })).(*Backend) + + createS3Bucket(ctx, t, b1.s3Client, bucketName, b1.awsConfig.Region) + defer deleteS3Bucket(ctx, t, b1.s3Client, bucketName, b1.awsConfig.Region) + + backend.TestBackendStateLocks(t, b1, b2) + backend.TestBackendStateForceUnlock(t, b1, b2) +} + +func TestBackendLockedWithFileAndDynamoDB(t *testing.T) { + testACC(t) + + ctx := context.TODO() + + bucketName := fmt.Sprintf("terraform-remote-s3-test-%x", time.Now().Unix()) + keyName := "test/state" + + b1 := backend.TestBackendConfig(t, New(), backend.TestWrapConfig(map[string]interface{}{ + "bucket": bucketName, + "key": keyName, + "encrypt": true, + "use_lockfile": true, + "dynamodb_table": bucketName, + "region": "us-west-2", + })).(*Backend) + + b2 := backend.TestBackendConfig(t, New(), backend.TestWrapConfig(map[string]interface{}{ + "bucket": bucketName, + "key": keyName, + "encrypt": true, + "use_lockfile": true, + "dynamodb_table": bucketName, + "region": "us-west-2", + })).(*Backend) + + createS3Bucket(ctx, t, b1.s3Client, bucketName, b1.awsConfig.Region) + defer deleteS3Bucket(ctx, t, b1.s3Client, bucketName, b1.awsConfig.Region) + createDynamoDBTable(ctx, t, b1.dynClient, bucketName) + defer deleteDynamoDBTable(ctx, t, b1.dynClient, 
bucketName) + + backend.TestBackendStateLocks(t, b1, b2) + backend.TestBackendStateForceUnlock(t, b1, b2) +} + +func TestBackendLockedMixedFileAndDynamoDB(t *testing.T) { + testACC(t) + + ctx := context.TODO() + + bucketName := fmt.Sprintf("terraform-remote-s3-test-%x", time.Now().Unix()) + keyName := "test/state" + + b1 := backend.TestBackendConfig(t, New(), backend.TestWrapConfig(map[string]interface{}{ + "bucket": bucketName, + "key": keyName, + "encrypt": true, + "use_lockfile": true, + "dynamodb_table": bucketName, + "region": "us-west-2", + })).(*Backend) + + b2 := backend.TestBackendConfig(t, New(), backend.TestWrapConfig(map[string]interface{}{ + "bucket": bucketName, + "key": keyName, + "encrypt": true, + "use_lockfile": true, + "region": "us-west-2", + })).(*Backend) + + createS3Bucket(ctx, t, b1.s3Client, bucketName, b1.awsConfig.Region) + defer deleteS3Bucket(ctx, t, b1.s3Client, bucketName, b1.awsConfig.Region) + createDynamoDBTable(ctx, t, b1.dynClient, bucketName) + defer deleteDynamoDBTable(ctx, t, b1.dynClient, bucketName) + + backend.TestBackendStateLocks(t, b1, b2) + backend.TestBackendStateForceUnlock(t, b1, b2) +} + +func TestBackend_LockFileCleanupOnDynamoDBLock(t *testing.T) { + testACC(t) + + ctx := context.TODO() + + bucketName := fmt.Sprintf("terraform-remote-s3-test-%x", time.Now().Unix()) + keyName := "test/state" + + b1 := backend.TestBackendConfig(t, New(), backend.TestWrapConfig(map[string]interface{}{ + "bucket": bucketName, + "key": keyName, + "encrypt": true, + "use_lockfile": false, // Only use DynamoDB + "dynamodb_table": bucketName, + "region": "us-west-2", + })).(*Backend) + + b2 := backend.TestBackendConfig(t, New(), backend.TestWrapConfig(map[string]interface{}{ + "bucket": bucketName, + "key": keyName, + "encrypt": true, + "use_lockfile": true, // Use both DynamoDB and lockfile + "dynamodb_table": bucketName, + "region": "us-west-2", + })).(*Backend) + + createS3Bucket(ctx, t, b1.s3Client, bucketName, b1.awsConfig.Region) + 
defer deleteS3Bucket(ctx, t, b1.s3Client, bucketName, b1.awsConfig.Region) + createDynamoDBTable(ctx, t, b1.dynClient, bucketName) + defer deleteDynamoDBTable(ctx, t, b1.dynClient, bucketName) + + backend.TestBackendStateLocks(t, b1, b2) + + // Attempt to retrieve the lock file from S3. + _, err := b1.s3Client.GetObject(ctx, &s3.GetObjectInput{ + Bucket: aws.String(b1.bucketName), + Key: aws.String(b1.keyName + ".tflock"), + }) + // We expect an error here, indicating that the lock file does not exist. + // The absence of the lock file is expected, as it should have been + // cleaned up following a failed lock acquisition due to `b1` already + // acquiring a DynamoDB lock. + if err != nil { + if !IsA[*s3types.NoSuchKey](err) { + t.Fatalf("unexpected error: %s", err) + } + } else { + t.Fatalf("expected error, got none") + } +} + +func TestBackend_LockDeletedOutOfBand(t *testing.T) { + testACC(t) + + ctx := context.TODO() + + bucketName := fmt.Sprintf("terraform-remote-s3-test-%x", time.Now().Unix()) + keyName := "test/state" + + b1 := backend.TestBackendConfig(t, New(), backend.TestWrapConfig(map[string]interface{}{ + "bucket": bucketName, + "key": keyName, + "encrypt": true, + "use_lockfile": true, + "dynamodb_table": bucketName, + "region": "us-west-2", + })).(*Backend) + + createS3Bucket(ctx, t, b1.s3Client, bucketName, b1.awsConfig.Region) + defer deleteS3Bucket(ctx, t, b1.s3Client, bucketName, b1.awsConfig.Region) + createDynamoDBTable(ctx, t, b1.dynClient, bucketName) + defer deleteDynamoDBTable(ctx, t, b1.dynClient, bucketName) + + testBackendStateLockDeletedOutOfBand(ctx, t, b1) +} + +func TestBackend_KmsKeyId(t *testing.T) { + testACC(t) + kmsKeyID := os.Getenv("TF_S3_TEST_KMS_KEY_ID") + if kmsKeyID == "" { + t.Skip("TF_S3_KMS_KEY_ID is empty. 
Set this variable to an existing KMS key ID to run this test.") + } + + ctx := context.TODO() + + bucketName := fmt.Sprintf("terraform-remote-s3-test-%x", time.Now().Unix()) + keyName := "test/state" + + b1 := backend.TestBackendConfig(t, New(), backend.TestWrapConfig(map[string]interface{}{ + "bucket": bucketName, + "key": keyName, + "encrypt": true, + "kms_key_id": kmsKeyID, + "use_lockfile": true, + "region": "us-west-2", + })).(*Backend) + + b2 := backend.TestBackendConfig(t, New(), backend.TestWrapConfig(map[string]interface{}{ + "bucket": bucketName, + "key": keyName, + "encrypt": true, + "kms_key_id": kmsKeyID, + "use_lockfile": true, + "region": "us-west-2", + })).(*Backend) + + createS3Bucket(ctx, t, b1.s3Client, bucketName, b1.awsConfig.Region) + defer deleteS3Bucket(ctx, t, b1.s3Client, bucketName, b1.awsConfig.Region) + + backend.TestBackendStateLocks(t, b1, b2) + backend.TestBackendStateForceUnlock(t, b1, b2) +} + +func TestBackend_ACL(t *testing.T) { + testACC(t) + + ctx := context.TODO() + + bucketName := fmt.Sprintf("terraform-remote-s3-test-%x", time.Now().Unix()) + keyName := "test/state" + + b1 := backend.TestBackendConfig(t, New(), backend.TestWrapConfig(map[string]interface{}{ + "bucket": bucketName, + "key": keyName, + "encrypt": true, + "use_lockfile": true, + "region": "us-west-2", + "acl": "bucket-owner-full-control", + })).(*Backend) + + b2 := backend.TestBackendConfig(t, New(), backend.TestWrapConfig(map[string]interface{}{ + "bucket": bucketName, + "key": keyName, + "encrypt": true, + "use_lockfile": true, + "region": "us-west-2", + "acl": "bucket-owner-full-control", + })).(*Backend) + + createS3Bucket(ctx, t, b1.s3Client, bucketName, b1.awsConfig.Region) + defer deleteS3Bucket(ctx, t, b1.s3Client, bucketName, b1.awsConfig.Region) + + backend.TestBackendStateLocks(t, b1, b2) + backend.TestBackendStateForceUnlock(t, b1, b2) +} + +func TestBackendConfigKmsKeyId(t *testing.T) { testACC(t) testCases := map[string]struct { @@ -2223,6 
+2626,61 @@ func TestBackendPrefixInWorkspace(t *testing.T) { } } +// ensure that we create the lock file in the correct location when using a +// workspace prefix. +func TestBackendLockFileWithPrefix(t *testing.T) { + testACC(t) + + ctx := context.TODO() + + bucketName := fmt.Sprintf("terraform-remote-s3-test-%x", time.Now().Unix()) + + workspacePrefix := "prefix" + key := "test/test-env.tfstate" + + b := backend.TestBackendConfig(t, New(), backend.TestWrapConfig(map[string]interface{}{ + "bucket": bucketName, + "use_lockfile": true, + "key": key, + "workspace_key_prefix": workspacePrefix, + })).(*Backend) + + createS3Bucket(ctx, t, b.s3Client, bucketName, b.awsConfig.Region, s3BucketWithVersioning) + defer deleteS3Bucket(ctx, t, b.s3Client, bucketName, b.awsConfig.Region) + + // get a state that contains the prefix as a substring + sMgr, err := b.StateMgr("env-1") + if err != nil { + t.Fatal(err) + } + if err := sMgr.RefreshState(); err != nil { + t.Fatal(err) + } + + if err := checkStateList(b, []string{"default", "env-1"}); err != nil { + t.Fatal(err) + } + + // Check if the lock file is created in the correct location + // + // If created and cleaned up correctly, a delete marker should + // be present at the lock file key location. 
+ lockFileKey := fmt.Sprintf("%s/env-1/%s.tflock", workspacePrefix, key) + out, err := b.s3Client.ListObjectVersions(ctx, &s3.ListObjectVersionsInput{ + Bucket: aws.String(bucketName), + }) + + found := false + for _, item := range out.DeleteMarkers { + if aws.ToString(item.Key) == lockFileKey { + found = true + } + } + if !found { + t.Fatalf("lock file %q not found in expected location", lockFileKey) + } +} + func TestBackendRestrictedRoot_Default(t *testing.T) { testACC(t) @@ -2352,6 +2810,7 @@ func TestBackendWrongRegion(t *testing.T) { func TestBackendS3ObjectLock(t *testing.T) { testACC(t) + objectLockPreCheck(t) ctx := context.TODO() @@ -2722,6 +3181,84 @@ func TestBackend_CoerceValue(t *testing.T) { } } +func testBackendStateLockDeletedOutOfBand(ctx context.Context, t *testing.T, b1 *Backend) { + t.Helper() + + tableName := b1.ddbTable + bucketName := b1.bucketName + s3StateKey := b1.keyName + s3LockKey := s3StateKey + lockFileSuffix + // The dynamoDB LockID value is the full statfile path (not the generated UUID) + ddbLockID := fmt.Sprintf("%s/%s", bucketName, s3StateKey) + + // Get the default state + b1StateMgr, err := b1.StateMgr(backend.DefaultStateName) + if err != nil { + t.Fatalf("error: %s", err) + } + if err := b1StateMgr.RefreshState(); err != nil { + t.Fatalf("bad: %s", err) + } + + // Fast exit if this doesn't support locking at all + if _, ok := b1StateMgr.(statemgr.Locker); !ok { + t.Logf("TestBackend: backend %T doesn't support state locking, not testing", b1) + return + } + + t.Logf("testing deletion of a dynamoDB state lock out of band") + + // Reassign so its obvious whats happening + locker := b1StateMgr.(statemgr.Locker) + + info := statemgr.NewLockInfo() + info.Operation = "test" + info.Who = "clientA" + + lockID, err := locker.Lock(info) + if err != nil { + t.Fatal("unable to get initial lock:", err) + } + + getInput := &s3.GetObjectInput{ + Bucket: &bucketName, + Key: &s3LockKey, + } + + // Verify the s3 lock file exists + if _, err = 
b1.s3Client.GetObject(ctx, getInput); err != nil { + t.Fatal("failed to get s3 lock file:", err) + } + + deleteInput := &dynamodb.DeleteItemInput{ + Key: map[string]dynamodbtypes.AttributeValue{ + "LockID": &dynamodbtypes.AttributeValueMemberS{ + Value: ddbLockID, + }, + }, + TableName: aws.String(tableName), + } + + // Delete the DynamoDB lock out of band + if _, err = b1.dynClient.DeleteItem(ctx, deleteInput); err != nil { + t.Fatal("failed to delete dynamodb item:", err) + } + + if err = locker.Unlock(lockID); err == nil { + t.Fatal("expected unlock failure, no error returned") + } + + // Verify the s3 lock file was still cleaned up by Unlock + _, err = b1.s3Client.GetObject(ctx, getInput) + if err != nil { + if !IsA[*s3types.NoSuchKey](err) { + t.Fatalf("unexpected error getting s3 lock file: %s", err) + } + } else { + t.Fatalf("expected error getting s3 lock file, got none") + } +} + func testGetWorkspaceForKey(b *Backend, key string, expected string) error { if actual := b.keyEnv(key); actual != expected { return fmt.Errorf("incorrect workspace for key[%q]. Expected[%q]: Actual[%q]", key, expected, actual) @@ -2840,13 +3377,31 @@ func deleteS3Bucket(ctx context.Context, t *testing.T, s3Client *s3.Client, buck warning := "WARNING: Failed to delete the test S3 bucket. It may have been left in your AWS account and may incur storage charges. 
(error was %s)" // first we have to get rid of the env objects, or we can't delete the bucket - resp, err := s3Client.ListObjects(ctx, &s3.ListObjectsInput{Bucket: &bucketName}, s3WithRegion(region)) + resp, err := s3Client.ListObjectVersions(ctx, &s3.ListObjectVersionsInput{Bucket: &bucketName}, s3WithRegion(region)) if err != nil { t.Logf(warning, err) return } - for _, obj := range resp.Contents { - if _, err := s3Client.DeleteObject(ctx, &s3.DeleteObjectInput{Bucket: &bucketName, Key: obj.Key}, s3WithRegion(region)); err != nil { + + for _, obj := range resp.Versions { + input := &s3.DeleteObjectInput{ + Bucket: &bucketName, + Key: obj.Key, + VersionId: obj.VersionId, + } + if _, err := s3Client.DeleteObject(ctx, input, s3WithRegion(region)); err != nil { + // this will need cleanup no matter what, so just warn and exit + t.Logf(warning, err) + return + } + } + for _, obj := range resp.DeleteMarkers { + input := &s3.DeleteObjectInput{ + Bucket: &bucketName, + Key: obj.Key, + VersionId: obj.VersionId, + } + if _, err := s3Client.DeleteObject(ctx, input, s3WithRegion(region)); err != nil { // this will need cleanup no matter what, so just warn and exit t.Logf(warning, err) return @@ -2886,6 +3441,7 @@ func createDynamoDBTable(ctx context.Context, t *testing.T, dynClient *dynamodb. TableName: aws.String(tableName), } + t.Logf("creating DynamoDB table %s", tableName) _, err := dynClient.CreateTable(ctx, createInput) if err != nil { t.Fatal(err) @@ -3184,3 +3740,21 @@ func defaultEndpointS3(region string) string { return ep.URI.String() } + +// objectLockPreCheck gates tests using object lock enabled buckets +// by checking for a configured environment variable. +// +// With object lock enabled, the statefile object written to the bucket +// cannot be deleted by the deleteS3Bucket test helper, resulting in an +// orphaned bucket after acceptance tests complete. 
Deletion of this +// leftover resource must be completed out of band by waiting until the +// default "Compliance" retention period of the objects has expired +// (1 day), emptying the bucket, and deleting it. +// +// Because clean up requires additional action outside the scope of the +// acceptance test, tests including this check are skipped by default. +func objectLockPreCheck(t *testing.T) { + if os.Getenv("TF_S3_OBJECT_LOCK_TEST") == "" { + t.Skip("s3 backend tests using object lock enabled buckets require setting TF_S3_OBJECT_LOCK_TEST") + } +} diff --git a/internal/backend/remote-state/s3/client.go b/internal/backend/remote-state/s3/client.go index ab813a1db50c..e56703bef908 100644 --- a/internal/backend/remote-state/s3/client.go +++ b/internal/backend/remote-state/s3/client.go @@ -12,6 +12,7 @@ import ( "encoding/json" "errors" "fmt" + "io" "log" "time" @@ -48,6 +49,8 @@ type RemoteClient struct { kmsKeyID string ddbTable string skipS3Checksum bool + lockFilePath string + useLockFile bool } var ( @@ -270,11 +273,13 @@ func (c *RemoteClient) Delete() error { return nil } +// Lock attempts to obtain a lock, returning the lock ID if successful. 
func (c *RemoteClient) Lock(info *statemgr.LockInfo) (string, error) { ctx := context.TODO() log := c.logger(operationLockerLock) - if c.ddbTable == "" { + // no file, no dynamodb + if !c.useLockFile && c.ddbTable == "" { return "", nil } @@ -293,8 +298,104 @@ func (c *RemoteClient) Lock(info *statemgr.LockInfo) (string, error) { ctx, baselog := baselogging.NewHcLogger(ctx, log) ctx = baselogging.RegisterLogger(ctx, baselog) - log.Info("Locking remote state") + // file only, no dynamodb + if c.useLockFile && c.ddbTable == "" { + log.Info("Attempting to lock remote state (S3 Native only)...") + if err := c.lockWithFile(ctx, info, log); err != nil { + return "", err + } + + log.Info("Locked remote state (S3 Native only)") + return info.ID, nil + } + + // dynamodb only, no file + if !c.useLockFile && c.ddbTable != "" { + log.Info("Attempting to lock remote state (DynamoDB only)...") + if err := c.lockWithDynamoDB(ctx, info); err != nil { + return "", err + } + + log.Info("Locked remote state (DynamoDB only)") + return info.ID, nil + } + + // double locking: dynamodb + file (design decision: both must succeed) + log.Info("Attempting to lock remote state (S3 Native and DynamoDB)...") + if err := c.lockWithFile(ctx, info, log); err != nil { + return "", err + } + + if err := c.lockWithDynamoDB(ctx, info); err != nil { + // Release the file lock if attempting to acquire the DynamoDB lock fails. + if unlockErr := c.unlockWithFile(ctx, info.ID, &statemgr.LockError{}, log); unlockErr != nil { + return "", fmt.Errorf("failed to clean up file lock after DynamoDB lock error: %v; original error: %w", unlockErr, err) + } + return "", err + } + + log.Info("Locked remote state (S3 Native and DynamoDB)") + return info.ID, nil +} + +// lockWithFile attempts to acquire a lock on the remote state by uploading a lock file to Amazon S3. +// +// This method is used when the S3 native locking mechanism is in use. 
It uploads a lock file (JSON) +// to an S3 bucket to establish a lock on the state file. If the lock file does not already +// exist, the operation will succeed, acquiring the lock. If the lock file already exists, the operation +// will fail due to a conditional write, indicating that the lock is already held by another Terraform client. +func (c *RemoteClient) lockWithFile(ctx context.Context, info *statemgr.LockInfo, log hclog.Logger) error { + lockFileJson, err := json.Marshal(info) + if err != nil { + return err + } + + input := &s3.PutObjectInput{ + ContentType: aws.String("application/json"), + Body: bytes.NewReader(lockFileJson), + Bucket: aws.String(c.bucketName), + Key: aws.String(c.lockFilePath), + IfNoneMatch: aws.String("*"), + } + + if c.serverSideEncryption { + if c.kmsKeyID != "" { + input.SSEKMSKeyId = aws.String(c.kmsKeyID) + input.ServerSideEncryption = s3types.ServerSideEncryptionAwsKms + } else if c.customerEncryptionKey != nil { + input.SSECustomerKey = aws.String(base64.StdEncoding.EncodeToString(c.customerEncryptionKey)) + input.SSECustomerAlgorithm = aws.String(string(s3EncryptionAlgorithm)) + input.SSECustomerKeyMD5 = aws.String(c.getSSECustomerKeyMD5()) + } else { + input.ServerSideEncryption = s3EncryptionAlgorithm + } + } + + if c.acl != "" { + input.ACL = s3types.ObjectCannedACL(c.acl) + } + + log.Debug("Uploading lock file") + + _, err = c.s3Client.PutObject(ctx, input) + if err != nil { + // Attempt to retrieve lock info from the file, and merge errors if it fails. 
+ lockInfo, infoErr := c.getLockInfoWithFile(ctx) + if infoErr != nil { + err = errors.Join(err, infoErr) + } + + return &statemgr.LockError{ + Err: err, + Info: lockInfo, + } + } + + return nil +} + +func (c *RemoteClient) lockWithDynamoDB(ctx context.Context, info *statemgr.LockInfo) error { putParams := &dynamodb.PutItemInput{ Item: map[string]dynamodbtypes.AttributeValue{ "LockID": &dynamodbtypes.AttributeValueMemberS{ @@ -307,10 +408,11 @@ func (c *RemoteClient) Lock(info *statemgr.LockInfo) (string, error) { TableName: aws.String(c.ddbTable), ConditionExpression: aws.String("attribute_not_exists(LockID)"), } + _, err := c.dynClient.PutItem(ctx, putParams) if err != nil { - lockInfo, infoErr := c.getLockInfo(ctx) + lockInfo, infoErr := c.getLockInfoWithDynamoDB(ctx) if infoErr != nil { err = errors.Join(err, infoErr) } @@ -319,42 +421,147 @@ func (c *RemoteClient) Lock(info *statemgr.LockInfo) (string, error) { Err: err, Info: lockInfo, } - return "", lockErr + return lockErr } - return info.ID, nil + return nil } +// Unlock releases a lock previously acquired by Lock. 
func (c *RemoteClient) Unlock(id string) error { ctx := context.TODO() log := c.logger(operationLockerUnlock) - log = logWithLockID(log, id) + // no file, no dynamodb + if !c.useLockFile && c.ddbTable == "" { + return nil + } + log = logWithLockID(log, id) ctx, baselog := baselogging.NewHcLogger(ctx, log) ctx = baselogging.RegisterLogger(ctx, baselog) - if c.ddbTable == "" { + lockErr := &statemgr.LockError{} + + // file only, no dynamodb + if c.useLockFile && c.ddbTable == "" { + log.Info("Attempting to unlock remote state (S3 Native only)...") + if err := c.unlockWithFile(ctx, id, lockErr, log); err != nil { + lockErr.Err = err + return lockErr + } + + log.Info("Unlocked remote state (S3 Native only)") return nil } - lockErr := &statemgr.LockError{} + // dynamodb only, no file + if !c.useLockFile && c.ddbTable != "" { + log.Info("Attempting to unlock remote state (DynamoDB only)...") + if err := c.unlockWithDynamoDB(ctx, id, lockErr); err != nil { + lockErr.Err = err + return lockErr + } + + log.Info("Unlocked remote state (DynamoDB only)") + return nil + } + + // Double unlocking: DynamoDB + file + log.Info("Attempting to unlock remote state (S3 Native and DynamoDB)...") + + ferr := c.unlockWithFile(ctx, id, lockErr, log) + derr := c.unlockWithDynamoDB(ctx, id, lockErr) - log.Info("Unlocking remote state") + if ferr != nil && derr != nil { + lockErr.Err = fmt.Errorf("failed to unlock both S3 and DynamoDB: S3 error: %v, DynamoDB error: %v", ferr, derr) + return lockErr + } + + if ferr != nil { + lockErr.Err = fmt.Errorf("failed to unlock S3: %v", ferr) + return lockErr + } + if derr != nil { + lockErr.Err = fmt.Errorf("failed to unlock DynamoDB: %v", derr) + return lockErr + } + + log.Info("Unlocked remote state (S3 Native and DynamoDB)") + return nil +} + +// unlockWithFile attempts to unlock the remote state by deleting the lock file from Amazon S3. 
+// +// This method is used when the S3 native locking mechanism is in use, which uses a `.tflock` file +// to manage state locking. The function deletes the lock file to release the lock, allowing other +// Terraform clients to acquire the lock on the same state file. +func (c *RemoteClient) unlockWithFile(ctx context.Context, id string, lockErr *statemgr.LockError, log hclog.Logger) error { + getInput := &s3.GetObjectInput{ + Bucket: aws.String(c.bucketName), + Key: aws.String(c.lockFilePath), + } + + if c.serverSideEncryption && c.customerEncryptionKey != nil { + getInput.SSECustomerKey = aws.String(base64.StdEncoding.EncodeToString(c.customerEncryptionKey)) + getInput.SSECustomerAlgorithm = aws.String(string(s3EncryptionAlgorithm)) + getInput.SSECustomerKeyMD5 = aws.String(c.getSSECustomerKeyMD5()) + } + + getOutput, err := c.s3Client.GetObject(ctx, getInput) + if err != nil { + return fmt.Errorf("unable to retrieve file from S3 bucket '%s' with key '%s': %w", c.bucketName, c.lockFilePath, err) + } + defer func() { + if cerr := getOutput.Body.Close(); cerr != nil { + log.Warn(fmt.Sprintf("failed to close S3 object body: %v", cerr)) + } + }() + + data, err := io.ReadAll(getOutput.Body) + if err != nil { + return fmt.Errorf("failed to read the body of the S3 object: %w", err) + } + + lockInfo := &statemgr.LockInfo{} + if err := json.Unmarshal(data, lockInfo); err != nil { + return fmt.Errorf("failed to unmarshal JSON data into LockInfo struct: %w", err) + } + lockErr.Info = lockInfo + + // Verify that the provided lock ID matches the lock ID of the retrieved lock file. + if lockInfo.ID != id { + return fmt.Errorf("lock ID '%s' does not match the existing lock ID '%s'", id, lockInfo.ID) + } + + // Delete the lock file to release the lock. 
+ _, err = c.s3Client.DeleteObject(ctx, &s3.DeleteObjectInput{ + Bucket: aws.String(c.bucketName), + Key: aws.String(c.lockFilePath), + }) + + if err != nil { + return fmt.Errorf("failed to delete the lock file: %w", err) + } + + log.Debug(fmt.Sprintf("Deleted lock file: %q", c.lockFilePath)) + + return nil +} + +func (c *RemoteClient) unlockWithDynamoDB(ctx context.Context, id string, lockErr *statemgr.LockError) error { // TODO: store the path and lock ID in separate fields, and have proper // projection expression only delete the lock if both match, rather than // checking the ID from the info field first. - lockInfo, err := c.getLockInfo(ctx) + lockInfo, err := c.getLockInfoWithDynamoDB(ctx) if err != nil { - lockErr.Err = fmt.Errorf("failed to retrieve lock info for lock ID %q: %s", id, err) - return lockErr + return fmt.Errorf("failed to retrieve lock info for lock ID %q: %s", id, err) } lockErr.Info = lockInfo if lockInfo.ID != id { - lockErr.Err = fmt.Errorf("lock ID %q does not match existing lock (%q)", id, lockInfo.ID) - return lockErr + return fmt.Errorf("lock ID %q does not match existing lock (%q)", id, lockInfo.ID) } params := &dynamodb.DeleteItemInput{ @@ -368,8 +575,7 @@ func (c *RemoteClient) Unlock(id string) error { _, err = c.dynClient.DeleteItem(ctx, params) if err != nil { - lockErr.Err = err - return lockErr + return err } return nil } @@ -459,7 +665,36 @@ func (c *RemoteClient) deleteMD5(ctx context.Context) error { return nil } -func (c *RemoteClient) getLockInfo(ctx context.Context) (*statemgr.LockInfo, error) { +// getLockInfoWithFile retrieves and parses a lock file from an S3 bucket. +func (c *RemoteClient) getLockInfoWithFile(ctx context.Context) (*statemgr.LockInfo, error) { + // Attempt to retrieve the lock file from S3. 
+ getOutput, err := c.s3Client.GetObject(ctx, &s3.GetObjectInput{ + Bucket: aws.String(c.bucketName), + Key: aws.String(c.lockFilePath), + }) + if err != nil { + return nil, fmt.Errorf("unable to retrieve file from S3 bucket '%s' with key '%s': %w", c.bucketName, c.lockFilePath, err) + } + defer func() { + if cerr := getOutput.Body.Close(); cerr != nil { + log.Printf("failed to close S3 object body: %v", cerr) + } + }() + + data, err := io.ReadAll(getOutput.Body) + if err != nil { + return nil, fmt.Errorf("failed to read the body of the S3 object: %w", err) + } + + lockInfo := &statemgr.LockInfo{} + if err := json.Unmarshal(data, lockInfo); err != nil { + return nil, fmt.Errorf("failed to unmarshal JSON data into LockInfo struct: %w", err) + } + + return lockInfo, nil +} + +func (c *RemoteClient) getLockInfoWithDynamoDB(ctx context.Context) (*statemgr.LockInfo, error) { getParams := &dynamodb.GetItemInput{ Key: map[string]dynamodbtypes.AttributeValue{ "LockID": &dynamodbtypes.AttributeValueMemberS{ diff --git a/internal/backend/remote-state/s3/client_test.go b/internal/backend/remote-state/s3/client_test.go index 6d15f6ee7786..a1c7722ea1cf 100644 --- a/internal/backend/remote-state/s3/client_test.go +++ b/internal/backend/remote-state/s3/client_test.go @@ -345,6 +345,7 @@ func TestRemoteClient_stateChecksum(t *testing.T) { func TestRemoteClientPutLargeUploadWithObjectLock(t *testing.T) { testACC(t) + objectLockPreCheck(t) ctx := context.TODO() diff --git a/website/docs/language/backend/s3.mdx b/website/docs/language/backend/s3.mdx index d6087d04acdf..e68514c6232f 100644 --- a/website/docs/language/backend/s3.mdx +++ b/website/docs/language/backend/s3.mdx @@ -166,6 +166,7 @@ The following configuration is required: The following configuration is optional: +* `use_lockfile` - (Experimental, Optional) Whether to use a lockfile for locking the state file. Defaults to `false`. * `access_key` - (Optional) AWS access key. If configured, must also configure `secret_key`. 
This can also be sourced from the `AWS_ACCESS_KEY_ID` environment variable, AWS shared credentials file (e.g. `~/.aws/credentials`), or AWS shared configuration file (e.g. `~/.aws/config`). * `allowed_account_ids` - (Optional) List of allowed AWS account IDs to prevent potential destruction of a live environment. Conflicts with `forbidden_account_ids`. * `custom_ca_bundle` - (Optional) File containing custom root and intermediate certificates. Can also be set using the `AWS_CA_BUNDLE` environment variable. Setting ca_bundle in the shared config file is not supported. @@ -311,23 +312,33 @@ The following configuration is required: The following configuration is optional: -* `acl` - (Optional) [Canned ACL](https://docs.aws.amazon.com/AmazonS3/latest/userguide/acl-overview.html#canned-acl) to be applied to the state file. -* `encrypt` - (Optional) Enable [server side encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingServerSideEncryption.html) of the state file. +* `acl` - (Optional) [Canned ACL](https://docs.aws.amazon.com/AmazonS3/latest/userguide/acl-overview.html#canned-acl) to be applied to the state and lock files. +* `encrypt` - (Optional) Enable [server side encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingServerSideEncryption.html) of the state and lock files. * `endpoint` - (Optional, **Deprecated**) Custom endpoint URL for the AWS S3 API. Use `endpoints.s3` instead. * `force_path_style` - (Optional, **Deprecated**) Enable path-style S3 URLs (`https:///` instead of `https://.`). -* `kms_key_id` - (Optional) Amazon Resource Name (ARN) of a Key Management Service (KMS) Key to use for encrypting the state. Note that if this value is specified, Terraform will need `kms:Encrypt`, `kms:Decrypt` and `kms:GenerateDataKey` permissions on this KMS key. 
-* `sse_customer_key` - (Optional) The key to use for encrypting state with [Server-Side Encryption with Customer-Provided Keys (SSE-C)](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ServerSideEncryptionCustomerKeys.html). This is the base64-encoded value of the key, which must decode to 256 bits. This can also be sourced from the `AWS_SSE_CUSTOMER_KEY` environment variable, which is recommended due to the sensitivity of the value. Setting it inside a terraform file will cause it to be persisted to disk in `terraform.tfstate`. +* `kms_key_id` - (Optional) Amazon Resource Name (ARN) of a Key Management Service (KMS) Key to use for encrypting the state and lock files. Note that if this value is specified, Terraform will need `kms:Encrypt`, `kms:Decrypt` and `kms:GenerateDataKey` permissions on this KMS key. +* `sse_customer_key` - (Optional) The key to use for encrypting state and lock files with [Server-Side Encryption with Customer-Provided Keys (SSE-C)](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ServerSideEncryptionCustomerKeys.html). This is the base64-encoded value of the key, which must decode to 256 bits. This can also be sourced from the `AWS_SSE_CUSTOMER_KEY` environment variable, which is recommended due to the sensitivity of the value. Setting it inside a terraform file will cause it to be persisted to disk in `terraform.tfstate`. * `use_path_style` - (Optional) Enable path-style S3 URLs (`https:///` instead of `https://.`). * `workspace_key_prefix` - (Optional) Prefix applied to the state path inside the bucket. This is only relevant when using a non-default workspace. Defaults to `env:`. -### DynamoDB State Locking +### State Locking -The following configuration is optional: +State locking is an opt-in feature of the S3 backend. + +Locking can be enabled via an S3 "lockfile" (introduced as **experimental** in Terraform 1.10) or DynamoDB. 
+To support migration from older versions of Terraform which only support DynamoDB-based locking, the S3 and DynamoDB arguments below can be configured simultaneously. +In a future minor version the DynamoDB locking mechanism will be removed. + +To enable S3 state locking, use the following optional argument: + +* `use_lockfile` - (Optional, Experimental) Whether to use a lockfile for locking the state file. Defaults to `false`. + +To enable DynamoDB state locking, use the following optional arguments: * `dynamodb_endpoint` - (Optional, **Deprecated**) Custom endpoint URL for the AWS DynamoDB API. Use `endpoints.dynamodb` instead. -* `dynamodb_table` - (Optional) Name of DynamoDB Table to use for state locking and consistency. The table must have a partition key named `LockID` with type of `String`. If not configured, state locking will be disabled. +* `dynamodb_table` - (Optional) Name of DynamoDB Table to use for state locking and consistency. The table must have a partition key named `LockID` with type of `String`. ## Multi-account AWS Architecture