Skip to content

Commit 4d11def

Browse files
authored
Allow retries in DefaultKeyResolver.CanCreateAuthenticatedEncryptor (#54711)
* Allow retries in DefaultKeyResolver.CanCreateAuthenticatedEncryptor This code is trying to ensure that the selected key can be decrypted (i.e. is usable). It may fail if, for example, Azure KeyVault is unreachable due to connectivity issues. If it fails, there's a log message and then an immediately-activated key will be generated. An immediately-activated key can cause problems for sessions making requests to multiple app instances and those problems won't obviously be connected to the (almost silent) failure in CanCreateAuthenticatedEncryptor. Rather than effectively swallowing such errors, we should allow some retries. Part of #36157 * Roll our own Lazy that allows resets Retries against the actual `Key` type weren't working because the exception was getting cached in the key's lazy descriptor. Implement our own simple lazy and expose a method for clearing the cached value and exception.
1 parent ace008b commit 4d11def

File tree

5 files changed

+387
-29
lines changed

5 files changed

+387
-29
lines changed

src/DataProtection/DataProtection/src/KeyManagement/DefaultKeyResolver.cs

+93-16
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33

44
using System;
55
using System.Collections.Generic;
6+
using System.Diagnostics;
67
using System.Linq;
8+
using System.Threading;
79
using Microsoft.AspNetCore.Cryptography;
810
using Microsoft.AspNetCore.DataProtection.AuthenticatedEncryption;
911
using Microsoft.AspNetCore.DataProtection.KeyManagement.Internal;
@@ -18,6 +20,13 @@ namespace Microsoft.AspNetCore.DataProtection.KeyManagement;
1820
/// </summary>
1921
internal sealed class DefaultKeyResolver : IDefaultKeyResolver
2022
{
23+
#if NETFRAMEWORK
24+
// Sadly, numerical AppContext data support was not added until 4.7 and we target 4.6
25+
private const string MaximumTotalDefaultKeyResolverRetriesSwitchKey = "Microsoft.AspNetCore.DataProtection.KeyManagement.DisableDefaultKeyResolverRetries";
26+
#else
27+
private const string MaximumTotalDefaultKeyResolverRetriesDataKey = "Microsoft.AspNetCore.DataProtection.KeyManagement.MaximumTotalDefaultKeyResolverRetries";
28+
#endif
29+
2130
/// <summary>
2231
/// The window of time before the key expires when a new key should be created
2332
/// and persisted to the keyring to ensure uninterrupted service.
@@ -28,6 +37,9 @@ internal sealed class DefaultKeyResolver : IDefaultKeyResolver
2837
/// </remarks>
2938
private readonly TimeSpan _keyPropagationWindow;
3039

40+
private readonly int _maxDecryptRetries;
41+
private readonly TimeSpan _decryptRetryDelay;
42+
3143
private readonly ILogger _logger;
3244

3345
/// <summary>
@@ -41,33 +53,95 @@ internal sealed class DefaultKeyResolver : IDefaultKeyResolver
4153
/// </remarks>
4254
private readonly TimeSpan _maxServerToServerClockSkew;
4355

44-
public DefaultKeyResolver()
45-
: this(NullLoggerFactory.Instance)
56+
public DefaultKeyResolver(IOptions<KeyManagementOptions> keyManagementOptions)
57+
: this(keyManagementOptions, NullLoggerFactory.Instance)
4658
{ }
4759

48-
public DefaultKeyResolver(ILoggerFactory loggerFactory)
60+
public DefaultKeyResolver(IOptions<KeyManagementOptions> keyManagementOptions, ILoggerFactory loggerFactory)
4961
{
5062
_keyPropagationWindow = KeyManagementOptions.KeyPropagationWindow;
5163
_maxServerToServerClockSkew = KeyManagementOptions.MaxServerClockSkew;
64+
_maxDecryptRetries = GetMaxDecryptRetriesFromAppContext() ?? keyManagementOptions.Value.MaximumTotalDefaultKeyResolverRetries;
65+
_decryptRetryDelay = keyManagementOptions.Value.DefaultKeyResolverRetryDelay;
5266
_logger = loggerFactory.CreateLogger<DefaultKeyResolver>();
5367
}
5468

55-
private bool CanCreateAuthenticatedEncryptor(IKey key)
69+
private static int? GetMaxDecryptRetriesFromAppContext()
5670
{
57-
try
71+
#if NETFRAMEWORK
72+
// Force to zero if retries are disabled. Otherwise, use the configured value
73+
return AppContext.TryGetSwitch(MaximumTotalDefaultKeyResolverRetriesSwitchKey, out var areRetriesDisabled) && areRetriesDisabled ? 0 : null;
74+
#else
75+
var data = AppContext.GetData(MaximumTotalDefaultKeyResolverRetriesDataKey);
76+
77+
// Programmatically-configured values are usually ints
78+
if (data is int count)
5879
{
59-
var encryptorInstance = key.CreateEncryptor();
60-
if (encryptorInstance == null)
61-
{
62-
CryptoUtil.Fail<IAuthenticatedEncryptor>("CreateEncryptorInstance returned null.");
63-
}
80+
return count;
81+
}
6482

65-
return true;
83+
// msbuild-configured values are usually strings
84+
if (data is string countStr && int.TryParse(countStr, out var parsed))
85+
{
86+
return parsed;
6687
}
67-
catch (Exception ex)
88+
89+
return null;
90+
#endif
91+
}
92+
93+
private bool CanCreateAuthenticatedEncryptor(IKey key, ref int retriesRemaining)
94+
{
95+
List<Exception>? exceptions = null;
96+
while (true)
6897
{
69-
_logger.KeyIsIneligibleToBeTheDefaultKeyBecauseItsMethodFailed(key.KeyId, nameof(IKey.CreateEncryptor), ex);
70-
return false;
98+
try
99+
{
100+
var encryptorInstance = key.CreateEncryptor();
101+
if (encryptorInstance is null)
102+
{
103+
// This generally indicates a key (descriptor) without a corresponding IAuthenticatedEncryptorFactory,
104+
// which is not something that can succeed on retry - return immediately
105+
_logger.KeyIsIneligibleToBeTheDefaultKeyBecauseItsMethodFailed(
106+
key.KeyId,
107+
nameof(IKey.CreateEncryptor),
108+
new InvalidOperationException($"{nameof(IKey.CreateEncryptor)} returned null."));
109+
return false;
110+
}
111+
112+
return true;
113+
}
114+
catch (Exception ex)
115+
{
116+
if (retriesRemaining > 0)
117+
{
118+
if (exceptions is null)
119+
{
120+
// The first failure doesn't count as a retry
121+
exceptions = [];
122+
}
123+
else
124+
{
125+
retriesRemaining--;
126+
}
127+
128+
exceptions.Add(ex);
129+
}
130+
131+
if (retriesRemaining <= 0) // Guard against infinite loops from programmer errors
132+
{
133+
_logger.KeyIsIneligibleToBeTheDefaultKeyBecauseItsMethodFailed(key.KeyId, nameof(IKey.CreateEncryptor), exceptions is null ? ex : new AggregateException(exceptions));
134+
return false;
135+
}
136+
137+
_logger.RetryingMethodOfKeyAfterFailure(key.KeyId, nameof(IKey.CreateEncryptor), ex);
138+
}
139+
140+
// Reset the descriptor to allow for a retry
141+
(key as Key)?.ResetDescriptor();
142+
143+
// Don't retry immediately - allow a little time for the transient problem to clear
144+
Thread.Sleep(_decryptRetryDelay);
71145
}
72146
}
73147

@@ -94,12 +168,14 @@ where key.CreationDate > propagationCutoff
94168
orderby key.ActivationDate ascending, key.KeyId ascending
95169
select key).FirstOrDefault();
96170

171+
var decryptRetriesRemaining = _maxDecryptRetries;
172+
97173
if (preferredDefaultKey != null)
98174
{
99175
_logger.ConsideringKeyWithExpirationDateAsDefaultKey(preferredDefaultKey.KeyId, preferredDefaultKey.ExpirationDate);
100176

101177
// if the key has been revoked or is expired, it is no longer a candidate
102-
if (preferredDefaultKey.IsRevoked || preferredDefaultKey.IsExpired(now) || !CanCreateAuthenticatedEncryptor(preferredDefaultKey))
178+
if (preferredDefaultKey.IsRevoked || preferredDefaultKey.IsExpired(now) || !CanCreateAuthenticatedEncryptor(preferredDefaultKey, ref decryptRetriesRemaining))
103179
{
104180
_logger.KeyIsNoLongerUnderConsiderationAsDefault(preferredDefaultKey.KeyId);
105181
}
@@ -123,13 +199,14 @@ where key.CreationDate > propagationCutoff
123199
// keep trying until we find one that works.
124200
var unrevokedKeys = allKeys.Where(key => !key.IsRevoked);
125201
fallbackKey = (from key in (from key in unrevokedKeys
202+
where !ReferenceEquals(key, preferredDefaultKey) // Don't reconsider it as a fallback
126203
where key.CreationDate <= propagationCutoff
127204
orderby key.CreationDate descending
128205
select key).Concat(from key in unrevokedKeys
129206
where key.CreationDate > propagationCutoff
130207
orderby key.CreationDate ascending
131208
select key)
132-
where CanCreateAuthenticatedEncryptor(key)
209+
where CanCreateAuthenticatedEncryptor(key, ref decryptRetriesRemaining)
133210
select key).FirstOrDefault();
134211

135212
_logger.RepositoryContainsNoViableDefaultKey();

src/DataProtection/DataProtection/src/KeyManagement/Key.cs

+98-11
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
using System;
55
using System.Collections.Generic;
6+
using System.Diagnostics;
67
using System.Xml.Linq;
78
using Microsoft.AspNetCore.DataProtection.AuthenticatedEncryption;
89
using Microsoft.AspNetCore.DataProtection.AuthenticatedEncryption.ConfigurationModel;
@@ -16,7 +17,13 @@ namespace Microsoft.AspNetCore.DataProtection.KeyManagement;
1617
/// </summary>
1718
internal sealed class Key : IKey
1819
{
19-
private readonly Lazy<IAuthenticatedEncryptorDescriptor> _lazyDescriptor;
20+
private IAuthenticatedEncryptorDescriptor? _descriptor;
21+
22+
// If descriptor is available at construction time, these will remain null forever
23+
private readonly object? _descriptorLock; // Protects _descriptor and _descriptorException
24+
private readonly Func<IAuthenticatedEncryptorDescriptor>? _descriptorFactory; // May not be used
25+
private Exception? _descriptorException;
26+
2027
private readonly IEnumerable<IAuthenticatedEncryptorFactory> _encryptorFactories;
2128

2229
private IAuthenticatedEncryptor? _encryptor;
@@ -36,8 +43,10 @@ public Key(
3643
creationDate,
3744
activationDate,
3845
expirationDate,
39-
new Lazy<IAuthenticatedEncryptorDescriptor>(() => descriptor),
40-
encryptorFactories)
46+
encryptorFactories,
47+
descriptor,
48+
descriptorFactory: null,
49+
descriptorException: null)
4150
{
4251
}
4352

@@ -57,8 +66,29 @@ public Key(
5766
creationDate,
5867
activationDate,
5968
expirationDate,
60-
new Lazy<IAuthenticatedEncryptorDescriptor>(GetLazyDescriptorDelegate(keyManager, keyElement)),
61-
encryptorFactories)
69+
encryptorFactories,
70+
descriptor: null,
71+
descriptorFactory: GetLazyDescriptorDelegate(keyManager, keyElement),
72+
descriptorException: null)
73+
{
74+
}
75+
76+
// internal for testing
77+
internal Key(
78+
Guid keyId,
79+
DateTimeOffset creationDate,
80+
DateTimeOffset activationDate,
81+
DateTimeOffset expirationDate,
82+
IEnumerable<IAuthenticatedEncryptorFactory> encryptorFactories,
83+
Func<IAuthenticatedEncryptorDescriptor>? descriptorFactory)
84+
: this(keyId,
85+
creationDate,
86+
activationDate,
87+
expirationDate,
88+
encryptorFactories,
89+
descriptor: null,
90+
descriptorFactory: descriptorFactory,
91+
descriptorException: null)
6292
{
6393
}
6494

@@ -67,15 +97,20 @@ private Key(
6797
DateTimeOffset creationDate,
6898
DateTimeOffset activationDate,
6999
DateTimeOffset expirationDate,
70-
Lazy<IAuthenticatedEncryptorDescriptor> lazyDescriptor,
71-
IEnumerable<IAuthenticatedEncryptorFactory> encryptorFactories)
100+
IEnumerable<IAuthenticatedEncryptorFactory> encryptorFactories,
101+
IAuthenticatedEncryptorDescriptor? descriptor,
102+
Func<IAuthenticatedEncryptorDescriptor>? descriptorFactory,
103+
Exception? descriptorException)
72104
{
73105
KeyId = keyId;
74106
CreationDate = creationDate;
75107
ActivationDate = activationDate;
76108
ExpirationDate = expirationDate;
77-
_lazyDescriptor = lazyDescriptor;
78109
_encryptorFactories = encryptorFactories;
110+
_descriptor = descriptor;
111+
_descriptorFactory = descriptorFactory;
112+
_descriptorException = descriptorException;
113+
_descriptorLock = descriptor is null ? new() : null;
79114
}
80115

81116
public DateTimeOffset ActivationDate { get; }
@@ -92,7 +127,56 @@ public IAuthenticatedEncryptorDescriptor Descriptor
92127
{
93128
get
94129
{
95-
return _lazyDescriptor.Value;
130+
// We could check for _descriptorException here, but there's no reason to optimize that case
131+
// (i.e. by avoiding taking the lock)
132+
133+
if (_descriptor is not null) // Can only go from null to non-null, so losing a race here doesn't matter
134+
{
135+
Debug.Assert(_descriptorException is null); // Mutually exclusive with _descriptor
136+
return _descriptor;
137+
}
138+
139+
lock (_descriptorLock!)
140+
{
141+
if (_descriptorException is not null)
142+
{
143+
throw _descriptorException;
144+
}
145+
146+
if (_descriptor is not null)
147+
{
148+
return _descriptor;
149+
}
150+
151+
Debug.Assert(_descriptorFactory is not null, "Key constructed without either descriptor or descriptor factory");
152+
153+
try
154+
{
155+
_descriptor = _descriptorFactory();
156+
return _descriptor;
157+
}
158+
catch (Exception ex)
159+
{
160+
_descriptorException = ex;
161+
throw;
162+
}
163+
}
164+
}
165+
}
166+
167+
internal void ResetDescriptor()
168+
{
169+
if (_descriptor is not null)
170+
{
171+
Debug.Fail("ResetDescriptor called with descriptor available");
172+
Debug.Assert(_descriptorException is null); // Mutually exclusive with _descriptor
173+
return;
174+
}
175+
176+
lock (_descriptorLock!)
177+
{
178+
_descriptor = null;
179+
_descriptorException = null;
96180
}
97181
}
98182

@@ -121,13 +205,16 @@ internal void SetRevoked()
121205

122206
internal Key Clone()
123207
{
208+
// Note that we don't reuse _descriptorLock
124209
return new Key(
125210
keyId: KeyId,
126211
creationDate: CreationDate,
127212
activationDate: ActivationDate,
128213
expirationDate: ExpirationDate,
129-
lazyDescriptor: _lazyDescriptor,
130-
encryptorFactories: _encryptorFactories)
214+
encryptorFactories: _encryptorFactories,
215+
descriptor: _descriptor,
216+
descriptorFactory: _descriptorFactory,
217+
descriptorException: _descriptorException)
131218
{
132219
IsRevoked = IsRevoked,
133220
};

src/DataProtection/DataProtection/src/KeyManagement/KeyManagementOptions.cs

+19
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,25 @@ internal static TimeSpan MaxServerClockSkew
9494
}
9595
}
9696

97+
/// <summary>
98+
/// During each default key resolution, if a key decryption attempt fails,
99+
/// it can be retried, as long as the total number of retries across all keys
100+
/// does not exceed this value.
101+
/// </summary>
102+
/// <remarks>
103+
/// Settable for testing.
104+
/// </remarks>
105+
internal int MaximumTotalDefaultKeyResolverRetries { get; set; } = 10;
106+
107+
/// <summary>
108+
/// Wait this long before each default key resolution decryption retry.
109+
/// <seealso cref="MaximumTotalDefaultKeyResolverRetries"/>
110+
/// </summary>
111+
/// <remarks>
112+
/// Settable for testing.
113+
/// </remarks>
114+
internal TimeSpan DefaultKeyResolverRetryDelay { get; set; } = TimeSpan.FromMilliseconds(200);
115+
97116
/// <summary>
98117
/// Controls the lifetime (number of days before expiration)
99118
/// for newly-generated keys.

src/DataProtection/DataProtection/src/LoggingExtensions.cs

+4-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ private static bool IsLogLevelEnabledCore([NotNullWhen(true)] ILogger? logger, L
7373
[LoggerMessage(11, LogLevel.Debug, "Using managed symmetric algorithm '{FullName}'.", EventName = "UsingManagedSymmetricAlgorithm")]
7474
public static partial void UsingManagedSymmetricAlgorithm(this ILogger logger, string fullName);
7575

76-
[LoggerMessage(12, LogLevel.Warning, "Key {KeyId:B} is ineligible to be the default key because its {MethodName} method failed.", EventName = "KeyIsIneligibleToBeTheDefaultKeyBecauseItsMethodFailed")]
76+
[LoggerMessage(12, LogLevel.Warning, "Key {KeyId:B} is ineligible to be the default key because its {MethodName} method failed after the maximum number of retries.", EventName = "KeyIsIneligibleToBeTheDefaultKeyBecauseItsMethodFailed")]
7777
public static partial void KeyIsIneligibleToBeTheDefaultKeyBecauseItsMethodFailed(this ILogger logger, Guid keyId, string methodName, Exception exception);
7878

7979
[LoggerMessage(13, LogLevel.Debug, "Considering key {KeyId:B} with expiration date {ExpirationDate:u} as default key.", EventName = "ConsideringKeyWithExpirationDateAsDefaultKey")]
@@ -252,4 +252,7 @@ private static bool IsLogLevelEnabledCore([NotNullWhen(true)] ILogger? logger, L
252252

253253
[LoggerMessage(72, LogLevel.Error, "The key ring's default data protection key {KeyId:B} has been revoked.", EventName = "KeyRingDefaultKeyIsRevoked")]
254254
public static partial void KeyRingDefaultKeyIsRevoked(this ILogger logger, Guid keyId);
255+
256+
[LoggerMessage(73, LogLevel.Debug, "Key {KeyId:B} method {MethodName} failed. Retrying.", EventName = "RetryingMethodOfKeyAfterFailure")]
257+
public static partial void RetryingMethodOfKeyAfterFailure(this ILogger logger, Guid keyId, string methodName, Exception exception);
255258
}

0 commit comments

Comments
 (0)