diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs b/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs index 25b46ad0a6..26c1c92665 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/CosmosHttpClientCore.cs @@ -388,7 +388,7 @@ private async Task SendHttpHelperAsync( return responseMessage; } - bool isOutOfRetries = CosmosHttpClientCore.IsOutOfRetries(timeoutPolicy, startDateTimeUtc, timeoutEnumerator); + bool isOutOfRetries = CosmosHttpClientCore.IsOutOfRetries(timeoutEnumerator); if (isOutOfRetries) { return responseMessage; @@ -402,7 +402,7 @@ private async Task SendHttpHelperAsync( datum.RecordHttpException(requestMessage, e, resourceType, requestStartTime); trace = datum.Trace; } - bool isOutOfRetries = CosmosHttpClientCore.IsOutOfRetries(timeoutPolicy, startDateTimeUtc, timeoutEnumerator); + bool isOutOfRetries = CosmosHttpClientCore.IsOutOfRetries(timeoutEnumerator); switch (e) { @@ -415,7 +415,7 @@ private async Task SendHttpHelperAsync( // Convert OperationCanceledException to 408 when the HTTP client throws it. This makes it clear that the // the request timed out and was not user canceled operation. - if (isOutOfRetries || !timeoutPolicy.IsSafeToRetry(requestMessage.Method)) + if (isOutOfRetries || !CosmosHttpClientCore.IsSafeToRetry(documentServiceRequest)) { // throw current exception (caught in transport handler) string message = @@ -440,14 +440,14 @@ private async Task SendHttpHelperAsync( break; case WebException webException: - if (isOutOfRetries || (!timeoutPolicy.IsSafeToRetry(requestMessage.Method) && !WebExceptionUtility.IsWebExceptionRetriable(webException))) + if (isOutOfRetries || (!CosmosHttpClientCore.IsSafeToRetry(documentServiceRequest) && !WebExceptionUtility.IsWebExceptionRetriable(webException))) { throw; } break; case HttpRequestException httpRequestException: - if (isOutOfRetries || !timeoutPolicy.IsSafeToRetry(requestMessage.Method)) + if (isOutOfRetries || !CosmosHttpClientCore.IsSafeToRetry(documentServiceRequest)) { throw; } @@ -493,13 +493,24 @@ private async Task SendHttpHelperAsync( } private static bool IsOutOfRetries( - HttpTimeoutPolicy timeoutPolicy, - DateTime startDateTimeUtc, IEnumerator<(TimeSpan requestTimeout, TimeSpan delayForNextRequest)> timeoutEnumerator) { return !timeoutEnumerator.MoveNext(); // No more retries are configured } + private static bool IsSafeToRetry(DocumentServiceRequest documentServiceRequest) + { + // Three scenarios are safely retriable: + // 1) If request is null since they are originated from GetAsync calls + // 2) If request is read-only + // 3) If request is an address request. + if (documentServiceRequest == null) + { + return true; + } + return documentServiceRequest.IsReadOnlyRequest || documentServiceRequest.ResourceType == ResourceType.Address; + } + private async Task ExecuteHttpHelperAsync( HttpRequestMessage requestMessage, ResourceType resourceType, diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicy.cs b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicy.cs index 1620e1b36c..1891a908d4 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicy.cs @@ -13,7 +13,6 @@ internal abstract class HttpTimeoutPolicy public abstract string TimeoutPolicyName { get; } public abstract int TotalRetryCount { get; } public abstract IEnumerator<(TimeSpan requestTimeout, TimeSpan delayForNextRequest)> GetTimeoutEnumerator(); - public abstract bool IsSafeToRetry(HttpMethod httpMethod); public abstract bool ShouldRetryBasedOnResponse(HttpMethod requestHttpMethod, HttpResponseMessage responseMessage); diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyControlPlaneRead.cs b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyControlPlaneRead.cs index 9f85ba33ab..02a29b99ad 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyControlPlaneRead.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyControlPlaneRead.cs @@ -32,12 +32,6 @@ private HttpTimeoutPolicyControlPlaneRead() return this.TimeoutsAndDelays.GetEnumerator(); } - // This is for control plane reads which should always be safe to retry on. - public override bool IsSafeToRetry(HttpMethod httpMethod) - { - return true; - } - public override bool ShouldRetryBasedOnResponse(HttpMethod requestHttpMethod, HttpResponseMessage responseMessage) { return false; diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyControlPlaneRetriableHotPath.cs b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyControlPlaneRetriableHotPath.cs index 53a7c78e9e..8143b49015 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyControlPlaneRetriableHotPath.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyControlPlaneRetriableHotPath.cs @@ -36,13 +36,6 @@ private HttpTimeoutPolicyControlPlaneRetriableHotPath(bool shouldThrow503OnTimeo return this.TimeoutsAndDelays.GetEnumerator(); } - // The hot path should always be safe to retires since it should be retrieving meta data - // information that is not idempotent. - public override bool IsSafeToRetry(HttpMethod httpMethod) - { - return true; - } - public override bool ShouldRetryBasedOnResponse(HttpMethod requestHttpMethod, HttpResponseMessage responseMessage) { if (responseMessage == null) @@ -55,11 +48,6 @@ public override bool ShouldRetryBasedOnResponse(HttpMethod requestHttpMethod, Ht return false; } - if (!this.IsSafeToRetry(requestHttpMethod)) - { - return false; - } - return true; } diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyDefault.cs b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyDefault.cs index 69a83707a1..15d4aaa6c3 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyDefault.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyDefault.cs @@ -36,13 +36,6 @@ private HttpTimeoutPolicyDefault(bool shouldThrow503OnTimeout) return this.TimeoutsAndDelays.GetEnumerator(); } - // Assume that it is not safe to retry unless it is a get method. - // Create and other operations could have succeeded even though a timeout occurred. - public override bool IsSafeToRetry(HttpMethod httpMethod) - { - return httpMethod == HttpMethod.Get; - } - public override bool ShouldRetryBasedOnResponse(HttpMethod requestHttpMethod, HttpResponseMessage responseMessage) { return false; diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyForPartitionFailover.cs b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyForPartitionFailover.cs index 3e623a5aed..5f847ac9ec 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyForPartitionFailover.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyForPartitionFailover.cs @@ -35,13 +35,6 @@ private HttpTimeoutPolicyForPartitionFailover(bool shouldThrow503OnTimeout) return this.TimeoutsAndDelays.GetEnumerator(); } - // Assume that it is not safe to retry unless it is a get method. - // Create and other operations could have succeeded even though a timeout occurred. - public override bool IsSafeToRetry(HttpMethod httpMethod) - { - return httpMethod == HttpMethod.Get; - } - public override bool ShouldRetryBasedOnResponse(HttpMethod requestHttpMethod, HttpResponseMessage responseMessage) { return false; diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyForThinClient.cs b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyForThinClient.cs index 91201a8538..4f1d9f598f 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyForThinClient.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyForThinClient.cs @@ -39,11 +39,6 @@ private HttpTimeoutPolicyForThinClient( return this.TimeoutsAndDelays.GetEnumerator(); } - public override bool IsSafeToRetry(HttpMethod httpMethod) - { - return this.shouldRetry; - } - public override bool ShouldRetryBasedOnResponse(HttpMethod requestHttpMethod, HttpResponseMessage responseMessage) { if (responseMessage == null) @@ -56,7 +51,7 @@ public override bool ShouldRetryBasedOnResponse(HttpMethod requestHttpMethod, Ht return false; } - if (!this.IsSafeToRetry(requestHttpMethod)) + if (!this.shouldRetry) { return false; } diff --git a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyNoRetry.cs b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyNoRetry.cs index 65ea12144b..cb4c48011b 100644 --- a/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyNoRetry.cs +++ b/Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicyNoRetry.cs @@ -31,12 +31,6 @@ private HttpTimeoutPolicyNoRetry() return this.TimeoutsAndDelays.GetEnumerator(); } - // Always Unsafe to retry - public override bool IsSafeToRetry(HttpMethod httpMethod) - { - return false; - } - public override bool ShouldRetryBasedOnResponse(HttpMethod requestHttpMethod, HttpResponseMessage responseMessage) { return false; diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosHttpClientCoreTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosHttpClientCoreTests.cs index 9411ade3eb..1507b93e0b 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosHttpClientCoreTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosHttpClientCoreTests.cs @@ -123,7 +123,8 @@ async Task sendFunc(HttpRequestMessage request, Cancellatio resourceType: ResourceType.Collection, timeoutPolicy: currentTimeoutPolicy.Key, clientSideRequestStatistics: new ClientSideRequestStatisticsTraceDatum(DateTime.UtcNow, trace), - cancellationToken: default); + cancellationToken: default, + documentServiceRequest: CreateDocumentServiceRequestByOperation(ResourceType.Collection, OperationType.Read)); Assert.AreEqual(HttpStatusCode.OK, responseMessage.StatusCode); } @@ -266,7 +267,8 @@ Task sendFunc(HttpRequestMessage request, CancellationToken resourceType: ResourceType.Collection, timeoutPolicy: HttpTimeoutPolicyDefault.Instance, clientSideRequestStatistics: new ClientSideRequestStatisticsTraceDatum(DateTime.UtcNow, trace), - cancellationToken: default); + cancellationToken: default, + documentServiceRequest: CreateDocumentServiceRequestByOperation(ResourceType.Collection, OperationType.Read)); } } catch (Exception) @@ -281,7 +283,7 @@ Task sendFunc(HttpRequestMessage request, CancellationToken public async Task HttpTimeoutThrow503TestAsync() { - async Task TestScenarioAsync(HttpMethod method, ResourceType resourceType, HttpTimeoutPolicy timeoutPolicy, Type expectedException, int expectedNumberOfRetrys) + async Task TestScenarioAsync(HttpMethod method, ResourceType resourceType, OperationType operationType, HttpTimeoutPolicy timeoutPolicy, Type expectedException, int expectedNumberOfRetrys) { int count = 0; Task sendFunc(HttpRequestMessage request, CancellationToken cancellationToken) @@ -306,7 +308,8 @@ Task sendFunc(HttpRequestMessage request, CancellationToken resourceType: resourceType, timeoutPolicy: timeoutPolicy, clientSideRequestStatistics: new ClientSideRequestStatisticsTraceDatum(DateTime.UtcNow, trace), - cancellationToken: default); + cancellationToken: default, + documentServiceRequest: CreateDocumentServiceRequestByOperation(resourceType, operationType)); } } catch (Exception e) @@ -328,19 +331,19 @@ Task sendFunc(HttpRequestMessage request, CancellationToken } //Data plane read - await TestScenarioAsync(HttpMethod.Get, ResourceType.Document, HttpTimeoutPolicyDefault.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 3); + await TestScenarioAsync(HttpMethod.Get, ResourceType.Document, OperationType.Read, HttpTimeoutPolicyDefault.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 3); //Data plane write (Should throw a 408 OperationCanceledException rather than a 503) - await TestScenarioAsync(HttpMethod.Post, ResourceType.Document, HttpTimeoutPolicyDefault.Instance, typeof(TaskCanceledException), 1); + await TestScenarioAsync(HttpMethod.Post, ResourceType.Document, OperationType.Upsert, HttpTimeoutPolicyDefault.Instance, typeof(TaskCanceledException), 1); //Meta data read - await TestScenarioAsync(HttpMethod.Get, ResourceType.Database, HttpTimeoutPolicyDefault.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 3); + await TestScenarioAsync(HttpMethod.Get, ResourceType.Database, OperationType.Read, HttpTimeoutPolicyDefault.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 3); //Query plan read (note all query plan operations are reads). - await TestScenarioAsync(HttpMethod.Get, ResourceType.Document, HttpTimeoutPolicyDefault.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 3); + await TestScenarioAsync(HttpMethod.Get, ResourceType.Document, OperationType.Read, HttpTimeoutPolicyDefault.InstanceShouldThrow503OnTimeout, typeof(CosmosException), 3); //Metadata Write (Should throw a 408 OperationCanceledException rather than a 503) - await TestScenarioAsync(HttpMethod.Post, ResourceType.Document, HttpTimeoutPolicyDefault.Instance, typeof(TaskCanceledException), 1); + await TestScenarioAsync(HttpMethod.Post, ResourceType.Document, OperationType.Upsert, HttpTimeoutPolicyDefault.Instance, typeof(TaskCanceledException), 1); } [TestMethod] @@ -433,7 +436,8 @@ async Task sendFunc(HttpRequestMessage request, Cancellatio resourceType: ResourceType.Document, timeoutPolicy: HttpTimeoutPolicyControlPlaneRetriableHotPath.Instance, clientSideRequestStatistics: new ClientSideRequestStatisticsTraceDatum(DateTime.UtcNow, trace), - cancellationToken: default); + cancellationToken: default, + documentServiceRequest: documentServiceRequest); Assert.AreEqual(HttpStatusCode.OK, responseMessage.StatusCode); } @@ -492,7 +496,7 @@ public void CreateHttpClientHandlerCreatesCorrectValueType() public async Task HttpTimeoutPolicyForThinClientOn503TestAsync() { - async Task TestScenarioAsync(HttpMethod method, ResourceType resourceType, HttpTimeoutPolicy timeoutPolicy, Type expectedException, int expectedNumberOfRetrys) + async Task TestScenarioAsync(HttpMethod method, ResourceType resourceType, OperationType operationType, HttpTimeoutPolicy timeoutPolicy, Type expectedException, int expectedNumberOfRetrys) { int count = 0; Task sendFunc(HttpRequestMessage request, CancellationToken cancellationToken) @@ -517,7 +521,8 @@ Task sendFunc(HttpRequestMessage request, CancellationToken resourceType: resourceType, timeoutPolicy: timeoutPolicy, clientSideRequestStatistics: new ClientSideRequestStatisticsTraceDatum(DateTime.UtcNow, trace), - cancellationToken: default); + cancellationToken: default, + documentServiceRequest: CreateDocumentServiceRequestByOperation(resourceType, operationType)); } } catch (Exception e) @@ -542,6 +547,7 @@ Task sendFunc(HttpRequestMessage request, CancellationToken await TestScenarioAsync( method: HttpMethod.Get, resourceType: ResourceType.Document, + operationType: OperationType.Read, timeoutPolicy: HttpTimeoutPolicy.GetTimeoutPolicy( documentServiceRequest: CosmosHttpClientCoreTests.CreateDocumentServiceRequestByOperation(ResourceType.Document, OperationType.Read), isPartitionLevelFailoverEnabled: false, @@ -553,6 +559,7 @@ await TestScenarioAsync( await TestScenarioAsync( method: HttpMethod.Get, resourceType: ResourceType.Document, + operationType: OperationType.Read, timeoutPolicy: HttpTimeoutPolicy.GetTimeoutPolicy( documentServiceRequest: CosmosHttpClientCoreTests.CreateDocumentServiceRequestByOperation(ResourceType.Document, OperationType.Query), isPartitionLevelFailoverEnabled: false, @@ -564,6 +571,7 @@ await TestScenarioAsync( await TestScenarioAsync( method: HttpMethod.Post, resourceType: ResourceType.Document, + operationType: OperationType.Upsert, timeoutPolicy: HttpTimeoutPolicy.GetTimeoutPolicy( documentServiceRequest: CosmosHttpClientCoreTests.CreateDocumentServiceRequestByOperation(ResourceType.Document, OperationType.Create), isPartitionLevelFailoverEnabled: false, @@ -575,6 +583,7 @@ await TestScenarioAsync( await TestScenarioAsync( method: HttpMethod.Get, resourceType: ResourceType.Database, + operationType: OperationType.Read, timeoutPolicy: HttpTimeoutPolicy.GetTimeoutPolicy( documentServiceRequest: CosmosHttpClientCoreTests.CreateDocumentServiceRequestByOperation(ResourceType.Database, OperationType.Read), isPartitionLevelFailoverEnabled: false, @@ -583,6 +592,7 @@ await TestScenarioAsync( expectedNumberOfRetrys: 3); } + private static DocumentServiceRequest CreateDocumentServiceRequestByOperation( ResourceType resourceType, OperationType operationType) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/GatewayStoreModelTest.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/GatewayStoreModelTest.cs index a39603c20c..1a52543ee2 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/GatewayStoreModelTest.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/GatewayStoreModelTest.cs @@ -889,15 +889,22 @@ public async Task GatewayStatsDurationTest() DocumentClientEventSource.Instance); using (ITrace trace = Tracing.Trace.GetRootTrace(nameof(GatewayStatsDurationTest))) - { - + { + Tracing.TraceData.ClientSideRequestStatisticsTraceDatum clientSideRequestStatistics = new Tracing.TraceData.ClientSideRequestStatisticsTraceDatum(DateTime.UtcNow, trace); await cosmosHttpClient.SendHttpAsync(() => new ValueTask(new HttpRequestMessage(HttpMethod.Get, "http://someuri.com")), ResourceType.Document, HttpTimeoutPolicyDefault.InstanceShouldThrow503OnTimeout, clientSideRequestStatistics, - CancellationToken.None); + CancellationToken.None, + documentServiceRequest: new DocumentServiceRequest( + OperationType.Read, + ResourceType.Document, + $"dbs/dummy_db_id/colls/dummy_ct_id", + body: null, + AuthorizationTokenType.PrimaryMasterKey, + headers: null)); Assert.AreEqual(clientSideRequestStatistics.HttpResponseStatisticsList.Count, 2); // The duration is calculated using date times which can cause the duration to be slightly off. This allows for up to 15 Ms of variance.