From 52d767faf5d889ca05fe6e4db7470397e5d10901 Mon Sep 17 00:00:00 2001 From: "Neil Deshpande (from Dev Box)" Date: Tue, 1 Apr 2025 14:51:58 -0700 Subject: [PATCH 1/6] Add support for the optimized query plan that skips the order by rewrite --- ...dSearchCrossPartitionQueryPipelineStage.cs | 28 +++++++++---------- .../HybridSearch/HybridSearchQueryResult.cs | 21 ++++++++++---- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Query/Core/Pipeline/CrossPartition/HybridSearch/HybridSearchCrossPartitionQueryPipelineStage.cs b/Microsoft.Azure.Cosmos/src/Query/Core/Pipeline/CrossPartition/HybridSearch/HybridSearchCrossPartitionQueryPipelineStage.cs index 8fd722197e..72c4bc3d68 100644 --- a/Microsoft.Azure.Cosmos/src/Query/Core/Pipeline/CrossPartition/HybridSearch/HybridSearchCrossPartitionQueryPipelineStage.cs +++ b/Microsoft.Azure.Cosmos/src/Query/Core/Pipeline/CrossPartition/HybridSearch/HybridSearchCrossPartitionQueryPipelineStage.cs @@ -404,12 +404,10 @@ private static IReadOnlyList ExtractComponentWeights(HybridSear for (int index = 0; index < hybridSearchQueryInfo.ComponentQueryInfos.Count; ++index) { QueryInfo queryInfo = hybridSearchQueryInfo.ComponentQueryInfos[index]; - Debug.Assert(queryInfo.HasOrderBy, "The component query should have an order by"); - Debug.Assert(queryInfo.HasNonStreamingOrderBy, "The component query is a non streaming order by"); - Debug.Assert(queryInfo.OrderBy.Count == 1, "The component query should have exactly one order by expression"); + SortOrder sortOrder = queryInfo.HasOrderBy ? queryInfo.OrderBy[0] : SortOrder.Descending; double componentWeight = useDefaultComponentWeight ? 1.0 : hybridSearchQueryInfo.ComponentWeights[index]; - result.Add(new ComponentWeight(componentWeight, queryInfo.OrderBy[0])); + result.Add(new ComponentWeight(componentWeight, sortOrder)); } return result; @@ -635,14 +633,19 @@ private static void ComputeRrfScores( private static QueryInfo RewriteOrderByQueryInfo(QueryInfo queryInfo, GlobalFullTextSearchStatistics statistics, int componentCount) { - Debug.Assert(queryInfo.HasOrderBy, "The component query should have an order by"); - Debug.Assert(queryInfo.HasNonStreamingOrderBy, "The component query is a non streaming order by"); + IReadOnlyList rewrittenOrderByExpressions = queryInfo.OrderByExpressions; - List rewrittenOrderByExpressions = new List(queryInfo.OrderByExpressions.Count); - foreach (string orderByExpression in queryInfo.OrderByExpressions) + if (queryInfo.HasOrderBy) { - string rewrittenOrderByExpression = FormatComponentQueryTextWorkaround(orderByExpression, statistics, componentCount); - rewrittenOrderByExpressions.Add(rewrittenOrderByExpression); + Debug.Assert(queryInfo.HasNonStreamingOrderBy, "The component query is a non streaming order by"); + List orderByExpressions = new List(queryInfo.OrderByExpressions.Count); + foreach (string orderByExpression in queryInfo.OrderByExpressions) + { + string rewrittenOrderByExpression = FormatComponentQueryTextWorkaround(orderByExpression, statistics, componentCount); + orderByExpressions.Add(rewrittenOrderByExpression); + } + + rewrittenOrderByExpressions = orderByExpressions; } string rewrittenQuery = FormatComponentQueryTextWorkaround(queryInfo.RewrittenQuery, statistics, componentCount); @@ -777,8 +780,6 @@ private static string FormatComponentQueryTextWorkaround(string format, GlobalFu private class ComponentWeight { - public SortOrder SortOrder { get; } - public double Weight { get; } public Comparison Comparison { get; } @@ -786,9 +787,8 @@ private class ComponentWeight public ComponentWeight(double weight, SortOrder sortOrder) { this.Weight = weight; - this.SortOrder = sortOrder; - int comparisonFactor = (this.SortOrder == SortOrder.Ascending) ? 1 : -1; + int comparisonFactor = (sortOrder == SortOrder.Ascending) ? 1 : -1; this.Comparison = (x, y) => comparisonFactor * x.CompareTo(y); } } diff --git a/Microsoft.Azure.Cosmos/src/Query/Core/Pipeline/CrossPartition/HybridSearch/HybridSearchQueryResult.cs b/Microsoft.Azure.Cosmos/src/Query/Core/Pipeline/CrossPartition/HybridSearch/HybridSearchQueryResult.cs index c7b9f4df34..22ff641fa2 100644 --- a/Microsoft.Azure.Cosmos/src/Query/Core/Pipeline/CrossPartition/HybridSearch/HybridSearchQueryResult.cs +++ b/Microsoft.Azure.Cosmos/src/Query/Core/Pipeline/CrossPartition/HybridSearch/HybridSearchQueryResult.cs @@ -54,17 +54,26 @@ public static HybridSearchQueryResult Create(CosmosElement document) throw new ArgumentException($"{FieldNames.Payload} must exist."); } - if (!outerPayload.TryGetValue(FieldNames.Payload, out CosmosElement innerPayload)) + if (outerPayload.TryGetValue(FieldNames.ComponentScores, out CosmosArray componentScores)) { - innerPayload = CosmosUndefined.Create(); + // Using the older format where the payload is nested. + if (!outerPayload.TryGetValue(FieldNames.Payload, out CosmosObject innerPayload)) + { + innerPayload = CosmosUndefined.Create(); + } + + return new HybridSearchQueryResult(rid, componentScores, innerPayload); } - if (!outerPayload.TryGetValue(FieldNames.ComponentScores, out CosmosArray componentScores)) { - throw new ArgumentException($"{FieldNames.ComponentScores} must exist."); - } + // Using the newer format where the payload is not nested. + if (!cosmosObject.TryGetValue(FieldNames.ComponentScores, out CosmosArray componentScores)) + { + throw new ArgumentException($"{FieldNames.ComponentScores} must exist."); + } - return new HybridSearchQueryResult(rid, componentScores, innerPayload); + return new HybridSearchQueryResult(rid, componentScores, outerPayload); + } } private static class FieldNames From fd9e8ccab68c9d3efdca84488df6b7829555735a Mon Sep 17 00:00:00 2001 From: "Neil Deshpande (from Dev Box)" Date: Mon, 28 Apr 2025 14:39:28 -0700 Subject: [PATCH 2/6] Handle undefined values for payload in HybridSearchQueryResult --- .../HybridSearch/HybridSearchQueryResult.cs | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Query/Core/Pipeline/CrossPartition/HybridSearch/HybridSearchQueryResult.cs b/Microsoft.Azure.Cosmos/src/Query/Core/Pipeline/CrossPartition/HybridSearch/HybridSearchQueryResult.cs index 22ff641fa2..07b4ee2b17 100644 --- a/Microsoft.Azure.Cosmos/src/Query/Core/Pipeline/CrossPartition/HybridSearch/HybridSearchQueryResult.cs +++ b/Microsoft.Azure.Cosmos/src/Query/Core/Pipeline/CrossPartition/HybridSearch/HybridSearchQueryResult.cs @@ -49,31 +49,33 @@ public static HybridSearchQueryResult Create(CosmosElement document) throw new ArgumentException($"{FieldNames.Rid} must exist."); } - if (!cosmosObject.TryGetValue(FieldNames.Payload, out CosmosObject outerPayload)) - { - throw new ArgumentException($"{FieldNames.Payload} must exist."); - } + bool outerPayloadExists = cosmosObject.TryGetValue(FieldNames.Payload, out CosmosObject outerPayload); - if (outerPayload.TryGetValue(FieldNames.ComponentScores, out CosmosArray componentScores)) + HybridSearchQueryResult result; + if (outerPayloadExists && outerPayload.TryGetValue(FieldNames.ComponentScores, out CosmosArray componentScores)) { // Using the older format where the payload is nested. - if (!outerPayload.TryGetValue(FieldNames.Payload, out CosmosObject innerPayload)) + if (!outerPayload.TryGetValue(FieldNames.Payload, out CosmosElement innerPayload)) { innerPayload = CosmosUndefined.Create(); } - return new HybridSearchQueryResult(rid, componentScores, innerPayload); + result = new HybridSearchQueryResult(rid, componentScores, innerPayload); } - + else { // Using the newer format where the payload is not nested. - if (!cosmosObject.TryGetValue(FieldNames.ComponentScores, out CosmosArray componentScores)) + if (!cosmosObject.TryGetValue(FieldNames.ComponentScores, out componentScores)) { throw new ArgumentException($"{FieldNames.ComponentScores} must exist."); } - return new HybridSearchQueryResult(rid, componentScores, outerPayload); + CosmosElement payload = outerPayloadExists ? outerPayload : CosmosUndefined.Create(); + + result = new HybridSearchQueryResult(rid, componentScores, payload); } + + return result; } private static class FieldNames From d1632876a1ed19edafa9c97b870977409f97283a Mon Sep 17 00:00:00 2001 From: "Neil Deshpande (from Dev Box)" Date: Fri, 2 May 2025 01:32:45 -0700 Subject: [PATCH 3/6] draft changes for unit tests --- .../Pipeline/NonStreamingOrderByQueryTests.cs | 343 ++++++++++++++++-- 1 file changed, 320 insertions(+), 23 deletions(-) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Query/Pipeline/NonStreamingOrderByQueryTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Query/Pipeline/NonStreamingOrderByQueryTests.cs index 8352267ee5..4c01d66721 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Query/Pipeline/NonStreamingOrderByQueryTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Query/Pipeline/NonStreamingOrderByQueryTests.cs @@ -31,7 +31,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Query.Pipeline [TestClass] public class NonStreamingOrderByQueryTests { - private const int MaxConcurrency = 10; + private const int MaxConcurrency = 0; private const int DocumentCount = 420; @@ -321,6 +321,69 @@ public async Task HybridSearchTests() } } + [TestMethod] + public async Task HybridSearchSkipOrderByRewriteTests() + { + IReadOnlyList testCases = new List + { + MakeHybridSearchSkipOrderByRewriteTest( + leafPageCount: 4, + backendPageSize: 10, + requiresGlobalStatistics: false, + skip: null, + take: 100, + pageSize: 1000), + MakeHybridSearchSkipOrderByRewriteTest( + leafPageCount: 4, + backendPageSize: 10, + requiresGlobalStatistics: false, + skip: 20, + take: 100, + pageSize: 1000), + MakeHybridSearchSkipOrderByRewriteTest( + leafPageCount: 4, + backendPageSize: 10, + requiresGlobalStatistics: true, + skip: 20, + take: 100, + pageSize: 1000), + MakeHybridSearchSkipOrderByRewriteTest( + leafPageCount: 4, + backendPageSize: 10, + requiresGlobalStatistics: true, + skip: 20, + take: 100, + pageSize: 10), + MakeHybridSearchSkipOrderByRewriteTest( + leafPageCount: 10, + backendPageSize: 10, + requiresGlobalStatistics: true, + skip: 20, + take: 100, + pageSize: 10), + MakeHybridSearchSkipOrderByRewriteTest( + leafPageCount: 4, + backendPageSize: 100, + requiresGlobalStatistics: true, + skip: 7, + take: 10, + pageSize: 1), + MakeHybridSearchSkipOrderByRewriteTest( + leafPageCount: 0, + backendPageSize: 10, + requiresGlobalStatistics: true, + skip: 0, + take: 10, + pageSize: 10, + returnEmptyGlobalStatistics: true), + }; + + foreach (HybridSearchTest testCase in testCases) + { + await RunHybridSearchTest(testCase); + } + } + [TestMethod] public async Task HybridSearchWeightedRRFTests() { @@ -384,6 +447,70 @@ public async Task HybridSearchWeightedRRFTests() } } + [TestMethod] + public async Task HybridSearchSkipOrderByRewriteWeightedRRFTests() + { + IReadOnlyList testCases = new List + { + MakeHybridSearchSkipOrderByRewriteTest( + leafPageCount: 4, + backendPageSize: 10, + requiresGlobalStatistics: false, + skip: null, + take: 100, + weights: new double[] { 1.0, 1.0 }, + pageSize: 1000), + MakeHybridSearchSkipOrderByRewriteTest( + leafPageCount: 4, + backendPageSize: 10, + requiresGlobalStatistics: false, + skip: 20, + take: 100, + weights: new double[] { 0.25, 2.1 }, + pageSize: 1000), + MakeHybridSearchSkipOrderByRewriteTest( + leafPageCount: 0, + backendPageSize: 10, + requiresGlobalStatistics: true, + skip: 0, + take: 10, + weights: new double[] { 1.25, 2.0 }, + pageSize: 10, + returnEmptyGlobalStatistics: true), + + //MakeHybridSearchSkipOrderByRewriteTest( + // leafPageCount: 4, + // backendPageSize: 10, + // requiresGlobalStatistics: false, + // skip: null, + // take: 100, + // weights: new double[] { -1.0, -1.0 }, + // pageSize: 1000), + //MakeHybridSearchSkipOrderByRewriteTest( + // leafPageCount: 4, + // backendPageSize: 100, + // requiresGlobalStatistics: true, + // skip: 7, + // take: 10, + // weights: new double[] { -1.33, -0.45 }, + // pageSize: 1), + //MakeHybridSearchSkipOrderByRewriteTest( + // leafPageCount: 0, + // backendPageSize: 10, + // requiresGlobalStatistics: true, + // skip: 0, + // take: 10, + // weights: new double[] { -1.25, -2.0 }, + // pageSize: 10, + // returnEmptyGlobalStatistics: true), + }; + + foreach (HybridSearchTest testCase in testCases) + { + await RunHybridSearchTest(testCase); + } + } + private static async Task RunHybridSearchTest(HybridSearchTest testCase) { IReadOnlyList ranges = new List @@ -440,7 +567,8 @@ private static async Task RunHybridSearchTest(HybridSearchTest testCase) feedModes, leafPageCount: testCase.LeafPageCount, backendPageSize: testCase.BackendPageSize, - returnEmptyGlobalStatistics: testCase.ReturnEmptyGlobalStatistics); + returnEmptyGlobalStatistics: testCase.ReturnEmptyGlobalStatistics, + skipOrderByRewrite: testCase.SkipOrderByRewrite); (IReadOnlyList results, double requestCharge) = await CreateAndRunHybridSearchQueryPipelineStage( documentContainer: nonStreamingDocumentContainer, @@ -449,7 +577,8 @@ private static async Task RunHybridSearchTest(HybridSearchTest testCase) pageSize: testCase.PageSize, skip: (uint?)testCase.Skip, take: (uint?)testCase.Take, - weights: testCase.Weights); + weights: testCase.Weights, + skipOrderByRewrite: testCase.SkipOrderByRewrite); Assert.AreEqual(expectedIndices.Count(), results.Count); @@ -534,15 +663,20 @@ private static async Task RunParityTests( int pageSize, uint? skip, uint? take, - double[] weights) + double[] weights, + bool skipOrderByRewrite) { + HybridSearchQueryInfo hybridSearchQueryInfo = skipOrderByRewrite ? + Create2ItemHybridSearchSkipOrderByRewriteQueryInfo(requiresGlobalStatistics, skip, take, weights) : + Create2ItemHybridSearchQueryInfo(requiresGlobalStatistics, skip, take, weights); + TryCatch tryCreatePipeline = PipelineFactory.MonadicCreate( documentContainer, Create2ItemSqlQuerySpec(), ranges, partitionKey: null, queryInfo: null, - Create2ItemHybridSearchQueryInfo(requiresGlobalStatistics, skip, take, weights), + hybridSearchQueryInfo: hybridSearchQueryInfo, maxItemCount: pageSize, new ContainerQueryProperties(), ranges, @@ -714,6 +848,28 @@ public TestCase( } private static HybridSearchTest MakeHybridSearchTest( + int leafPageCount, + int backendPageSize, + bool requiresGlobalStatistics, + int? skip, + int? take, + int pageSize, + bool returnEmptyGlobalStatistics = false, + bool skipOrderByRewrite = false) + { + return new HybridSearchTest( + leafPageCount, + backendPageSize, + requiresGlobalStatistics, + skip, + take, + weights: null, + pageSize, + returnEmptyGlobalStatistics, + skipOrderByRewrite); + } + + private static HybridSearchTest MakeHybridSearchSkipOrderByRewriteTest( int leafPageCount, int backendPageSize, bool requiresGlobalStatistics, @@ -722,10 +878,42 @@ private static HybridSearchTest MakeHybridSearchTest( int pageSize, bool returnEmptyGlobalStatistics = false) { - return new HybridSearchTest(leafPageCount, backendPageSize, requiresGlobalStatistics, skip, take, weights: null, pageSize, returnEmptyGlobalStatistics); + return new HybridSearchTest( + leafPageCount, + backendPageSize, + requiresGlobalStatistics, + skip, + take, + weights: null, + pageSize, + returnEmptyGlobalStatistics, + skipOrderByRewrite: true); } private static HybridSearchTest MakeHybridSearchTest( + int leafPageCount, + int backendPageSize, + bool requiresGlobalStatistics, + int? skip, + int? take, + double[] weights, + int pageSize, + bool returnEmptyGlobalStatistics = false, + bool skipOrderByRewrite = false) + { + return new HybridSearchTest( + leafPageCount, + backendPageSize, + requiresGlobalStatistics, + skip, + take, + weights, + pageSize, + returnEmptyGlobalStatistics, + skipOrderByRewrite); + } + + private static HybridSearchTest MakeHybridSearchSkipOrderByRewriteTest( int leafPageCount, int backendPageSize, bool requiresGlobalStatistics, @@ -735,7 +923,16 @@ private static HybridSearchTest MakeHybridSearchTest( int pageSize, bool returnEmptyGlobalStatistics = false) { - return new HybridSearchTest(leafPageCount, backendPageSize, requiresGlobalStatistics, skip, take, weights, pageSize, returnEmptyGlobalStatistics); + return new HybridSearchTest( + leafPageCount, + backendPageSize, + requiresGlobalStatistics, + skip, + take, + weights, + pageSize, + returnEmptyGlobalStatistics, + skipOrderByRewrite: true); } private class HybridSearchTest @@ -756,6 +953,8 @@ private class HybridSearchTest public bool ReturnEmptyGlobalStatistics { get; } + public bool SkipOrderByRewrite { get; } + public HybridSearchTest( int leafPageCount, int backendPageSize, @@ -764,7 +963,8 @@ public HybridSearchTest( int? take, double[] weights, int pageSize, - bool returnEmptyGlobalStatistics) + bool returnEmptyGlobalStatistics, + bool skipOrderByRewrite) { this.LeafPageCount = leafPageCount; this.BackendPageSize = backendPageSize; @@ -774,6 +974,7 @@ public HybridSearchTest( this.Weights = weights; this.PageSize = pageSize; this.ReturnEmptyGlobalStatistics = returnEmptyGlobalStatistics; + this.SkipOrderByRewrite = skipOrderByRewrite; } } @@ -949,7 +1150,7 @@ public async Task> MonadicQueryAsync( } QueryPage page = queryPage.Result; - DebugTraceHelpers.TraceBackendResponse(page); + DebugTraceHelpers.TraceBackendResponse(feedRangeState.FeedRange, page); return TryCatch.FromResult(new QueryPage( page.Documents, @@ -1056,7 +1257,7 @@ private async Task SplitMergeAsync() private static class DebugTraceHelpers { #pragma warning disable CS0162, CS0649 // Unreachable code detected - private static readonly bool Enabled; + private static readonly bool Enabled = true; [Conditional("DEBUG")] public static void TraceSplit(FeedRangeInternal feedRange) @@ -1096,11 +1297,11 @@ public static void TracePipelineStagePage(QueryPage page) } [Conditional("DEBUG")] - public static void TraceBackendResponse(QueryPage page) + public static void TraceBackendResponse(FeedRangeInternal feedRange, QueryPage page) { if (Enabled) { - System.Diagnostics.Trace.WriteLine("Serving query from backend: "); + System.Diagnostics.Trace.WriteLine($"Serving query from backend: {feedRange}"); TracePage(page); } } @@ -1111,6 +1312,7 @@ public static void TracePage(QueryPage page) if (Enabled) { System.Diagnostics.Trace.WriteLine("Page:"); + System.Diagnostics.Trace.WriteLine($" State: {page.State?.Value}"); System.Diagnostics.Trace.WriteLine($" ActivityId: {page.ActivityId}"); System.Diagnostics.Trace.WriteLine($" RequestCharge: {page.RequestCharge}"); System.Diagnostics.Trace.WriteLine($" ActivityId: {page.ActivityId}"); @@ -1172,7 +1374,8 @@ public static MockDocumentContainer CreateHybridSearchContainer( PartitionedFeedMode[] feedModes, int leafPageCount, int backendPageSize, - bool returnEmptyGlobalStatistics) + bool returnEmptyGlobalStatistics, + bool skipOrderByRewrite) { Assert.IsTrue(feedModes.All(x => x.HasFlag(PartitionedFeedMode.NonStreaming)) || feedModes.All(x => !x.HasFlag(PartitionedFeedMode.NonStreaming))); @@ -1180,7 +1383,8 @@ public static MockDocumentContainer CreateHybridSearchContainer( feedRanges, feedModes, leafPageCount, - backendPageSize); + backendPageSize, + skipOrderByRewrite); return new MockDocumentContainer( pages, @@ -1322,7 +1526,7 @@ public Task> MonadicQueryAsync(SqlQuerySpec sqlQuerySpec, Fe state: state, streaming: this.streaming); - DebugTraceHelpers.TraceBackendResponse(queryPage); + DebugTraceHelpers.TraceBackendResponse(feedRangeState.FeedRange, queryPage); Interlocked.Increment(ref this.queryCount); return Task.FromResult(TryCatch.FromResult(queryPage)); @@ -1437,7 +1641,8 @@ private static IReadOnlyList feedRanges, PartitionedFeedMode[] feedModes, int leafPageCount, - int pageSize) + int pageSize, + bool skipOrderByRewrite) { int componentCount = feedModes.Length; List>>> componentPages = new List>>>(componentCount); @@ -1449,7 +1654,7 @@ private static IReadOnlyList CreateHybridSearchDocument(componentCount, index, componentIndex)); + (componentIndex, index) => CreateHybridSearchDocument(componentCount: componentCount, index:index, componentIndex: componentIndex, skipOrderByRewrite)); componentPages.Add(pages); } @@ -1579,7 +1784,7 @@ private static CosmosElement CreateHybridSearchGlobalStatistics() return globalStatistics; } - private static CosmosElement CreateHybridSearchDocument(int componentCount, int index, int componentIndex) + private static CosmosElement CreateHybridSearchDocument(int componentCount, int index, int componentIndex, bool skipOrderByRewrite) { CosmosElement indexElement = CosmosNumber64.Create(index); CosmosElement indexStringElement = CosmosString.Create(index.ToString("D4")); @@ -1613,13 +1818,21 @@ private static CosmosElement CreateHybridSearchDocument(int componentCount, int (ulong)index, Documents.ResourceType.Document); - CosmosElement document = CosmosObject.Create(new Dictionary + if (skipOrderByRewrite) { - [RId] = CosmosString.Create(resourceId.ToString()), - [OrderByItems] = CosmosArray.Create(orderByItems), - [Payload] = CosmosObject.Create(payload) - }); + payload.Add(RId, CosmosString.Create(resourceId.ToString())); + } + else + { + payload = new Dictionary + { + [RId] = CosmosString.Create(resourceId.ToString()), + [OrderByItems] = CosmosArray.Create(orderByItems), + [Payload] = CosmosObject.Create(payload) + }; + } + CosmosElement document = CosmosObject.Create(payload); return document; } @@ -1802,6 +2015,90 @@ ORDER BY _FullTextScore(c.abstract, [""energy""], {documentdb-formattablehybrids }; } + private static HybridSearchQueryInfo Create2ItemHybridSearchSkipOrderByRewriteQueryInfo(bool requiresGlobalStatistics, uint? skip, uint? take, double[] weights) + { + if (weights != null) + { + Assert.AreEqual(2, weights.Length); + + for (int i = 0; i < weights.Length; ++i) + { + weights[i] = Math.Abs(weights[i]); + } + } + + return new HybridSearchQueryInfo + { + GlobalStatisticsQuery = @" + SELECT + COUNT(1) AS documentCount, + [ + { + totalWordCount: SUM(_FullTextWordCount(c.text)), + hitCounts: [ + COUNTIF(FullTextContains(c.text, ""swim"")), + COUNTIF(FullTextContains(c.text, ""run"")) + ] + }, + { + totalWordCount: SUM(_FullTextWordCount(c.abstract)), + hitCounts: [ + COUNTIF(FullTextContains(c.abstract, ""energy"")) + ] + } + ] AS fullTextStatistics + FROM c", + + ComponentQueryInfos = new List + { + new QueryInfo + { + DistinctType = DistinctQueryType.None, + HasSelectValue = false, + RewrittenQuery = @" + SELECT TOP 200 + c._rid, + { + text: c.text, + abstract: c.abstract + } AS payload, + [ + (_FullTextScore(c.text, [""swim"", ""run""], {documentdb-formattablehybridsearchquery-totaldocumentcount}, {documentdb-formattablehybridsearchquery-totalwordcount-0}, {documentdb-formattablehybridsearchquery-hitcountsarray-0}) ?? -1), + (_FullTextScore(c.abstract, [""energy""], {documentdb-formattablehybridsearchquery-totaldocumentcount}, {documentdb-formattablehybridsearchquery-totalwordcount-1}, {documentdb-formattablehybridsearchquery-hitcountsarray-1}) ?? -1) + ] AS componentScores + FROM c + ORDER BY _FullTextScore(c.text, [""swim"", ""run""], {documentdb-formattablehybridsearchquery-totaldocumentcount}, {documentdb-formattablehybridsearchquery-totalwordcount-0}, {documentdb-formattablehybridsearchquery-hitcountsarray-0}) DESC", + HasNonStreamingOrderBy = false, + }, + + new QueryInfo + { + DistinctType = DistinctQueryType.None, + HasSelectValue = false, + RewrittenQuery = @" + SELECT TOP 200 + c._rid, + { + text: c.text, + abstract: c.abstract + } AS payload, + [ + (_FullTextScore(c.text, [""swim"", ""run""], {documentdb-formattablehybridsearchquery-totaldocumentcount}, {documentdb-formattablehybridsearchquery-totalwordcount-0}, {documentdb-formattablehybridsearchquery-hitcountsarray-0}) ?? -1), + (_FullTextScore(c.abstract, [""energy""], {documentdb-formattablehybridsearchquery-totaldocumentcount}, {documentdb-formattablehybridsearchquery-totalwordcount-1}, {documentdb-formattablehybridsearchquery-hitcountsarray-1}) ?? -1) + ] AS componentScores + FROM c + ORDER BY _FullTextScore(c.abstract, [""energy""], {documentdb-formattablehybridsearchquery-totaldocumentcount}, {documentdb-formattablehybridsearchquery-totalwordcount-1}, {documentdb-formattablehybridsearchquery-hitcountsarray-1}) DESC", + HasNonStreamingOrderBy = false, + }, + }, + + Skip = skip, + Take = take, + ComponentWeights = weights?.ToList(), + RequiresGlobalStatistics = requiresGlobalStatistics, + }; + } + private static SqlQuerySpec Create2ItemSqlQuerySpec(double[] weights = null) { if (weights != null) From 20e563fd96e6249b630093fe29befcf2d7a1374f Mon Sep 17 00:00:00 2001 From: "Neil Deshpande (from Dev Box)" Date: Mon, 5 May 2025 12:38:56 -0700 Subject: [PATCH 4/6] Add more unit tests for weighted RRF with skip order by rewrite --- .../Pipeline/NonStreamingOrderByQueryTests.cs | 75 ++++++++++--------- 1 file changed, 41 insertions(+), 34 deletions(-) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Query/Pipeline/NonStreamingOrderByQueryTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Query/Pipeline/NonStreamingOrderByQueryTests.cs index 4c01d66721..5b8a3db960 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Query/Pipeline/NonStreamingOrderByQueryTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Query/Pipeline/NonStreamingOrderByQueryTests.cs @@ -477,32 +477,31 @@ public async Task HybridSearchSkipOrderByRewriteWeightedRRFTests() weights: new double[] { 1.25, 2.0 }, pageSize: 10, returnEmptyGlobalStatistics: true), - - //MakeHybridSearchSkipOrderByRewriteTest( - // leafPageCount: 4, - // backendPageSize: 10, - // requiresGlobalStatistics: false, - // skip: null, - // take: 100, - // weights: new double[] { -1.0, -1.0 }, - // pageSize: 1000), - //MakeHybridSearchSkipOrderByRewriteTest( - // leafPageCount: 4, - // backendPageSize: 100, - // requiresGlobalStatistics: true, - // skip: 7, - // take: 10, - // weights: new double[] { -1.33, -0.45 }, - // pageSize: 1), - //MakeHybridSearchSkipOrderByRewriteTest( - // leafPageCount: 0, - // backendPageSize: 10, - // requiresGlobalStatistics: true, - // skip: 0, - // take: 10, - // weights: new double[] { -1.25, -2.0 }, - // pageSize: 10, - // returnEmptyGlobalStatistics: true), + MakeHybridSearchSkipOrderByRewriteTest( + leafPageCount: 4, + backendPageSize: 10, + requiresGlobalStatistics: false, + skip: null, + take: 100, + weights: new double[] { -1.0, -1.0 }, + pageSize: 1000), + MakeHybridSearchSkipOrderByRewriteTest( + leafPageCount: 4, + backendPageSize: 100, + requiresGlobalStatistics: true, + skip: 7, + take: 10, + weights: new double[] { -1.33, -0.45 }, + pageSize: 1), + MakeHybridSearchSkipOrderByRewriteTest( + leafPageCount: 0, + backendPageSize: 10, + requiresGlobalStatistics: true, + skip: 0, + take: 10, + weights: new double[] { -1.25, -2.0 }, + pageSize: 10, + returnEmptyGlobalStatistics: true), }; foreach (HybridSearchTest testCase in testCases) @@ -544,11 +543,11 @@ private static async Task RunHybridSearchTest(HybridSearchTest testCase) { expectedIndices = expectedIndices.Reverse(); - feedModes = new PartitionedFeedMode[] - { - PartitionedFeedMode.NonStreaming, - PartitionedFeedMode.NonStreaming, - }; + PartitionedFeedMode feedMode = testCase.SkipOrderByRewrite? + PartitionedFeedMode.NonStreamingReversed | PartitionedFeedMode.NegateScores: + PartitionedFeedMode.NonStreaming; + + feedModes = new PartitionedFeedMode[] { feedMode, feedMode }; } } @@ -1586,6 +1585,8 @@ enum PartitionedFeedMode NonStreaming = 1, Reversed = 2, + NegateScores = 4, + StreamingReversed = Streaming | Reversed, NonStreamingReversed = NonStreaming | Reversed, } @@ -1654,7 +1655,12 @@ private static IReadOnlyList CreateHybridSearchDocument(componentCount: componentCount, index:index, componentIndex: componentIndex, skipOrderByRewrite)); + (componentIndex, index) => CreateHybridSearchDocument( + componentCount: componentCount, + index: index, + componentIndex: componentIndex, + skipOrderByRewrite: skipOrderByRewrite, + negateScores: feedModes[componentIndex].HasFlag(PartitionedFeedMode.NegateScores))); componentPages.Add(pages); } @@ -1784,15 +1790,16 @@ private static CosmosElement CreateHybridSearchGlobalStatistics() return globalStatistics; } - private static CosmosElement CreateHybridSearchDocument(int componentCount, int index, int componentIndex, bool skipOrderByRewrite) + private static CosmosElement CreateHybridSearchDocument(int componentCount, int index, int componentIndex, bool skipOrderByRewrite, bool negateScores) { CosmosElement indexElement = CosmosNumber64.Create(index); CosmosElement indexStringElement = CosmosString.Create(index.ToString("D4")); double[] scores = new double[componentCount]; double delta = 0.1; + double factor = negateScores ? -1.0 : 1.0; for (int scoreIndex = 0; scoreIndex < componentCount; ++scoreIndex) { - scores[scoreIndex] = index + ((1 + scoreIndex) * delta); + scores[scoreIndex] = factor * (index + ((1 + scoreIndex) * delta)); } List orderByItems = new List From 9d905768af2d9570ce98859ef2caf9130ce6279b Mon Sep 17 00:00:00 2001 From: "Neil Deshpande (from Dev Box)" Date: Mon, 5 May 2025 12:52:01 -0700 Subject: [PATCH 5/6] Bump up max concurrency for NonStreamingOrderByQueryTests --- .../Query/Pipeline/NonStreamingOrderByQueryTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Query/Pipeline/NonStreamingOrderByQueryTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Query/Pipeline/NonStreamingOrderByQueryTests.cs index 5b8a3db960..4f985359ce 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Query/Pipeline/NonStreamingOrderByQueryTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Query/Pipeline/NonStreamingOrderByQueryTests.cs @@ -31,7 +31,7 @@ namespace Microsoft.Azure.Cosmos.Tests.Query.Pipeline [TestClass] public class NonStreamingOrderByQueryTests { - private const int MaxConcurrency = 0; + private const int MaxConcurrency = 10; private const int DocumentCount = 420; From 9924c6c385ea2d890bbdb51f718c66461ae062fb Mon Sep 17 00:00:00 2001 From: "Neil Deshpande (from Dev Box)" Date: Mon, 5 May 2025 14:58:30 -0700 Subject: [PATCH 6/6] Disable the debug trace for NonStreamingOrderByQueryTests --- .../Query/Pipeline/NonStreamingOrderByQueryTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Query/Pipeline/NonStreamingOrderByQueryTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Query/Pipeline/NonStreamingOrderByQueryTests.cs index 4f985359ce..f9645ef2a5 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Query/Pipeline/NonStreamingOrderByQueryTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Query/Pipeline/NonStreamingOrderByQueryTests.cs @@ -1256,7 +1256,7 @@ private async Task SplitMergeAsync() private static class DebugTraceHelpers { #pragma warning disable CS0162, CS0649 // Unreachable code detected - private static readonly bool Enabled = true; + private static readonly bool Enabled; [Conditional("DEBUG")] public static void TraceSplit(FeedRangeInternal feedRange)