diff --git a/Microsoft.Azure.Cosmos/src/Fluent/Settings/ContainerBuilder.cs b/Microsoft.Azure.Cosmos/src/Fluent/Settings/ContainerBuilder.cs index a08fa3ab4e..6cb6c2a4bb 100644 --- a/Microsoft.Azure.Cosmos/src/Fluent/Settings/ContainerBuilder.cs +++ b/Microsoft.Azure.Cosmos/src/Fluent/Settings/ContainerBuilder.cs @@ -5,6 +5,8 @@ namespace Microsoft.Azure.Cosmos.Fluent { using System; + using System.Collections.Generic; + using System.Collections.ObjectModel; using System.Threading; using System.Threading.Tasks; @@ -20,6 +22,7 @@ public class ContainerBuilder : ContainerDefinition private ConflictResolutionPolicy conflictResolutionPolicy; private ChangeFeedPolicy changeFeedPolicy; private ClientEncryptionPolicy clientEncryptionPolicy; + private VectorEmbeddingPolicy vectorEmbeddingPolicy; /// /// Creates an instance for unit-testing @@ -114,6 +117,20 @@ public ClientEncryptionPolicyDefinition WithClientEncryptionPolicy(int policyFor policyFormatVersion); } + /// + /// Defines the vector embedding policy for this Azure Cosmos container + /// + /// List of vector embeddings to include in the policy definition. + /// An instance of <see cref="VectorEmbeddingPolicyDefinition"/>. + internal VectorEmbeddingPolicyDefinition WithVectorEmbeddingPolicy( + Collection embeddings) + { + return new VectorEmbeddingPolicyDefinition( + this, + embeddings, + (embeddingPolicy) => this.AddVectorEmbeddingPolicy(embeddingPolicy)); + } + /// /// Creates a container with the current fluent definition. 
/// @@ -220,6 +237,11 @@ public async Task CreateIfNotExistsAsync( containerProperties.ClientEncryptionPolicy = this.clientEncryptionPolicy; } + if (this.vectorEmbeddingPolicy != null) + { + containerProperties.VectorEmbeddingPolicy = this.vectorEmbeddingPolicy; + } + return containerProperties; } @@ -254,5 +276,10 @@ private void AddClientEncryptionPolicy(ClientEncryptionPolicy clientEncryptionPo { this.clientEncryptionPolicy = clientEncryptionPolicy; } + + private void AddVectorEmbeddingPolicy(VectorEmbeddingPolicy embeddingPolicy) + { + this.vectorEmbeddingPolicy = embeddingPolicy; + } } } diff --git a/Microsoft.Azure.Cosmos/src/Fluent/Settings/IndexingPolicyDefinition.cs b/Microsoft.Azure.Cosmos/src/Fluent/Settings/IndexingPolicyDefinition.cs index d964d22621..9de73206dd 100644 --- a/Microsoft.Azure.Cosmos/src/Fluent/Settings/IndexingPolicyDefinition.cs +++ b/Microsoft.Azure.Cosmos/src/Fluent/Settings/IndexingPolicyDefinition.cs @@ -113,6 +113,17 @@ public SpatialIndexDefinition> WithSpatialIndex() (spatialIndex) => this.AddSpatialPath(spatialIndex)); } + /// + /// Defines a in the current 's definition. + /// + /// An instance of . + internal VectorIndexDefinition> WithVectorIndex() + { + return new VectorIndexDefinition>( + this, + (vectorIndex) => this.AddVectorIndexPath(vectorIndex)); + } + /// /// Applies the current definition to the parent. 
/// @@ -133,6 +144,11 @@ private void AddSpatialPath(SpatialPath spatialSpec) this.indexingPolicy.SpatialIndexes.Add(spatialSpec); } + private void AddVectorIndexPath(VectorIndexPath vectorIndexPath) + { + this.indexingPolicy.VectorIndexes.Add(vectorIndexPath); + } + private void AddIncludedPaths(IEnumerable paths) { foreach (string path in paths)
diff --git a/Microsoft.Azure.Cosmos/src/Fluent/Settings/VectorEmbeddingPolicyDefinition.cs b/Microsoft.Azure.Cosmos/src/Fluent/Settings/VectorEmbeddingPolicyDefinition.cs
new file mode 100644
index 0000000000..9f47145048
--- /dev/null
+++ b/Microsoft.Azure.Cosmos/src/Fluent/Settings/VectorEmbeddingPolicyDefinition.cs
@@ -0,0 +1,49 @@
+//------------------------------------------------------------
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//------------------------------------------------------------
+
+namespace Microsoft.Azure.Cosmos.Fluent
+{
+    using System;
+    using System.Collections.Generic;
+    using System.Collections.ObjectModel;
+
+    /// <summary>
+    /// <see cref="VectorEmbeddingPolicy"/> fluent definition.
+    /// </summary>
+    internal class VectorEmbeddingPolicyDefinition
+    {
+        private readonly ContainerBuilder parent;
+        private readonly Action<VectorEmbeddingPolicy> attachCallback;
+        private readonly Collection<Embedding> vectorEmbeddings;
+
+        /// <summary>
+        /// Creates the fluent definition; every argument is required.
+        /// </summary>
+        /// <param name="parent">Builder that <c>Attach</c> returns.</param>
+        /// <param name="embeddings">Embeddings the policy is built from.</param>
+        /// <param name="attachCallback">Invoked by <c>Attach</c> with the built policy.</param>
+        /// <exception cref="ArgumentNullException">Any argument is null.</exception>
+        internal VectorEmbeddingPolicyDefinition(
+            ContainerBuilder parent,
+            Collection<Embedding> embeddings,
+            Action<VectorEmbeddingPolicy> attachCallback)
+        {
+            this.parent = parent ?? throw new ArgumentNullException(nameof(parent));
+            this.attachCallback = attachCallback ?? throw new ArgumentNullException(nameof(attachCallback));
+
+            // Reject null here, where the caller's mistake is visible, instead of failing later inside Attach().
+            this.vectorEmbeddings = embeddings ?? throw new ArgumentNullException(nameof(embeddings));
+        }
+
+        /// <summary>
+        /// Applies the current definition to the parent.
+        /// </summary>
+        /// <returns>An instance of the parent.</returns>
+        public ContainerBuilder Attach()
+        {
+            VectorEmbeddingPolicy embeddingPolicy = new (this.vectorEmbeddings);
+
+            this.attachCallback(embeddingPolicy);
+            return this.parent;
+        }
+    }
+}
diff --git a/Microsoft.Azure.Cosmos/src/Fluent/Settings/VectorIndexDefinition.cs b/Microsoft.Azure.Cosmos/src/Fluent/Settings/VectorIndexDefinition.cs
new file mode 100644
index 0000000000..ab5ded29c9
--- /dev/null
+++ b/Microsoft.Azure.Cosmos/src/Fluent/Settings/VectorIndexDefinition.cs
@@ -0,0 +1,63 @@
+//------------------------------------------------------------
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//------------------------------------------------------------
+
+namespace Microsoft.Azure.Cosmos.Fluent
+{
+    using System;
+
+    /// <summary>
+    /// Vector index fluent definition.
+    /// </summary>
+    /// <typeparam name="T">Type of the parent definition that <c>Attach</c> returns.</typeparam>
+    internal class VectorIndexDefinition<T>
+    {
+        private readonly VectorIndexPath vectorIndexPath = new VectorIndexPath();
+        private readonly T parent;
+        private readonly Action<VectorIndexPath> attachCallback;
+
+        /// <summary>
+        /// Creates a definition that hands its <see cref="VectorIndexPath"/> to <paramref name="attachCallback"/> on <c>Attach</c>.
+        /// </summary>
+        /// <exception cref="ArgumentNullException"><paramref name="attachCallback"/> is null.</exception>
+        internal VectorIndexDefinition(
+            T parent,
+            Action<VectorIndexPath> attachCallback)
+        {
+            this.parent = parent;
+            this.attachCallback = attachCallback ?? throw new ArgumentNullException(nameof(attachCallback));
+        }
+
+        /// <summary>
+        /// Sets the path and the <see cref="VectorIndexType"/> of the vector index being defined.
+        /// </summary>
+        /// <param name="path">Property path for the current definition. Example: /property</param>
+        /// <param name="indexType">The <see cref="VectorIndexType"/> to apply to the path.</param>
+        /// <returns>An instance of the current <see cref="VectorIndexDefinition{T}"/>.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="path"/> is null or empty.</exception>
+        public VectorIndexDefinition<T> Path(
+            string path,
+            VectorIndexType indexType)
+        {
+            if (string.IsNullOrEmpty(path))
+            {
+                throw new ArgumentNullException(nameof(path));
+            }
+
+            this.vectorIndexPath.Path = path;
+            this.vectorIndexPath.Type = indexType;
+
+            return this;
+        }
+
+        /// <summary>
+        /// Applies the current definition to the parent.
+        /// </summary>
+        /// <returns>An instance of the parent.</returns>
+ public T Attach() + { + this.attachCallback(this.vectorIndexPath); + return this.parent; + } + } +} diff --git a/Microsoft.Azure.Cosmos/src/Resource/Settings/ContainerProperties.cs b/Microsoft.Azure.Cosmos/src/Resource/Settings/ContainerProperties.cs index 534f4f033d..4406ec5164 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Settings/ContainerProperties.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Settings/ContainerProperties.cs @@ -78,6 +78,9 @@ public class ContainerProperties [JsonProperty(PropertyName = "clientEncryptionPolicy", NullValueHandling = NullValueHandling.Ignore)] private ClientEncryptionPolicy clientEncryptionPolicyInternal; + [JsonProperty(PropertyName = "vectorEmbeddingPolicy", NullValueHandling = NullValueHandling.Ignore)] + private VectorEmbeddingPolicy vectorEmbeddingPolicyInternal; + [JsonProperty(PropertyName = "computedProperties", NullValueHandling = NullValueHandling.Ignore)] private Collection computedProperties; @@ -289,6 +292,27 @@ public IndexingPolicy IndexingPolicy } } + /// + /// Gets or sets the vector embedding policy containing paths for embeddings along with path-specific settings for the item + /// used in performing vector search on the items in a collection in the Azure CosmosDB database service. + /// + /// + /// It is an optional property. + /// By default, VectorEmbeddingPolicy is set to null meaning the feature is turned off for the container. + /// + /// + /// + /// The will be applied to all the items in the container as the default policy. + /// + /// + [JsonIgnore] + internal VectorEmbeddingPolicy VectorEmbeddingPolicy + { + get => this.vectorEmbeddingPolicyInternal; + + set => this.vectorEmbeddingPolicyInternal = value; + } + /// /// Gets or sets the collection containing objects in the container. 
/// diff --git a/Microsoft.Azure.Cosmos/src/Resource/Settings/DistanceFunction.cs b/Microsoft.Azure.Cosmos/src/Resource/Settings/DistanceFunction.cs new file mode 100644 index 0000000000..24a6b3eb25 --- /dev/null +++ b/Microsoft.Azure.Cosmos/src/Resource/Settings/DistanceFunction.cs @@ -0,0 +1,32 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ +namespace Microsoft.Azure.Cosmos +{ + using System.Runtime.Serialization; + + /// + /// Defines the distance function for a vector index specification in the Azure Cosmos DB service. + /// + /// for usage. + internal enum DistanceFunction + { + /// + /// Represents the euclidean distance function. + /// + [EnumMember(Value = "euclidean")] + Euclidean, + + /// + /// Represents the cosine distance function. + /// + [EnumMember(Value = "cosine")] + Cosine, + + /// + /// Represents the dot product distance function. + /// + [EnumMember(Value = "dotproduct")] + DotProduct + } +} diff --git a/Microsoft.Azure.Cosmos/src/Resource/Settings/Embedding.cs b/Microsoft.Azure.Cosmos/src/Resource/Settings/Embedding.cs new file mode 100644 index 0000000000..c27ac95bb1 --- /dev/null +++ b/Microsoft.Azure.Cosmos/src/Resource/Settings/Embedding.cs @@ -0,0 +1,77 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos +{ + using System; + using System.Collections.Generic; + using Microsoft.Azure.Documents; + using Newtonsoft.Json; + using Newtonsoft.Json.Converters; + using Newtonsoft.Json.Linq; + + /// + /// Represents the embedding settings for the vector index. + /// + internal class Embedding : IEquatable + { + /// + /// Gets or sets a string containing the path of the vector index. 
+ /// + [JsonProperty(PropertyName = Constants.Properties.Path)] + public string Path { get; set; } + + /// + /// Gets or sets the representing the corresponding vector data type. + /// + [JsonProperty(PropertyName = "dataType")] + [JsonConverter(typeof(StringEnumConverter))] + public VectorDataType DataType { get; set; } + + /// + /// Gets or sets a long integer representing the dimensions of a vector. + /// + [JsonProperty(PropertyName = "dimensions")] + public ulong Dimensions { get; set; } + + /// + /// Gets or sets the which is used to calculate the respective distance between the vectors. + /// + [JsonProperty(PropertyName = "distanceFunction")] + [JsonConverter(typeof(StringEnumConverter))] + public DistanceFunction DistanceFunction { get; set; } + + /// + /// This contains additional values for scenarios where the SDK is not aware of new fields. + /// This ensures that if resource is read and updated none of the fields will be lost in the process. + /// + [JsonExtensionData] + internal IDictionary AdditionalProperties { get; private set; } + + /// + /// Ensures that the paths specified in the vector embedding policy are valid. 
+        ///
+        public void ValidateEmbeddingPath()
+        {
+            if (string.IsNullOrEmpty(this.Path))
+            {
+                throw new ArgumentException($"Argument {nameof(this.Path)} can't be null or empty.", nameof(this.Path));
+            }
+            // A non-empty path is only accepted when it starts with '/'.
+            if (this.Path[0] != '/')
+            {
+                throw new ArgumentException($"The argument {this.Path} is not a valid path.", nameof(this.Path));
+            }
+        }
+
+        /// <summary>Compares path, data type, dimensions and distance function; a null <paramref name="that"/> is never equal.</summary>
+        public bool Equals(Embedding that)
+        {
+            return that != null && string.Equals(this.Path, that.Path, StringComparison.Ordinal)
+                && this.DataType == that.DataType
+                && this.Dimensions == that.Dimensions
+                && this.DistanceFunction == that.DistanceFunction;
+        }
+    }
+}
diff --git a/Microsoft.Azure.Cosmos/src/Resource/Settings/IndexingPolicy.cs b/Microsoft.Azure.Cosmos/src/Resource/Settings/IndexingPolicy.cs index fa56bdbb54..aac39e7bfe 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/Settings/IndexingPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/Settings/IndexingPolicy.cs @@ -111,12 +111,45 @@ public IndexingPolicy() [JsonProperty(PropertyName = Constants.Properties.CompositeIndexes)] public Collection> CompositeIndexes { get; internal set; } = new Collection>(); + /// + /// Gets the vector indexes for additional indexes + /// + /// + /// + /// + internal Collection VectorIndexes + { + get => this.VectorIndexesInternal ??= new Collection(); + set => this.VectorIndexesInternal = value; + } + /// /// Collection of spatial index definitions to be used /// [JsonProperty(PropertyName = Constants.Properties.SpatialIndexes)] public Collection SpatialIndexes { get; internal set; } = new Collection(); + /// + /// Gets or Sets an internal placeholder collection to hold the vector indexes. + /// + [JsonProperty(PropertyName = "vectorIndexes", NullValueHandling = NullValueHandling.Ignore)] + internal Collection VectorIndexesInternal { get; set; } + /// /// This contains additional values for scenarios where the SDK is not aware of new fields. /// This ensures that if resource is read and updated none of the fields will be lost in the process. 
diff --git a/Microsoft.Azure.Cosmos/src/Resource/Settings/VectorDataType.cs b/Microsoft.Azure.Cosmos/src/Resource/Settings/VectorDataType.cs new file mode 100644 index 0000000000..06be37b6fb --- /dev/null +++ b/Microsoft.Azure.Cosmos/src/Resource/Settings/VectorDataType.cs @@ -0,0 +1,37 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ +namespace Microsoft.Azure.Cosmos +{ + using System.Runtime.Serialization; + + /// + /// Defines the target data type of a vector index specification in the Azure Cosmos DB service. + /// + internal enum VectorDataType + { + /// + /// Represent a float16 data type. + /// + [EnumMember(Value = "float16")] + Float16, + + /// + /// Represent a float32 data type. + /// + [EnumMember(Value = "float32")] + Float32, + + /// + /// Represent an uint8 data type. + /// + [EnumMember(Value = "uint8")] + Uint8, + + /// + /// Represent a int8 data type. + /// + [EnumMember(Value = "int8")] + Int8 + } +} diff --git a/Microsoft.Azure.Cosmos/src/Resource/Settings/VectorEmbeddingPolicy.cs b/Microsoft.Azure.Cosmos/src/Resource/Settings/VectorEmbeddingPolicy.cs new file mode 100644 index 0000000000..8f8b7f904f --- /dev/null +++ b/Microsoft.Azure.Cosmos/src/Resource/Settings/VectorEmbeddingPolicy.cs @@ -0,0 +1,53 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ +namespace Microsoft.Azure.Cosmos +{ + using System; + using System.Collections.Generic; + using System.Collections.ObjectModel; + using Newtonsoft.Json; + using Newtonsoft.Json.Linq; + + /// + /// Represents the vector embedding policy configuration for specifying the vector embeddings on documents in the collection in the Azure Cosmos DB service. 
+    ///
+    ///
+    internal sealed class VectorEmbeddingPolicy
+    {
+        /// <summary>
+        /// Initializes a new instance of the <see cref="VectorEmbeddingPolicy"/> class.
+        /// </summary>
+        /// <param name="embeddings">List of embeddings to include in the policy definition; must not be null.</param>
+        public VectorEmbeddingPolicy(Collection<Embedding> embeddings)
+        {
+            VectorEmbeddingPolicy.ValidateEmbeddings(embeddings);
+            this.Embeddings = embeddings;
+        }
+
+        /// <summary>
+        /// Gets a collection of <see cref="Embedding"/> that contains the vector embeddings of documents in collection in the Azure Cosmos DB service.
+        /// </summary>
+        [JsonProperty(PropertyName = "vectorEmbeddings")]
+        public readonly Collection<Embedding> Embeddings;
+
+        /// <summary>
+        /// This contains additional values for scenarios where the SDK is not aware of new fields.
+        /// This ensures that if resource is read and updated none of the fields will be lost in the process.
+        /// </summary>
+        [JsonExtensionData]
+        internal IDictionary<string, JToken> AdditionalProperties { get; private set; }
+
+        /// <summary>
+        /// Ensures that the specified vector embeddings in the policy are valid; a null collection is rejected with <see cref="ArgumentNullException"/>.
+        /// </summary>
+        private static void ValidateEmbeddings(IEnumerable<Embedding> embeddings)
+        {
+            // Name the offending argument instead of letting foreach raise a NullReferenceException.
+            foreach (Embedding item in embeddings ?? throw new ArgumentNullException(nameof(embeddings)))
+            {
+                item.ValidateEmbeddingPath();
+            }
+        }
+    }
+}
diff --git a/Microsoft.Azure.Cosmos/src/Resource/Settings/VectorIndexPath.cs b/Microsoft.Azure.Cosmos/src/Resource/Settings/VectorIndexPath.cs new file mode 100644 index 0000000000..c6c00801ac --- /dev/null +++ b/Microsoft.Azure.Cosmos/src/Resource/Settings/VectorIndexPath.cs @@ -0,0 +1,67 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ +namespace Microsoft.Azure.Cosmos +{ + using System.Collections.Generic; + using Microsoft.Azure.Documents; + using Newtonsoft.Json; + using Newtonsoft.Json.Converters; + using Newtonsoft.Json.Linq; + + /// + /// DOM for a vector index path. A vector index path is used in a vector index. 
+ /// + /// + /// + /// + internal sealed class VectorIndexPath + { + /// + /// Gets or sets the full path in a document used for vector indexing. + /// + [JsonProperty(PropertyName = Constants.Properties.Path)] + public string Path { get; set; } + + /// + /// Gets or sets the for the vector index path. + /// + [JsonProperty(PropertyName = "type")] + [JsonConverter(typeof(StringEnumConverter))] + public VectorIndexType Type { get; set; } + + /// + /// This contains additional values for scenarios where the SDK is not aware of new fields. + /// This ensures that if resource is read and updated none of the fields will be lost in the process. + /// + [JsonExtensionData] + internal IDictionary AdditionalProperties { get; private set; } + } +} \ No newline at end of file diff --git a/Microsoft.Azure.Cosmos/src/Resource/Settings/VectorIndexType.cs b/Microsoft.Azure.Cosmos/src/Resource/Settings/VectorIndexType.cs new file mode 100644 index 0000000000..64318c9389 --- /dev/null +++ b/Microsoft.Azure.Cosmos/src/Resource/Settings/VectorIndexType.cs @@ -0,0 +1,31 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ +namespace Microsoft.Azure.Cosmos +{ + using System.Runtime.Serialization; + + /// + /// Defines the target index type of an vector index path specification in the Azure Cosmos DB service. + /// + internal enum VectorIndexType + { + /// + /// Represents a flat vector index type. + /// + [EnumMember(Value = "flat")] + Flat, + + /// + /// Represents a Disk ANN vector index type. + /// + [EnumMember(Value = "diskANN")] + DiskANN, + + /// + /// Represents a quantized flat vector index type. 
+ /// + [EnumMember(Value = "quantizedFlat")] + QuantizedFlat + } +} diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/Fluent/ContainerSettingsTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/Fluent/ContainerSettingsTests.cs index 8eb72cac25..a757aec147 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/Fluent/ContainerSettingsTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/Fluent/ContainerSettingsTests.cs @@ -538,6 +538,88 @@ await databaseForChangeFeed.DefineContainer(containerName, partitionKeyPath) } } + [TestMethod] + [Ignore("This test will be enabled once the vector similarity changes are made available into the public emulator.")] + public async Task TestVectorEmbeddingPolicy() + { + string vector1Path = "/vector1", vector2Path = "/vector2", vector3Path = "/vector3"; + Database databaseForVectorEmbedding = await this.GetClient().CreateDatabaseAsync("vectorEmbeddingContainerTest", + cancellationToken: this.cancellationToken); + + try + { + Collection embeddings = new Collection() + { + new Embedding() + { + Path = vector1Path, + DataType = VectorDataType.Int8, + DistanceFunction = DistanceFunction.DotProduct, + Dimensions = 1200, + }, + new Embedding() + { + Path = vector2Path, + DataType = VectorDataType.Uint8, + DistanceFunction = DistanceFunction.Cosine, + Dimensions = 3, + }, + new Embedding() + { + Path = vector3Path, + DataType = VectorDataType.Float32, + DistanceFunction = DistanceFunction.Euclidean, + Dimensions = 400, + }, + }; + + string containerName = "vectorEmbeddingContainerTest"; + string partitionKeyPath = "/users"; + + ContainerResponse containerResponse = + await databaseForVectorEmbedding.DefineContainer(containerName, partitionKeyPath) + .WithVectorEmbeddingPolicy(embeddings) + .Attach() + .WithIndexingPolicy() + .WithVectorIndex() + .Path(vector1Path, VectorIndexType.Flat) + .Attach() + .WithVectorIndex() + 
.Path(vector2Path, VectorIndexType.Flat) + .Attach() + .WithVectorIndex() + .Path(vector3Path, VectorIndexType.Flat) + .Attach() + .Attach() + .CreateAsync(); + + Assert.AreEqual(HttpStatusCode.Created, containerResponse.StatusCode); + Assert.AreEqual(containerName, containerResponse.Resource.Id); + Assert.AreEqual(partitionKeyPath, containerResponse.Resource.PartitionKey.Paths.First()); + ContainerProperties containerSettings = containerResponse.Resource; + + // Validate Vector Embeddings. + Assert.IsNotNull(containerSettings.VectorEmbeddingPolicy); + Assert.IsNotNull(containerSettings.VectorEmbeddingPolicy.Embeddings); + Assert.AreEqual(embeddings.Count, containerSettings.VectorEmbeddingPolicy.Embeddings.Count()); + Assert.IsTrue(embeddings.OrderBy(x => x.Path).SequenceEqual(containerSettings.VectorEmbeddingPolicy.Embeddings.OrderBy(x => x.Path))); + + // Validate Vector Indexes. + Assert.IsNotNull(containerSettings.IndexingPolicy.VectorIndexes); + Assert.AreEqual(embeddings.Count, containerSettings.IndexingPolicy.VectorIndexes.Count()); + Assert.AreEqual(vector1Path, containerSettings.IndexingPolicy.VectorIndexes[0].Path); + Assert.AreEqual(VectorIndexType.Flat, containerSettings.IndexingPolicy.VectorIndexes[0].Type); + Assert.AreEqual(vector2Path, containerSettings.IndexingPolicy.VectorIndexes[1].Path); + Assert.AreEqual(VectorIndexType.Flat, containerSettings.IndexingPolicy.VectorIndexes[1].Type); + Assert.AreEqual(vector3Path, containerSettings.IndexingPolicy.VectorIndexes[2].Path); + Assert.AreEqual(VectorIndexType.Flat, containerSettings.IndexingPolicy.VectorIndexes[2].Type); + } + finally + { + await databaseForVectorEmbedding.DeleteAsync(); + } + } + [TestMethod] public async Task WithIndexingPolicy() { diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosContainerSettingsTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosContainerSettingsTests.cs index bda132d6a3..bae30c8acd 100644 --- 
a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosContainerSettingsTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CosmosContainerSettingsTests.cs @@ -4,6 +4,7 @@ namespace Microsoft.Azure.Cosmos.Tests { + using System.Collections.Generic; using System.Collections.ObjectModel; using System.IO; using System.Linq; @@ -179,6 +180,85 @@ public void SettingPKShouldNotResetVersion() Assert.AreEqual(Cosmos.PartitionKeyDefinitionVersion.V2, containerProperties.PartitionKeyDefinitionVersion); } + [TestMethod] + public void ValidateVectorEmbeddingsAndIndexes() + { + Embedding embedding1 = new () + { + Path = "/vector1", + DataType = VectorDataType.Int8, + DistanceFunction = DistanceFunction.DotProduct, + Dimensions = 1200, + }; + + Embedding embedding2 = new () + { + Path = "/vector2", + DataType = VectorDataType.Uint8, + DistanceFunction = DistanceFunction.Cosine, + Dimensions = 3, + }; + + Embedding embedding3 = new () + { + Path = "/vector3", + DataType = VectorDataType.Float32, + DistanceFunction = DistanceFunction.Euclidean, + Dimensions = 400, + }; + + Collection embeddings = new Collection() + { + embedding1, + embedding2, + embedding3, + }; + + ContainerProperties containerSettings = new ContainerProperties(id: "TestContainer", partitionKeyPath: "/partitionKey") + { + VectorEmbeddingPolicy = new(embeddings), + IndexingPolicy = new Cosmos.IndexingPolicy() + { + VectorIndexes = new() + { + new VectorIndexPath() + { + Path = "/vector1", + Type = VectorIndexType.Flat, + }, + new VectorIndexPath() + { + Path = "/vector2", + Type = VectorIndexType.Flat, + }, + new VectorIndexPath() + { + Path = "/vector3", + Type = VectorIndexType.Flat, + } + }, + + }, + }; + + Assert.IsNotNull(containerSettings.IndexingPolicy); + Assert.IsNotNull(containerSettings.VectorEmbeddingPolicy); + Assert.IsNotNull(containerSettings.IndexingPolicy.VectorIndexes); + + VectorEmbeddingPolicy embeddingPolicy = containerSettings.VectorEmbeddingPolicy; + 
Assert.IsNotNull(embeddingPolicy.Embeddings); + Assert.AreEqual(embeddings.Count, embeddingPolicy.Embeddings.Count()); + CollectionAssert.AreEquivalent(embeddings, embeddingPolicy.Embeddings.ToList()); + + Collection vectorIndexes = containerSettings.IndexingPolicy.VectorIndexes; + Assert.AreEqual("/vector1", vectorIndexes[0].Path); + Assert.AreEqual(VectorIndexType.Flat, vectorIndexes[0].Type); + Assert.AreEqual("/vector2", vectorIndexes[1].Path); + Assert.AreEqual(VectorIndexType.Flat, vectorIndexes[1].Type); + Assert.AreEqual("/vector3", vectorIndexes[2].Path); + Assert.AreEqual(VectorIndexType.Flat, vectorIndexes[2].Type); + } + private static string SerializeDocumentCollection(DocumentCollection collection) { using (MemoryStream ms = new MemoryStream()) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/SettingsContractTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/SettingsContractTests.cs index 4cf045cd7b..0aebb7e07d 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/SettingsContractTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/SettingsContractTests.cs @@ -339,7 +339,7 @@ public void AccountPropertiesDeserializeWithAdditionalDataTest() [TestMethod] public void ContainerPropertiesDeserializeWithAdditionalDataTest() { - string cosmosSerialized = 
"{\"indexingPolicy\":{\"automatic\":true,\"indexingMode\":\"Consistent\",\"additionalIndexPolicy\":\"indexpolicyvalue\",\"includedPaths\":[{\"path\":\"/included/path\",\"additionalIncludedPath\":\"includedPathValue\",\"indexes\":[]}],\"excludedPaths\":[{\"path\":\"/excluded/path\",\"additionalExcludedPath\":\"excludedPathValue\"}],\"compositeIndexes\":[[{\"path\":\"/composite/path\",\"additionalCompositeIndex\":\"compositeIndexValue\",\"order\":\"ascending\"}]],\"spatialIndexes\":[{\"path\":\"/spatial/path\",\"additionalSpatialIndexes\":\"spatialIndexValue\",\"types\":[]}]},\"computedProperties\":[{\"name\":\"lowerName\",\"query\":\"SELECT VALUE LOWER(c.name) FROM c\"},{\"name\":\"estimatedTax\",\"query\":\"SELECT VALUE c.salary * 0.2 FROM c\"}],\"geospatialConfig\":{\"type\":\"Geography\",\"additionalGeospatialConfig\":\"geospatialConfigValue\"},\"uniqueKeyPolicy\":{\"additionalUniqueKeyPolicy\":\"uniqueKeyPolicyValue\",\"uniqueKeys\":[{\"paths\":[\"/unique/key/path/1\",\"/unique/key/path/2\"]}]},\"conflictResolutionPolicy\":{\"mode\":\"LastWriterWins\",\"additionalConflictResolutionPolicy\":\"conflictResolutionValue\"},\"clientEncryptionPolicy\":{\"includedPaths\":[{\"path\":\"/path\",\"clientEncryptionKeyId\":\"clientEncryptionKeyId\",\"encryptionType\":\"Randomized\",\"additionalIncludedPath\":\"includedPathValue\",\"encryptionAlgorithm\":\"AEAD_AES_256_CBC_HMAC_SHA256\"}],\"policyFormatVersion\":1,\"additionalEncryptionPolicy\":\"clientEncryptionpolicyValue\"},\"id\":\"2a9f501b-6948-4795-8fd1-797defb5c466\",\"partitionKey\":{\"paths\":[],\"kind\":\"Hash\"}}"; + string cosmosSerialized = 
"{\"indexingPolicy\":{\"automatic\":true,\"indexingMode\":\"Consistent\",\"additionalIndexPolicy\":\"indexpolicyvalue\",\"includedPaths\":[{\"path\":\"/included/path\",\"additionalIncludedPath\":\"includedPathValue\",\"indexes\":[]}],\"excludedPaths\":[{\"path\":\"/excluded/path\",\"additionalExcludedPath\":\"excludedPathValue\"}],\"compositeIndexes\":[[{\"path\":\"/composite/path\",\"additionalCompositeIndex\":\"compositeIndexValue\",\"order\":\"ascending\"}]],\"spatialIndexes\":[{\"path\":\"/spatial/path\",\"additionalSpatialIndexes\":\"spatialIndexValue\",\"types\":[]}],\"vectorIndexes\":[{\"path\":\"/vector1\",\"type\":\"flat\",\"additionalVectorIndex\":\"vectorIndexValue1\"},{\"path\":\"/vector2\",\"type\":\"quantizedFlat\",\"additionalVectorIndex\":\"vectorIndexValue2\"},{\"path\":\"/vector3\",\"type\":\"diskANN\"}]},\"computedProperties\":[{\"name\":\"lowerName\",\"query\":\"SELECT VALUE LOWER(c.name) FROM c\"},{\"name\":\"estimatedTax\",\"query\":\"SELECT VALUE c.salary * 0.2 FROM 
c\"}],\"geospatialConfig\":{\"type\":\"Geography\",\"additionalGeospatialConfig\":\"geospatialConfigValue\"},\"uniqueKeyPolicy\":{\"additionalUniqueKeyPolicy\":\"uniqueKeyPolicyValue\",\"uniqueKeys\":[{\"paths\":[\"/unique/key/path/1\",\"/unique/key/path/2\"]}]},\"conflictResolutionPolicy\":{\"mode\":\"LastWriterWins\",\"additionalConflictResolutionPolicy\":\"conflictResolutionValue\"},\"clientEncryptionPolicy\":{\"includedPaths\":[{\"path\":\"/path\",\"clientEncryptionKeyId\":\"clientEncryptionKeyId\",\"encryptionType\":\"Randomized\",\"additionalIncludedPath\":\"includedPathValue\",\"encryptionAlgorithm\":\"AEAD_AES_256_CBC_HMAC_SHA256\"}],\"policyFormatVersion\":1,\"additionalEncryptionPolicy\":\"clientEncryptionpolicyValue\"},\"id\":\"2a9f501b-6948-4795-8fd1-797defb5c466\",\"partitionKey\":{\"paths\":[],\"kind\":\"Hash\"},\"vectorEmbeddingPolicy\":{\"vectorEmbeddings\":[{\"path\":\"/vector1\",\"dataType\":\"float32\",\"dimensions\":1200,\"distanceFunction\":\"cosine\"},{\"path\":\"/vector2\",\"dataType\":\"int8\",\"dimensions\":3,\"distanceFunction\":\"dotproduct\"},{\"path\":\"/vector3\",\"dataType\":\"uint8\",\"dimensions\":400,\"distanceFunction\":\"euclidean\"}]}}"; JObject complexObject = JObject.FromObject(new { id = 1, name = new { fname = "fname", lname = "lname" } }); @@ -368,6 +368,14 @@ public void ContainerPropertiesDeserializeWithAdditionalDataTest() Assert.AreEqual(1, containerProperties.IndexingPolicy.CompositeIndexes[0][0].AdditionalProperties.Count); Assert.AreEqual("compositeIndexValue", containerProperties.IndexingPolicy.CompositeIndexes[0][0].AdditionalProperties["additionalCompositeIndex"]); + Assert.AreEqual(1, containerProperties.IndexingPolicy.VectorIndexes[0].AdditionalProperties.Count); + Assert.AreEqual("vectorIndexValue1", containerProperties.IndexingPolicy.VectorIndexes[0].AdditionalProperties["additionalVectorIndex"]); + + Assert.AreEqual(1, containerProperties.IndexingPolicy.VectorIndexes[1].AdditionalProperties.Count); + 
Assert.AreEqual("vectorIndexValue2", containerProperties.IndexingPolicy.VectorIndexes[1].AdditionalProperties["additionalVectorIndex"]); + + Assert.IsNull(containerProperties.IndexingPolicy.VectorIndexes[2].AdditionalProperties); + Assert.AreEqual(1, containerProperties.IndexingPolicy.IncludedPaths[0].AdditionalProperties.Count); Assert.AreEqual("includedPathValue", containerProperties.IndexingPolicy.IncludedPaths[0].AdditionalProperties["additionalIncludedPath"]); @@ -389,6 +397,13 @@ public void ContainerPropertiesDeserializeWithAdditionalDataTest() Assert.AreEqual(1, containerProperties.ClientEncryptionPolicy.IncludedPaths.First().AdditionalProperties.Count); Assert.AreEqual("includedPathValue", containerProperties.ClientEncryptionPolicy.IncludedPaths.First().AdditionalProperties["additionalIncludedPath"]); + Assert.IsNotNull(containerProperties.VectorEmbeddingPolicy); + Assert.AreEqual(3, containerProperties.VectorEmbeddingPolicy.Embeddings.Count); + Assert.AreEqual("/vector1", containerProperties.VectorEmbeddingPolicy.Embeddings[0].Path); + Assert.AreEqual(VectorDataType.Float32, containerProperties.VectorEmbeddingPolicy.Embeddings[0].DataType); + Assert.AreEqual((ulong)1200, containerProperties.VectorEmbeddingPolicy.Embeddings[0].Dimensions); + Assert.AreEqual(DistanceFunction.Cosine, containerProperties.VectorEmbeddingPolicy.Embeddings[0].DistanceFunction); + Assert.AreEqual(2, containerProperties.ComputedProperties.Count); Assert.AreEqual("lowerName", containerProperties.ComputedProperties[0].Name); Assert.AreEqual("SELECT VALUE LOWER(c.name) FROM c", containerProperties.ComputedProperties[0].Query); @@ -748,7 +763,8 @@ public void ContainerSettingsDefaults() "PartitionKeyDefinitionVersion", "ConflictResolutionPolicy", "ClientEncryptionPolicy", - "PartitionKeyPaths"); + "PartitionKeyPaths", + "VectorEmbeddingPolicy"); #endif // Two equivalent definitions @@ -1054,6 +1070,48 @@ public void ChangeFeedPolicySerialization_InvalidValues() 
Assert.ThrowsException(() => new Cosmos.ChangeFeedPolicy() { FullFidelityRetention = TimeSpan.FromSeconds(-10) }); } + [TestMethod] + public void VectorEmbeddingPolicySerialization() + { + ContainerProperties containerSettings = new ContainerProperties("TestContainer", "/pk"); + string serialization = JsonConvert.SerializeObject(containerSettings); + Assert.IsFalse(serialization.Contains("vectorEmbeddingPolicy"), "Vector Embedding Policy should not be included by default"); + + Embedding embedding1 = new() + { + Path = "/vector1", + DataType = VectorDataType.Int8, + DistanceFunction = DistanceFunction.DotProduct, + Dimensions = 1200, + }; + + Embedding embedding2 = new() + { + Path = "/vector2", + DataType = VectorDataType.Uint8, + DistanceFunction = DistanceFunction.Cosine, + Dimensions = 3, + }; + + Collection embeddings = new () + { + embedding1, + embedding2, + }; + + containerSettings.VectorEmbeddingPolicy = new VectorEmbeddingPolicy(embeddings); + + string serializationWithValues = JsonConvert.SerializeObject(containerSettings); + Assert.IsTrue(serializationWithValues.Contains("vectorEmbeddingPolicy"), "Vector Embedding Policy should be included."); + Assert.IsTrue(serializationWithValues.Contains("distanceFunction"), "Vector Embedding Policy distance function should be included."); + + JObject parsed = JObject.Parse(serializationWithValues); + JToken vectorEmbeddings = parsed["vectorEmbeddingPolicy"]["vectorEmbeddings"]; + Assert.AreEqual(JTokenType.Array, vectorEmbeddings.Type, "Vector Embedding Policy serialized vectorEmbeddings should be an array."); + Assert.IsTrue(embedding1.Equals(vectorEmbeddings.Value()[0].ToObject())); + Assert.IsTrue(embedding2.Equals(vectorEmbeddings.Value()[1].ToObject())); + } + private static T CosmosDeserialize(string payload) { using (MemoryStream ms = new MemoryStream())