Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
8fb29cc
Initial code changes to add vector embedding policy and index type in…
kundadebdatta Mar 28, 2024
5e8011a
Merge branch 'master' into users/kundadebdatta/4364_vector_index_add_…
kundadebdatta Mar 28, 2024
8e3a5c8
Code changes to add container builder for vector index type.
kundadebdatta Mar 29, 2024
287b2cd
Merge branch 'master' into users/kundadebdatta/4364_vector_index_add_…
kundadebdatta Mar 29, 2024
2a1556a
Code changes to add and fix tests.
kundadebdatta Apr 1, 2024
b63fdcd
Revert back client create and initialize test.
kundadebdatta Apr 1, 2024
1c46250
Skipping some of the V2 tests for vector indexes.
kundadebdatta Apr 1, 2024
4c69d61
Merge branch 'master' into users/kundadebdatta/4364_vector_index_add_…
kundadebdatta Apr 1, 2024
de2278c
Code changes to fix GA and preview contracts.
kundadebdatta Apr 1, 2024
5a80ec0
Code changes to address review comments part 1.
kundadebdatta Apr 3, 2024
c9e72b4
Removed unnecessary JSON argument for vector index.
kundadebdatta Apr 3, 2024
0643222
Code changes to update contract changes.
kundadebdatta Apr 4, 2024
7fc1dde
Code changes to update preview contract changes in tests.
kundadebdatta Apr 4, 2024
ad6cec8
Code changes to address few review comments.
kundadebdatta Apr 4, 2024
02e01bb
Code changes to add few more tests to validate serialization and dese…
kundadebdatta Apr 4, 2024
add9e94
Merge branch 'master' into users/kundadebdatta/4364_vector_index_add_…
kundadebdatta Apr 4, 2024
66f3a87
Merge branch 'master' into users/kundadebdatta/4364_vector_index_add_…
kundadebdatta Apr 4, 2024
55e3735
Code changes to address some review comments for best practices.
kundadebdatta Apr 5, 2024
99ea8c0
Code changes to fix test failures.
kundadebdatta Apr 5, 2024
6e225bf
Merge branch 'master' into users/kundadebdatta/4364_vector_index_add_…
kundadebdatta Apr 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions Microsoft.Azure.Cosmos/src/Fluent/Settings/ContainerBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
namespace Microsoft.Azure.Cosmos.Fluent
{
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Threading;
using System.Threading.Tasks;

Expand All @@ -20,6 +22,7 @@ public class ContainerBuilder : ContainerDefinition<ContainerBuilder>
private ConflictResolutionPolicy conflictResolutionPolicy;
private ChangeFeedPolicy changeFeedPolicy;
private ClientEncryptionPolicy clientEncryptionPolicy;
private VectorEmbeddingPolicy vectorEmbeddingPolicy;

/// <summary>
/// Creates an instance for unit-testing
Expand Down Expand Up @@ -114,6 +117,20 @@ public ClientEncryptionPolicyDefinition WithClientEncryptionPolicy(int policyFor
policyFormatVersion);
}

/// <summary>
/// Defines the vector embedding policy for this Azure Cosmos container.
/// </summary>
/// <param name="embeddings">Collection of vector embeddings to include in the policy definition.</param>
/// <returns>
/// An instance of <see cref="VectorEmbeddingPolicyDefinition"/> whose attach callback
/// stores the resulting policy on this builder.
/// </returns>
internal VectorEmbeddingPolicyDefinition WithVectorEmbeddingPolicy(
    Collection<Embedding> embeddings)
{
    // Delegate handed to the definition; it records the policy on this builder.
    Action<VectorEmbeddingPolicy> storePolicy = this.AddVectorEmbeddingPolicy;

    return new VectorEmbeddingPolicyDefinition(
        parent: this,
        embeddings: embeddings,
        attachCallback: storePolicy);
}

/// <summary>
/// Creates a container with the current fluent definition.
/// </summary>
Expand Down Expand Up @@ -220,6 +237,11 @@ public async Task<ContainerResponse> CreateIfNotExistsAsync(
containerProperties.ClientEncryptionPolicy = this.clientEncryptionPolicy;
}

if (this.vectorEmbeddingPolicy != null)
{
containerProperties.VectorEmbeddingPolicy = this.vectorEmbeddingPolicy;
}

return containerProperties;
}

Expand Down Expand Up @@ -254,5 +276,10 @@ private void AddClientEncryptionPolicy(ClientEncryptionPolicy clientEncryptionPo
{
this.clientEncryptionPolicy = clientEncryptionPolicy;
}

/// <summary>
/// Stores the supplied vector embedding policy on this builder, replacing any previously stored one.
/// </summary>
/// <param name="embeddingPolicy">The policy to remember.</param>
private void AddVectorEmbeddingPolicy(VectorEmbeddingPolicy embeddingPolicy)
    => this.vectorEmbeddingPolicy = embeddingPolicy;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,17 @@ public SpatialIndexDefinition<IndexingPolicyDefinition<T>> WithSpatialIndex()
(spatialIndex) => this.AddSpatialPath(spatialIndex));
}

/// <summary>
/// Starts the definition of a <see cref="VectorIndexPath"/> for the current <see cref="Container"/>'s indexing policy.
/// </summary>
/// <returns>
/// An instance of <see cref="VectorIndexDefinition{T}"/> whose attach callback adds the
/// resulting path to this indexing policy definition.
/// </returns>
internal VectorIndexDefinition<IndexingPolicyDefinition<T>> WithVectorIndex()
{
    // Delegate handed to the definition; it appends the path to the vector indexes.
    Action<VectorIndexPath> registerPath = this.AddVectorIndexPath;

    return new VectorIndexDefinition<IndexingPolicyDefinition<T>>(
        parent: this,
        attachCallback: registerPath);
}

/// <summary>
/// Applies the current definition to the parent.
/// </summary>
Expand All @@ -133,6 +144,11 @@ private void AddSpatialPath(SpatialPath spatialSpec)
this.indexingPolicy.SpatialIndexes.Add(spatialSpec);
}

/// <summary>
/// Appends the supplied vector index path to the indexing policy's vector indexes.
/// </summary>
/// <param name="vectorIndexPath">The vector index path to add.</param>
private void AddVectorIndexPath(VectorIndexPath vectorIndexPath)
    => this.indexingPolicy.VectorIndexes.Add(vectorIndexPath);

private void AddIncludedPaths(IEnumerable<string> paths)
{
foreach (string path in paths)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
//------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
//------------------------------------------------------------

namespace Microsoft.Azure.Cosmos.Fluent
{
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;

/// <summary>
/// Fluent definition that builds a <see cref="VectorEmbeddingPolicy"/> and hands it to its parent <see cref="ContainerBuilder"/>.
/// </summary>
internal class VectorEmbeddingPolicyDefinition
{
    private readonly ContainerBuilder parent;
    private readonly Action<VectorEmbeddingPolicy> attachCallback;
    private readonly Collection<Embedding> vectorEmbeddings;

    /// <summary>
    /// Initializes a new definition bound to the given parent builder.
    /// </summary>
    /// <param name="parent">Builder returned from <see cref="Attach"/>.</param>
    /// <param name="embeddings">Embeddings the policy is constructed from; stored as provided.</param>
    /// <param name="attachCallback">Callback invoked with the built policy when <see cref="Attach"/> is called.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="parent"/> or <paramref name="attachCallback"/> is null.
    /// </exception>
    internal VectorEmbeddingPolicyDefinition(
        ContainerBuilder parent,
        Collection<Embedding> embeddings,
        Action<VectorEmbeddingPolicy> attachCallback)
    {
        // Guards run in the same order as the arguments are consumed: parent first, then the callback.
        if (parent is null)
        {
            throw new ArgumentNullException(nameof(parent));
        }

        if (attachCallback is null)
        {
            throw new ArgumentNullException(nameof(attachCallback));
        }

        this.parent = parent;
        this.attachCallback = attachCallback;
        this.vectorEmbeddings = embeddings;
    }

    /// <summary>
    /// Builds the policy from the stored embeddings and applies it to the parent.
    /// </summary>
    /// <returns>An instance of the parent.</returns>
    public ContainerBuilder Attach()
    {
        this.attachCallback.Invoke(new VectorEmbeddingPolicy(this.vectorEmbeddings));

        return this.parent;
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
//------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
//------------------------------------------------------------

namespace Microsoft.Azure.Cosmos.Fluent
{
using System;

/// <summary>
/// Vector index fluent definition.
/// </summary>
/// <typeparam name="T">Type of the parent definition returned by <see cref="Attach"/>.</typeparam>
/// <seealso cref="VectorIndexPath"/>
internal class VectorIndexDefinition<T>
{
    private readonly VectorIndexPath vectorIndexPath = new VectorIndexPath();
    private readonly T parent;
    private readonly Action<VectorIndexPath> attachCallback;

    /// <summary>
    /// Initializes a new vector index definition bound to the given parent.
    /// </summary>
    /// <param name="parent">Parent definition returned from <see cref="Attach"/>; stored as provided.</param>
    /// <param name="attachCallback">Callback invoked with the configured <see cref="VectorIndexPath"/> when <see cref="Attach"/> is called.</param>
    /// <exception cref="ArgumentNullException"><paramref name="attachCallback"/> is null.</exception>
    internal VectorIndexDefinition(
        T parent,
        Action<VectorIndexPath> attachCallback)
    {
        this.parent = parent;

        // Fail fast here instead of surfacing a NullReferenceException later from Attach().
        this.attachCallback = attachCallback ?? throw new ArgumentNullException(nameof(attachCallback));
    }

    /// <summary>
    /// Sets the path and the <see cref="VectorIndexType"/> of the current <see cref="VectorIndexPath"/> definition.
    /// Calling this method again overwrites the previously set path and type.
    /// </summary>
    /// <param name="path">Property path for the current definition. Example: /property</param>
    /// <param name="indexType">The <see cref="VectorIndexType"/> to apply to the path.</param>
    /// <returns>An instance of the current <see cref="VectorIndexDefinition{T}"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="path"/> is null or empty.</exception>
    public VectorIndexDefinition<T> Path(
        string path,
        VectorIndexType indexType)
    {
        if (string.IsNullOrEmpty(path))
        {
            throw new ArgumentNullException(nameof(path));
        }

        this.vectorIndexPath.Path = path;
        this.vectorIndexPath.Type = indexType;

        return this;
    }

    /// <summary>
    /// Applies the current definition to the parent.
    /// </summary>
    /// <returns>An instance of the parent.</returns>
    public T Attach()
    {
        this.attachCallback(this.vectorIndexPath);
        return this.parent;
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ public class ContainerProperties
[JsonProperty(PropertyName = "clientEncryptionPolicy", NullValueHandling = NullValueHandling.Ignore)]
private ClientEncryptionPolicy clientEncryptionPolicyInternal;

[JsonProperty(PropertyName = "vectorEmbeddingPolicy", NullValueHandling = NullValueHandling.Ignore)]
private VectorEmbeddingPolicy vectorEmbeddingPolicyInternal;

[JsonProperty(PropertyName = "computedProperties", NullValueHandling = NullValueHandling.Ignore)]
private Collection<ComputedProperty> computedProperties;

Expand Down Expand Up @@ -289,6 +292,27 @@ public IndexingPolicy IndexingPolicy
}
}

/// <summary>
/// Gets or sets the vector embedding policy, which contains the embedding paths and their path-specific settings
/// used when performing vector search on the items of a collection in the Azure CosmosDB database service.
/// </summary>
/// <value>
/// Optional. The default is null, meaning the feature is turned off for the container.
/// </value>
/// <remarks>
/// <para>
/// The <see cref="Cosmos.VectorEmbeddingPolicy"/> will be applied to all the items in the container as the default policy.
/// </para>
/// <para>
/// This property is ignored by the JSON serializer; the private backing field carries the
/// "vectorEmbeddingPolicy" JSON property instead.
/// </para>
/// </remarks>
[JsonIgnore]
internal VectorEmbeddingPolicy VectorEmbeddingPolicy
{
    get
    {
        return this.vectorEmbeddingPolicyInternal;
    }

    set
    {
        this.vectorEmbeddingPolicyInternal = value;
    }
}

/// <summary>
/// Gets or sets the collection containing <see cref="ComputedProperty"/> objects in the container.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
//------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
//------------------------------------------------------------
namespace Microsoft.Azure.Cosmos
{
using System.Runtime.Serialization;

/// <summary>
/// Defines the distance function for a vector index specification in the Azure Cosmos DB service.
/// </summary>
/// <remarks>
/// See <see cref="Embedding"/> for usage. Each member carries an <see cref="EnumMemberAttribute"/>
/// with its lower-case string name.
/// </remarks>
/// <seealso cref="Embedding"/>
internal enum DistanceFunction
{
    /// <summary>
    /// Represents the euclidean distance function.
    /// </summary>
    [EnumMember(Value = "euclidean")]
    Euclidean = 0,

    /// <summary>
    /// Represents the cosine distance function.
    /// </summary>
    [EnumMember(Value = "cosine")]
    Cosine = 1,

    /// <summary>
    /// Represents the dot product distance function.
    /// </summary>
    [EnumMember(Value = "dotproduct")]
    DotProduct = 2,
}
}
77 changes: 77 additions & 0 deletions Microsoft.Azure.Cosmos/src/Resource/Settings/Embedding.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
//------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
//------------------------------------------------------------

namespace Microsoft.Azure.Cosmos
{
using System;
using System.Collections.Generic;
using Microsoft.Azure.Documents;
using Newtonsoft.Json;
using Newtonsoft.Json.Converters;
using Newtonsoft.Json.Linq;

/// <summary>
/// Represents the embedding settings for the vector index.
/// </summary>
internal class Embedding : IEquatable<Embedding>
{
    /// <summary>
    /// Gets or sets a string containing the path of the vector index.
    /// </summary>
    [JsonProperty(PropertyName = Constants.Properties.Path)]
    public string Path { get; set; }

    /// <summary>
    /// Gets or sets the <see cref="Cosmos.VectorDataType"/> representing the corresponding vector data type.
    /// </summary>
    [JsonProperty(PropertyName = "dataType")]
    [JsonConverter(typeof(StringEnumConverter))]
    public VectorDataType DataType { get; set; }

    /// <summary>
    /// Gets or sets an unsigned long integer representing the dimensions of a vector.
    /// </summary>
    [JsonProperty(PropertyName = "dimensions")]
    public ulong Dimensions { get; set; }

    /// <summary>
    /// Gets or sets the <see cref="Cosmos.DistanceFunction"/> which is used to calculate the respective distance between the vectors.
    /// </summary>
    [JsonProperty(PropertyName = "distanceFunction")]
    [JsonConverter(typeof(StringEnumConverter))]
    public DistanceFunction DistanceFunction { get; set; }

    /// <summary>
    /// This contains additional values for scenarios where the SDK is not aware of new fields.
    /// This ensures that if resource is read and updated none of the fields will be lost in the process.
    /// </summary>
    [JsonExtensionData]
    internal IDictionary<string, JToken> AdditionalProperties { get; private set; }

    /// <summary>
    /// Ensures that the path specified for this embedding is valid: it must be non-empty and start with '/'.
    /// </summary>
    /// <exception cref="ArgumentException">
    /// <see cref="Path"/> is null, empty, or does not start with '/'.
    /// </exception>
    public void ValidateEmbeddingPath()
    {
        if (string.IsNullOrEmpty(this.Path))
        {
            // ArgumentException does not format its message, so the name is interpolated here.
            throw new ArgumentException(
                $"Argument {nameof(this.Path)} can't be null or empty.",
                nameof(this.Path));
        }

        if (this.Path[0] != '/')
        {
            // The offending value belongs in the message; paramName must be the member name.
            throw new ArgumentException(
                $"The argument {this.Path} is not a valid path.",
                nameof(this.Path));
        }
    }

    /// <summary>
    /// Determines whether this embedding has the same path, data type, dimensions and
    /// distance function as <paramref name="that"/>. <see cref="AdditionalProperties"/> is not compared.
    /// </summary>
    /// <param name="that">The embedding to compare with; may be null.</param>
    /// <returns>true when all compared settings match; false otherwise, including when <paramref name="that"/> is null.</returns>
    public bool Equals(Embedding that)
    {
        if (that is null)
        {
            return false;
        }

        // Static string.Equals tolerates a null Path on either side; Ordinal matches instance string.Equals.
        return string.Equals(this.Path, that.Path, StringComparison.Ordinal)
            && this.DataType == that.DataType
            && this.Dimensions == that.Dimensions
            && this.DistanceFunction == that.DistanceFunction;
    }
}
}
33 changes: 33 additions & 0 deletions Microsoft.Azure.Cosmos/src/Resource/Settings/IndexingPolicy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,45 @@ public IndexingPolicy()
[JsonProperty(PropertyName = Constants.Properties.CompositeIndexes)]
public Collection<Collection<CompositePath>> CompositeIndexes { get; internal set; } = new Collection<Collection<CompositePath>>();

/// <summary>
/// Gets or sets the collection of vector index definitions of this indexing policy.
/// The getter creates and stores an empty collection when none has been set yet.
/// </summary>
/// <example>
/// <![CDATA[
/// "vectorIndexes": [
///   {
///     "path": "/vector1",
///     "type": "diskANN"
///   },
///   {
///     "path": "/vector2",
///     "type": "flat"
///   },
///   {
///     "path": "/vector3",
///     "type": "quantizedFlat"
///   }
/// ]
/// ]]>
/// </example>
internal Collection<VectorIndexPath> VectorIndexes
{
    get
    {
        // Lazily materialize the backing collection on first read.
        if (this.VectorIndexesInternal == null)
        {
            this.VectorIndexesInternal = new Collection<VectorIndexPath>();
        }

        return this.VectorIndexesInternal;
    }

    set
    {
        this.VectorIndexesInternal = value;
    }
}

/// <summary>
/// Collection of spatial index definitions to be used
/// </summary>
[JsonProperty(PropertyName = Constants.Properties.SpatialIndexes)]
public Collection<SpatialPath> SpatialIndexes { get; internal set; } = new Collection<SpatialPath>();

/// <summary>
/// Gets or sets the internal placeholder collection that holds the vector indexes.
/// </summary>
/// <remarks>
/// This is the member mapped to the "vectorIndexes" JSON property; a null value is ignored
/// by the serializer. The <see cref="VectorIndexes"/> property reads and writes this member and
/// replaces a null value with an empty collection on first read.
/// </remarks>
[JsonProperty(PropertyName = "vectorIndexes", NullValueHandling = NullValueHandling.Ignore)]
internal Collection<VectorIndexPath> VectorIndexesInternal { get; set; }

/// <summary>
/// This contains additional values for scenarios where the SDK is not aware of new fields.
/// This ensures that if resource is read and updated none of the fields will be lost in the process.
Expand Down
Loading