diff --git a/docs/changelog/127134.yaml b/docs/changelog/127134.yaml new file mode 100644 index 0000000000000..97d6de2f99878 --- /dev/null +++ b/docs/changelog/127134.yaml @@ -0,0 +1,5 @@ +pr: 127134 +summary: Define a default oversample value for dense vectors with bbq_hnsw/bbq_flat +area: Vector Search +type: enhancement +issues: [] diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_bbq_hnsw.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_bbq_hnsw.yml index f9c35db6711ac..e3c1155ed2000 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_bbq_hnsw.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_bbq_hnsw.yml @@ -568,3 +568,14 @@ setup: - match: { hits.hits.1._score: $default_rescore1 } - match: { hits.hits.2._score: $override_score2 } - match: { hits.hits.2._score: $default_rescore2 } + +--- +"default oversample value": + - requires: + cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"] + reason: "Needs default_oversample_value_for_bbq feature" + - do: + indices.get_mapping: + index: bbq_hnsw + + - match: { bbq_hnsw.mappings.properties.vector.index_options.rescore_vector.oversample: 3.0 } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_bbq_flat.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_bbq_flat.yml index 4145eabdbcb35..b508b7b2a923c 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_bbq_flat.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_bbq_flat.yml @@ -339,3 +339,14 @@ setup: - match: { hits.hits.0._score: $rescore_score0 } - match: { hits.hits.1._score: $rescore_score1 } - match: { hits.hits.2._score: $rescore_score2 } + +--- +"default 
oversample value": + - requires: + cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"] + reason: "Needs default_oversample_value_for_bbq feature" + - do: + indices.get_mapping: + index: bbq_flat + + - match: { bbq_flat.mappings.properties.vector.index_options.rescore_vector.oversample: 3.0 } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int4_flat.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int4_flat.yml index 64bf853449805..907c33575e4f0 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int4_flat.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int4_flat.yml @@ -495,3 +495,14 @@ setup: - match: { hits.hits.0._score: $rescore_score0 } - match: { hits.hits.1._score: $rescore_score1 } - match: { hits.hits.2._score: $rescore_score2 } + +--- +"no default oversample value": + - requires: + cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"] + reason: "Needs default_oversample_value_for_bbq feature" + - do: + indices.get_mapping: + index: int4_flat + + - not_exists: int4_flat.mappings.properties.vector.index_options.rescore_vector diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int8_flat.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int8_flat.yml index 1087b5b264cf8..0fc3f3e4f91ce 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int8_flat.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int8_flat.yml @@ -436,3 +436,14 @@ setup: - match: { hits.hits.0._score: $rescore_score0 } - match: { hits.hits.1._score: $rescore_score1 } - match: { hits.hits.2._score: $rescore_score2 } + +--- +"no default oversample value": + - 
requires: + cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"] + reason: "Needs default_oversample_value_for_bbq feature" + - do: + indices.get_mapping: + index: int8_flat + + - not_exists: int8_flat.mappings.properties.vector.index_options.rescore_vector diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index 8af1caf8c20de..96386ceca6c9f 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -161,6 +161,7 @@ private static Version parseUnchecked(String version) { public static final IndexVersion USE_LUCENE101_POSTINGS_FORMAT = def(9_021_0_00, Version.LUCENE_10_1_0); public static final IndexVersion UPGRADE_TO_LUCENE_10_2_0 = def(9_022_00_0, Version.LUCENE_10_2_0); public static final IndexVersion UPGRADE_TO_LUCENE_10_2_1 = def(9_023_00_0, Version.LUCENE_10_2_1); + public static final IndexVersion DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ = def(9_024_0_00, Version.LUCENE_10_2_1); /* * STOP! READ THIS FIRST! No, really, * ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _ diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index 7e16f5754a132..18ae1fa802df6 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -16,6 +16,7 @@ import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING; +import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ; /** * Spec for mapper-related features. 
@@ -66,7 +67,8 @@ public Set getTestFeatures() { RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING, DateFieldMapper.INVALID_DATE_FIX, NPE_ON_DIMS_UPDATE_FIX, - RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING + RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING, + USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ ); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index b9ea4b78ec499..53a7e8836e3db 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -117,11 +117,15 @@ public static boolean isNotUnitVector(float magnitude) { public static final IndexVersion ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS = IndexVersions.ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS; public static final IndexVersion RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS = IndexVersions.RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS; + public static final IndexVersion DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ = IndexVersions.DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ; public static final NodeFeature RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING = new NodeFeature("mapper.dense_vector.rescore_vector"); public static final NodeFeature RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING = new NodeFeature( "mapper.dense_vector.rescore_zero_vector" ); + public static final NodeFeature USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ = new NodeFeature( + "mapper.dense_vector.default_oversample_value_for_bbq" + ); public static final String CONTENT_TYPE = "dense_vector"; public static final short MAX_DIMS_COUNT = 4096; // maximum allowed number of dimensions @@ -131,6 +135,7 @@ public static boolean isNotUnitVector(float magnitude) { // vector public static final int MAGNITUDE_BYTES = 4; public static final int OVERSAMPLE_LIMIT = 10_000; // Max oversample allowed + public static final float 
DEFAULT_OVERSAMPLE = 3.0F; // Default oversample value private static DenseVectorFieldMapper toType(FieldMapper in) { return (DenseVectorFieldMapper) in; @@ -1462,6 +1467,9 @@ public IndexOptions parseIndexOptions(String fieldName, Map indexOpti RescoreVector rescoreVector = null; if (indexVersion.onOrAfter(ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS)) { rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion); + if (rescoreVector == null && indexVersion.onOrAfter(DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ)) { + rescoreVector = new RescoreVector(DEFAULT_OVERSAMPLE); + } } MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); return new BBQHnswIndexOptions(m, efConstruction, rescoreVector); @@ -1483,6 +1491,9 @@ public IndexOptions parseIndexOptions(String fieldName, Map indexOpti RescoreVector rescoreVector = null; if (indexVersion.onOrAfter(ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS)) { rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion); + if (rescoreVector == null && indexVersion.onOrAfter(DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ)) { + rescoreVector = new RescoreVector(DEFAULT_OVERSAMPLE); + } } MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); return new BBQFlatIndexOptions(rescoreVector); @@ -2311,6 +2322,10 @@ int getVectorDimensions() { ElementType getElementType() { return elementType; } + + IndexOptions getIndexOptions() { + return indexOptions; + } } private final IndexOptions indexOptions; diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index 4ebda66ef0d6c..b1e8f04c4eb6c 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -1022,6 +1022,60 @@ public void testInvalidRescoreVector() { } } + public void 
testDefaultOversampleValue() throws IOException { /* Verifies the rescore_vector oversample the mapper reports when index_options names only the type. */ + { /* bbq_hnsw: oversample is expected to be exactly 3.0 */ + DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> { + b.field("type", "dense_vector"); + b.field("dims", 128); + b.field("index", true); + b.field("similarity", "dot_product"); + b.startObject("index_options"); + b.field("type", "bbq_hnsw"); + b.endObject(); + })); + + DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field"); + DenseVectorFieldMapper.BBQHnswIndexOptions indexOptions = (DenseVectorFieldMapper.BBQHnswIndexOptions) denseVectorFieldMapper + .fieldType() + .getIndexOptions(); + assertEquals(3.0F, indexOptions.rescoreVector.oversample(), 0.0F); + } + { /* bbq_flat: oversample is expected to be exactly 3.0 */ + DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> { + b.field("type", "dense_vector"); + b.field("dims", 128); + b.field("index", true); + b.field("similarity", "dot_product"); + b.startObject("index_options"); + b.field("type", "bbq_flat"); + b.endObject(); + })); + + DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field"); + DenseVectorFieldMapper.BBQFlatIndexOptions indexOptions = (DenseVectorFieldMapper.BBQFlatIndexOptions) denseVectorFieldMapper + .fieldType() + .getIndexOptions(); + assertEquals(3.0F, indexOptions.rescoreVector.oversample(), 0.0F); + } + { /* int8_hnsw: rescoreVector is expected to stay null */ + DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> { + b.field("type", "dense_vector"); + b.field("dims", 128); + b.field("index", true); + b.field("similarity", "dot_product"); + b.startObject("index_options"); + b.field("type", "int8_hnsw"); + b.endObject(); + })); + + DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field"); + DenseVectorFieldMapper.Int8HnswIndexOptions indexOptions = (DenseVectorFieldMapper.Int8HnswIndexOptions) denseVectorFieldMapper + .fieldType() + .getIndexOptions(); + assertNull(indexOptions.rescoreVector); + } + } + + public void
testDims() { { Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { diff --git a/server/src/test/java/org/elasticsearch/search/vectors/AbstractKnnVectorQueryBuilderTestCase.java b/server/src/test/java/org/elasticsearch/search/vectors/AbstractKnnVectorQueryBuilderTestCase.java index 1a964c8c2b4f7..9499edc71b4a6 100644 --- a/server/src/test/java/org/elasticsearch/search/vectors/AbstractKnnVectorQueryBuilderTestCase.java +++ b/server/src/test/java/org/elasticsearch/search/vectors/AbstractKnnVectorQueryBuilderTestCase.java @@ -46,6 +46,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DEFAULT_OVERSAMPLE; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.OVERSAMPLE_LIMIT; import static org.elasticsearch.search.SearchService.DEFAULT_SIZE; import static org.hamcrest.Matchers.containsString; @@ -144,7 +145,7 @@ protected KnnVectorQueryBuilder doCreateTestQueryBuilder() { fieldName, k, numCands, - randomRescoreVectorBuilder(), + isIndexTypeBBQ() ? randomBBQRescoreVectorBuilder() : randomRescoreVectorBuilder(), randomFloat() ); @@ -161,6 +162,16 @@ protected KnnVectorQueryBuilder doCreateTestQueryBuilder() { return queryBuilder; } + /** True when {@code indexType} is {@code bbq_hnsw} or {@code bbq_flat}; a null {@code indexType} yields false. */ + private boolean isIndexTypeBBQ() { + return "bbq_hnsw".equals(indexType) || "bbq_flat".equals(indexType); + } + + /** Never-null builder for BBQ index types: {@code DEFAULT_OVERSAMPLE} or a random value, chosen by coin flip. */ + protected RescoreVectorBuilder randomBBQRescoreVectorBuilder() { + return new RescoreVectorBuilder(randomBoolean() ? DEFAULT_OVERSAMPLE : randomFloatBetween(1.0f, 10.0f, false)); + } + protected RescoreVectorBuilder randomRescoreVectorBuilder() { if (randomBoolean()) { return null;