Skip to content

Commit eee68f2

Browse files
authored
[ML] Add SageMaker (#4940)
Create put request types, objects, and examples for SageMaker. Verified with `make validate api=inference.put`.
1 parent cf6914e commit eee68f2

14 files changed

+981
-112
lines changed

output/schema/schema.json

Lines changed: 580 additions & 112 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

output/typescript/types.ts

Lines changed: 46 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

specification/_doc_ids/table.csv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ alibabacloud-api-keys,https://opensearch.console.aliyun.com/cn-shanghai/rag/api-
88
analysis-analyzers,https://www.elastic.co/docs/reference/text-analysis/analyzer-reference,,
99
amazonbedrock-models,https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html,,
1010
amazonbedrock-secret-keys,https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html,,
11+
amazonsagemaker-invoke,https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_runtime_InvokeEndpoint.html,,
12+
amazonsagemaker-secret-keys,https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html,,
1113
analysis-charfilters,https://www.elastic.co/docs/reference/text-analysis/character-filter-reference,,
1214
analysis-normalizers,https://www.elastic.co/docs/reference/text-analysis/normalizers,,
1315
analysis-standard-analyzer,https://www.elastic.co/docs/reference/text-analysis/analysis-standard-analyzer,,
@@ -355,6 +357,7 @@ inference-api-post-eis-chat-completion,https://www.elastic.co/docs/api/doc/elast
355357
inference-api-put,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/put-inference-api.html,
356358
inference-api-put-alibabacloud,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-alibabacloud,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-alibabacloud-ai-search.html,
357359
inference-api-put-amazonbedrock,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonbedrock,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-amazon-bedrock.html,
360+
inference-api-put-amazonsagemaker,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonsagemaker,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-amazon-sagemaker.html,
358361
inference-api-put-anthropic,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-anthropic,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-anthropic.html,
359362
inference-api-put-azureaistudio,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureaistudio,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-azure-ai-studio.html,
360363
inference-api-put-azureopenai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureopenai,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-azure-openai.html,

specification/inference/_types/CommonTypes.ts

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,149 @@ export enum AmazonBedrockServiceType {
442442
amazonbedrock
443443
}
444444

445+
export class AmazonSageMakerServiceSettings {
446+
/**
447+
* A valid AWS access key that has permissions to use Amazon SageMaker and access to models for invoking requests.
448+
*/
449+
access_key: string
450+
/**
451+
* The name of the SageMaker endpoint.
452+
* @ext_doc_id amazonsagemaker-invoke
453+
*/
454+
endpoint_name: string
455+
/**
456+
* The API format to use when calling SageMaker.
457+
* Elasticsearch will convert the POST _inference request to this data format when invoking the SageMaker endpoint.
458+
*/
459+
api: AmazonSageMakerApi
460+
/**
461+
* The region that your endpoint or Amazon Resource Name (ARN) is deployed in.
462+
* The list of available regions per model can be found in the Amazon SageMaker documentation.
463+
* @ext_doc_id amazonsagemaker-invoke
464+
*/
465+
region: string
466+
/**
467+
* A valid AWS secret key that is paired with the `access_key`.
468+
* For information about creating and managing access and secret keys, refer to the AWS documentation.
469+
* @ext_doc_id amazonsagemaker-secret-keys
470+
*/
471+
secret_key: string
472+
/**
473+
* The model ID when calling a multi-model endpoint.
474+
* @ext_doc_id amazonsagemaker-invoke
475+
*/
476+
target_model?: string
477+
/**
478+
* The container to directly invoke when calling a multi-container endpoint.
479+
* @ext_doc_id amazonsagemaker-invoke
480+
*/
481+
target_container_hostname?: string
482+
/**
483+
* The inference component to directly invoke when calling a multi-component endpoint.
484+
* @ext_doc_id amazonsagemaker-invoke
485+
*/
486+
inference_component_name?: string
487+
/**
488+
* The maximum number of inputs in each batch. This value is used by inference ingestion pipelines
489+
* when processing semantic values. It correlates to the number of times the SageMaker endpoint is
490+
* invoked (one per batch of input).
491+
* @server_default 256
492+
*/
493+
batch_size?: integer
494+
/**
495+
* The number of dimensions returned by the text embedding models. If this value is not provided, then
496+
* it is guessed by invoking the endpoint for the `text_embedding` task.
497+
*/
498+
dimensions?: integer
499+
}
500+
501+
export enum AmazonSageMakerApi {
502+
openai,
503+
elastic
504+
}
505+
506+
/**
507+
* Service settings specific to the Elastic API for the Amazon SageMaker service.
508+
*/
509+
export class AmazonSageMakerElasticServiceSettings extends AmazonSageMakerServiceSettings {
510+
/**
511+
* Similarity measure used when invoking the `text_embedding` task type.
512+
*/
513+
similarity?: AmazonSageMakerSimilarity
514+
515+
/**
516+
* The data type returned by the text embedding model.
517+
* This value must be set when `task_type` is `text_embedding` and is used when parsing the response
518+
* back to Elasticsearch data structures.
519+
*/
520+
element_type: AmazonSageMakerElementType
521+
}
522+
523+
export enum AmazonSageMakerSimilarity {
524+
cosine,
525+
dot_product,
526+
l2_norm
527+
}
528+
529+
export enum AmazonSageMakerElementType {
530+
byte,
531+
float,
532+
bit
533+
}
534+
535+
export interface AmazonSageMakerTaskSettings {
536+
/**
537+
* The AWS custom attributes passed verbatim through to the model running in the SageMaker Endpoint.
538+
* Values will be returned in the `X-elastic-sagemaker-custom-attributes` header.
539+
* @ext_doc_id amazonsagemaker-invoke
540+
*/
541+
custom_attributes?: string
542+
/**
543+
* The optional JMESPath expression used to override the EnableExplanations provided during endpoint creation.
544+
* @ext_doc_id amazonsagemaker-invoke
545+
*/
546+
enable_explanations?: string
547+
/**
548+
* The capture data ID when enabled in the endpoint.
549+
* @ext_doc_id amazonsagemaker-invoke
550+
*/
551+
inference_id?: string
552+
/**
553+
* The stateful session identifier for a new or existing session.
554+
* New sessions will be returned in the `X-elastic-sagemaker-new-session-id` header.
555+
* Closed sessions will be returned in the `X-elastic-sagemaker-closed-session-id` header.
556+
* @ext_doc_id amazonsagemaker-invoke
557+
*/
558+
session_id?: string
559+
/**
560+
* Specifies the variant when running with multi-variant Endpoints.
561+
* @ext_doc_id amazonsagemaker-invoke
562+
*/
563+
target_variant?: string
564+
}
565+
566+
/**
567+
* The `elastic` API allows any key-value pair in the task settings when calling the inference endpoint, but it cannot
568+
* be used when creating the inference endpoint.
569+
*/
570+
export class AmazonSageMakerElasticTaskSettings
571+
implements AmazonSageMakerTaskSettings
572+
{
573+
[key: string]: unknown
574+
}
575+
576+
/**
577+
* `openai` API-specific task settings for Amazon SageMaker.
578+
*/
579+
export interface AmazonSageMakerOpenAiTaskSettings
580+
extends AmazonSageMakerTaskSettings {
581+
user?: string
582+
}
583+
584+
export enum AmazonSageMakerServiceType {
585+
amazon_sagemaker
586+
}
587+
445588
export class AnthropicServiceSettings {
446589
/**
447590
* A valid API key for the Anthropic API.

specification/inference/_types/Services.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import {
2323
TaskType,
2424
TaskTypeAlibabaCloudAI,
2525
TaskTypeAmazonBedrock,
26+
TaskTypeAmazonSageMaker,
2627
TaskTypeAnthropic,
2728
TaskTypeAzureAIStudio,
2829
TaskTypeAzureOpenAI,
@@ -98,6 +99,17 @@ export class InferenceEndpointInfoAmazonBedrock extends InferenceEndpoint {
9899
task_type: TaskTypeAmazonBedrock
99100
}
100101

102+
export class InferenceEndpointInfoAmazonSageMaker extends InferenceEndpoint {
103+
/**
104+
* The inference Id
105+
*/
106+
inference_id: string
107+
/**
108+
* The task type
109+
*/
110+
task_type: TaskTypeAmazonSageMaker
111+
}
112+
101113
export class InferenceEndpointInfoAnthropic extends InferenceEndpoint {
102114
/**
103115
* The inference Id

specification/inference/_types/TaskType.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,14 @@ export enum TaskTypeAmazonBedrock {
4545
completion
4646
}
4747

48+
export enum TaskTypeAmazonSageMaker {
49+
text_embedding,
50+
completion,
51+
chat_completion,
52+
sparse_embedding,
53+
rerank
54+
}
55+
4856
export enum TaskTypeAnthropic {
4957
completion
5058
}

specification/inference/put/PutRequest.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import { TaskType } from '@inference/_types/TaskType'
3333
* The following integrations are available through the inference API. You can find the available task types next to the integration name:
3434
* * AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)
3535
* * Amazon Bedrock (`completion`, `text_embedding`)
36+
* * Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)
3637
* * Anthropic (`completion`)
3738
* * Azure AI Studio (`completion`, `rerank`, `text_embedding`)
3839
* * Azure OpenAI (`completion`, `text_embedding`)
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
* Licensed to Elasticsearch B.V. under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch B.V. licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
import { RequestBase } from '@_types/Base'
21+
import { Id } from '@_types/common'
22+
import { Duration } from '@_types/Time'
23+
import {
24+
AmazonSageMakerServiceSettings,
25+
AmazonSageMakerServiceType,
26+
AmazonSageMakerTaskSettings
27+
} from '@inference/_types/CommonTypes'
28+
import { InferenceChunkingSettings } from '@inference/_types/Services'
29+
import { TaskTypeAmazonSageMaker } from '@inference/_types/TaskType'
30+
31+
/**
32+
* Create an Amazon SageMaker inference endpoint.
33+
*
34+
* Create an inference endpoint to perform an inference task with the `amazon_sagemaker` service.
35+
* @rest_spec_name inference.put_amazonsagemaker
36+
* @availability stack since=9.1.0 stability=stable visibility=public
37+
* @availability serverless stability=stable visibility=public
38+
* @cluster_privileges manage_inference
39+
* @doc_id inference-api-put-amazonsagemaker
40+
*/
41+
export interface Request extends RequestBase {
42+
urls: [
43+
{
44+
path: '/_inference/{task_type}/{amazonsagemaker_inference_id}'
45+
methods: ['PUT']
46+
}
47+
]
48+
path_parts: {
49+
/**
50+
* The type of the inference task that the model will perform.
51+
*/
52+
task_type: TaskTypeAmazonSageMaker
53+
/**
54+
* The unique identifier of the inference endpoint.
55+
*/
56+
amazonsagemaker_inference_id: Id
57+
}
58+
query_parameters: {
59+
/**
60+
* Specifies the amount of time to wait for the inference endpoint to be created.
61+
* @server_default 30s
62+
*/
63+
timeout?: Duration
64+
}
65+
body: {
66+
/**
67+
* The chunking configuration object.
68+
* @ext_doc_id inference-chunking
69+
*/
70+
chunking_settings?: InferenceChunkingSettings
71+
/**
72+
* The type of service supported for the specified task type. In this case, `amazon_sagemaker`.
73+
*/
74+
service: AmazonSageMakerServiceType
75+
/**
76+
* Settings used to install the inference model.
77+
* These settings are specific to the `amazon_sagemaker` service and `service_settings.api` you specified.
78+
*/
79+
service_settings: AmazonSageMakerServiceSettings
80+
/**
81+
* Settings to configure the inference task.
82+
* These settings are specific to the task type and `service_settings.api` you specified.
83+
*/
84+
task_settings?: AmazonSageMakerTaskSettings
85+
}
86+
}

0 commit comments

Comments
 (0)