elastic · Jan-Kazlouski-elastic · Aug 4, 2025 · Aug 4, 2025
diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json
diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json
diff --git a/output/schema/schema.json b/output/schema/schema.json
diff --git a/output/typescript/types.ts b/output/typescript/types.ts
diff --git a/package-lock.json b/package-lock.json
diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv
@@ -2,6 +2,8 @@ doc_id,doc_url,previous_version_doc_url,description
 ack-watch, https://www.elastic.co/docs/explore-analyze/alerts-cases/watcher/actions#example,,
 apis,https://www.elastic.co/docs/api/doc/elasticsearch,,
 add-nodes,https://www.elastic.co/docs/deploy-manage/maintenance/add-and-remove-elasticsearch-nodes,,
+ai21-api-models,https://docs.ai21.com/docs/jamba-foundation-models,,
+ai21-rate-limit,https://docs.ai21.com/reference/api-rate-limits,,
 alias-update,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-put-alias,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/indices-add-alias.html,
 aliases-update,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-update-aliases,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/indices-aliases.html,
 alibabacloud-api-keys,https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key,,
@@ -356,6 +358,7 @@ inference-api-post,https://www.elastic.co/docs/api/doc/elasticsearch/operation/o
 inference-api-post-eis-chat-completion,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-post-eis-chat-completion,,
 inference-api-put,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/put-inference-api.html,
 inference-api-put-alibabacloud,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-alibabacloud,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-alibabacloud-ai-search.html,
+inference-api-put-ai21,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-ai21,,
 inference-api-put-amazonbedrock,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonbedrock,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-amazon-bedrock.html,
 inference-api-put-amazonsagemaker,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-amazonsagemaker,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-amazon-sagemaker.html,
 inference-api-put-anthropic,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-anthropic,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-anthropic.html,

@@ -0,0 +1,35 @@
+{
+  "inference.put_ai21": {
+    "documentation": {
+      "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-ai21.html",
+      "description": "Configure an AI21 inference endpoint"
+    },
+    "stability": "stable",
+    "visibility": "public",
+    "headers": {
+      "accept": ["application/json"],
+      "content_type": ["application/json"]
+    },
+    "url": {
+      "paths": [
+        {
+          "path": "/_inference/{task_type}/{ai21_inference_id}",
+          "methods": ["PUT"],
+          "parts": {
+            "task_type": {
+              "type": "string",
+              "description": "The task type"
+            },
+            "ai21_inference_id": {
+              "type": "string",
+              "description": "The inference ID"
+            }
+          }
+        }
+      ]
+    },
+    "body": {
+      "description": "The inference endpoint's task and service settings"
+    }
+  }
+}
diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts
@@ -289,6 +289,42 @@ export interface CompletionTool {
   function: CompletionToolFunction
 }
 
+export class Ai21ServiceSettings {
+  /**
+   * The name of the model to use for the inference task.
+   * Refer to the AI21 models documentation for the list of supported models and versions.
+   * The service has been tested and confirmed to be working for `completion` and `chat_completion` tasks with the following models:
+   * * `jamba-mini`
+   * * `jamba-large`
+   * @ext_doc_id ai21-api-models
+   */
+  model_id: string
+  /**
+   * A valid API key for accessing the AI21 API.
+   *
+   * IMPORTANT: You need to provide the API key only once, during the inference model creation.
+   * The get inference endpoint API does not retrieve your API key.
+   * After creating the inference model, you cannot change the associated API key.
+   * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.
+   */
+  api_key?: string
+  /**
+   * This setting helps to minimize the number of rate limit errors returned from the AI21 API.
+   * By default, the `ai21` service sets the number of requests allowed per minute to 200. Refer to the AI21 documentation for more details.
+   * @ext_doc_id ai21-rate-limit
+   */
+  rate_limit?: RateLimitSetting
+}
+
+export enum Ai21TaskType {
+  completion,
+  chat_completion
+}
+
+export enum Ai21ServiceType {
+  ai21
+}
+
 export class AlibabaCloudServiceSettings {
   /**
    * A valid API key for the AlibabaCloud AI Search API.

diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts
@@ -21,6 +21,7 @@ import { integer } from '@_types/Numeric'
 import { UserDefinedValue } from '@spec_utils/UserDefinedValue'
 import {
   TaskType,
+  TaskTypeAi21,
   TaskTypeAlibabaCloudAI,
   TaskTypeAmazonBedrock,
   TaskTypeAmazonSageMaker,
@@ -77,6 +78,18 @@ export class InferenceEndpointInfo extends InferenceEndpoint {
    */
   task_type: TaskType
 }
+
+export class InferenceEndpointInfoAi21 extends InferenceEndpoint {
+  /**
+   * The inference Id
+   */
+  inference_id: string
+  /**
+   * The task type
+   */
+  task_type: TaskTypeAi21
+}
+
 export class InferenceEndpointInfoAlibabaCloudAI extends InferenceEndpoint {
   /**
    * The inference Id

diff --git a/specification/inference/_types/TaskType.ts b/specification/inference/_types/TaskType.ts
@@ -33,6 +33,11 @@ export enum TaskTypeJinaAi {
   rerank
 }
 
+export enum TaskTypeAi21 {
+  completion,
+  chat_completion
+}
+
 export enum TaskTypeAlibabaCloudAI {
   text_embedding,
   rerank,

diff --git a/specification/inference/put/PutRequest.ts b/specification/inference/put/PutRequest.ts
@@ -31,6 +31,7 @@ import { TaskType } from '@inference/_types/TaskType'
  * However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.
  *
  * The following integrations are available through the inference API. You can find the available task types next to the integration name:
+ * * AI21 (`chat_completion`, `completion`)
  * * AlibabaCloud AI Search (`completion`, `rerank`, `sparse_embedding`, `text_embedding`)
  * * Amazon Bedrock (`completion`, `text_embedding`)
  * * Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)

diff --git a/specification/inference/put_ai21/PutAi21Request.ts b/specification/inference/put_ai21/PutAi21Request.ts
@@ -0,0 +1,73 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { RequestBase } from '@_types/Base'
+import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
+import {
+  Ai21ServiceSettings,
+  Ai21ServiceType,
+  Ai21TaskType
+} from '@inference/_types/CommonTypes'
+
+/**
+ * Create an AI21 inference endpoint.
+ *
+ * Create an inference endpoint to perform an inference task with the `ai21` service.
+ * @rest_spec_name inference.put_ai21
+ * @availability stack since=9.2.0 stability=stable visibility=public
+ * @availability serverless stability=stable visibility=public
+ * @cluster_privileges manage_inference
+ * @doc_id inference-api-put-ai21
+ */
+export interface Request extends RequestBase {
+  urls: [
+    {
+      path: '/_inference/{task_type}/{ai21_inference_id}'
+      methods: ['PUT']
+    }
+  ]
+  path_parts: {
+    /**
+     * The type of the inference task that the model will perform.
+     */
+    task_type: Ai21TaskType
+    /**
+     * The unique identifier of the inference endpoint.
+     */
+    ai21_inference_id: Id
+  }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference endpoint to be created.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
+  body: {
+    /**
+     * The type of service supported for the specified task type. In this case, `ai21`.
+     */
+    service: Ai21ServiceType
+    /**
+     * Settings used to install the inference model. These settings are specific to the `ai21` service.
+     */
+    service_settings: Ai21ServiceSettings
+  }
+}
diff --git a/specification/inference/put_ai21/PutAi21Response.ts b/specification/inference/put_ai21/PutAi21Response.ts
@@ -0,0 +1,25 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { InferenceEndpointInfoAi21 } from '@inference/_types/Services'
+
+export class Response {
+  /** @codegen_name endpoint_info */
+  body: InferenceEndpointInfoAi21
+}
diff --git a/specification/inference/put_ai21/examples/request/PutAi21RequestExample1.yaml b/specification/inference/put_ai21/examples/request/PutAi21RequestExample1.yaml
@@ -0,0 +1,12 @@
+# summary:
+description: Run `PUT _inference/completion/ai21-completion` to create an AI21 inference endpoint that performs a `completion` task.
+method_request: 'PUT _inference/completion/ai21-completion'
+# type: "request"
+value: |-
+  {
+    "service": "ai21",
+    "service_settings": {
+      "api_key": "ai21-api-key",
+      "model_id": "jamba-large" 
+    }
+  }
diff --git a/specification/inference/put_ai21/examples/request/PutAi21RequestExample2.yaml b/specification/inference/put_ai21/examples/request/PutAi21RequestExample2.yaml
@@ -0,0 +1,12 @@
+# summary:
+description: Run `PUT _inference/chat_completion/ai21-chat-completion` to create an AI21 inference endpoint that performs a `chat_completion` task.
+method_request: 'PUT _inference/chat_completion/ai21-chat-completion'
+# type: "request"
+value: |-
+  {
+    "service": "ai21",
+    "service_settings": {
+      "api_key": "ai21-api-key",
+      "model_id": "jamba-mini" 
+    }
+  }