diff --git a/output/schema/schema.json b/output/schema/schema.json index 41dcb516e1..1feed84b09 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9084,6 +9084,86 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.18.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Perform chat completion inference", + "docId": "inference-api-chat-completion", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/chat-completion-inference-api.html", + "name": "inference.chat_completion_unified", + "request": { + "name": "Request", + "namespace": "inference.chat_completion_unified" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.chat_completion_unified" + }, + "responseMediaType": [ + "text/event-stream" + ], + "urls": [ + { + "methods": [ + "POST" + ], + "path": "/_inference/chat_completion/{inference_id}/_stream" + } + ] + }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.11.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Perform completion inference on the service", + "docId": "inference-api-post", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html", + "name": "inference.completion", + "request": { + "name": "Request", + "namespace": "inference.completion" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.completion" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "POST" + ], + "path": "/_inference/completion/{inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -9188,26 +9268,26 @@ "visibility": "public" } }, - "description": "Perform inference on the service.\n\nThis API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. 
However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", - "docId": "inference-api-post", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html", - "name": "inference.inference", + "description": "Create an inference endpoint.\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", + "docId": "inference-api-put", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/put-inference-api.html", + "name": "inference.put", "privileges": { "cluster": [ - "monitor_inference" + "manage_inference" ] }, "request": { "name": "Request", - "namespace": "inference.inference" + "namespace": "inference.put" }, - "requestBodyRequired": false, + "requestBodyRequired": true, "requestMediaType": [ "application/json" ], "response": { "name": "Response", - "namespace": "inference.inference" + "namespace": "inference.put" }, "responseMediaType": [ "application/json" @@ -9215,13 +9295,13 @@ "urls": [ { "methods": [ - "POST" + "PUT" ], "path": "/_inference/{inference_id}" }, { "methods": [ - "POST" + "PUT" ], "path": "/_inference/{task_type}/{inference_id}" } @@ -9234,15 +9314,15 @@ "visibility": "public" }, "stack": { - "since": "8.11.0", + "since": "8.16.0", "stability": "stable", "visibility": "public" } }, - "description": "Create an inference endpoint.\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want 
to use non-NLP models, use the machine learning trained model APIs.", - "docId": "inference-api-put", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/put-inference-api.html", - "name": "inference.put", + "description": "Create a Watsonx inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-watsonx", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-watsonx-ai.html", + "name": "inference.put_watsonx", "privileges": { "cluster": [ "manage_inference" @@ -9250,15 +9330,15 @@ }, "request": { "name": "Request", - "namespace": "inference.put" + "namespace": "inference.put_watsonx" }, - "requestBodyRequired": true, + "requestBodyRequired": false, "requestMediaType": [ "application/json" ], "response": { "name": "Response", - "namespace": "inference.put" + "namespace": "inference.put_watsonx" }, "responseMediaType": [ "application/json" @@ -9268,13 +9348,7 @@ "methods": [ "PUT" ], - "path": "/_inference/{inference_id}" - }, - { - "methods": [ - "PUT" - ], - "path": "/_inference/{task_type}/{inference_id}" + "path": "/_inference/{task_type}/{watsonx_inference_id}" } ] }, @@ -9285,23 +9359,23 @@ "availability": { "serverless": { "stability": "stable", "visibility": "public" }, "stack": { - "since": "8.16.0", + "since": "8.11.0", "stability": "stable", "visibility": "public" } }, - "description": "Create a Watsonx inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", - "docId": "inference-api-put-watsonx", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-watsonx-ai.html", - "name": "inference.put_watsonx", + "description": "Perform reranking inference on the service", + "docId": "inference-api-post", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html", + "name": "inference.rerank", "privileges": {
"cluster": [ - "manage_inference" + "monitor_inference" ] }, "request": { "name": "Request", - "namespace": "inference.put_watsonx" + "namespace": "inference.rerank" }, "requestBodyRequired": false, "requestMediaType": [ @@ -9309,7 +9383,7 @@ ], "response": { "name": "Response", - "namespace": "inference.put_watsonx" + "namespace": "inference.rerank" }, "responseMediaType": [ "application/json" @@ -9317,9 +9391,49 @@ "urls": [ { "methods": [ - "PUT" + "POST" ], - "path": "/_inference/{task_type}/{watsonx_inference_id}" + "path": "/_inference/rerank/{inference_id}" + } + ] + }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.11.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Perform sparse embedding inference on the service", + "docId": "inference-api-post", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html", + "name": "inference.sparse_embedding", + "request": { + "name": "Request", + "namespace": "inference.sparse_embedding" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.sparse_embedding" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "POST" + ], + "path": "/_inference/sparse_embedding/{inference_id}" } ] }, @@ -9334,7 +9448,7 @@ "description": "Perform streaming inference.\nGet real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). 
You must use a client that supports streaming.", "docId": "inference-api-stream", "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/stream-inference-api.html", - "name": "inference.stream_inference", + "name": "inference.stream_completion", "privileges": { "cluster": [ "monitor_inference" @@ -9342,7 +9456,7 @@ }, "request": { "name": "Request", - "namespace": "inference.stream_inference" + "namespace": "inference.stream_completion" }, "requestBodyRequired": false, "requestMediaType": [ @@ -9350,7 +9464,7 @@ ], "response": { "name": "Response", - "namespace": "inference.stream_inference" + "namespace": "inference.stream_completion" }, "responseMediaType": [ "text/event-stream" @@ -9360,13 +9474,7 @@ "methods": [ "POST" ], - "path": "/_inference/{inference_id}/_stream" - }, - { - "methods": [ - "POST" - ], - "path": "/_inference/{task_type}/{inference_id}/_stream" + "path": "/_inference/completion/{inference_id}/_stream" } ] }, @@ -9377,17 +9485,18 @@ "visibility": "public" }, "stack": { - "since": "8.18.0", + "since": "8.11.0", "stability": "stable", "visibility": "public" } }, - "description": "Perform inference on the service using the Unified Schema", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/unified-inference-api.html", - "name": "inference.unified_inference", + "description": "Perform text embedding inference on the service", + "docId": "inference-api-post", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html", + "name": "inference.text_embedding", "request": { "name": "Request", - "namespace": "inference.unified_inference" + "namespace": "inference.text_embedding" }, "requestBodyRequired": false, "requestMediaType": [ @@ -9395,23 +9504,17 @@ ], "response": { "name": "Response", - "namespace": "inference.unified_inference" + "namespace": "inference.text_embedding" }, "responseMediaType": [ - "text/event-stream" + "application/json" ], "urls": [ { "methods": [ "POST" ], - "path": "/_inference/{inference_id}/_unified" - }, - { - "methods": [ - "POST" - ], - "path": "/_inference/{task_type}/{inference_id}/_unified" + "path": "/_inference/text_embedding/{inference_id}" } ] }, @@ -9450,13 +9553,13 @@ "urls": [ { "methods": [ - "POST" + "PUT" ], "path": "/_inference/{inference_id}/_update" }, { "methods": [ - "POST" + "PUT" ], "path": "/_inference/{task_type}/{inference_id}/_update" } @@ -146579,6 +146682,31 @@ }, "specLocation": "indices/validate_query/IndicesValidateQueryResponse.ts#L23-L30" }, + { + "kind": "interface", + "description": "Defines the completion result.", + "name": { + "name": "CompletionInferenceResult", + "namespace": "inference._types" + }, + "properties": [ + { + "name": "completion", + "required": true, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "CompletionResult", + "namespace": "inference._types" + } + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L84-L89" + }, { "kind": "interface", "description": "The completion result object", @@ -146599,7 +146727,7 @@ } } ], - "specLocation": "inference/_types/Results.ts#L60-L65" + "specLocation": "inference/_types/Results.ts#L77-L82" }, { "kind": "interface", @@ -146630,7 +146758,7 @@ } } ], - "specLocation": "inference/_types/Results.ts#L92-L97" + "specLocation": "inference/_types/Results.ts#L110-L115" }, { "kind": "type_alias", @@ -146639,7 +146767,7 @@ "name": "DenseByteVector", "namespace": "inference._types" }, - "specLocation": 
"inference/_types/Results.ts#L40-L44", + "specLocation": "inference/_types/Results.ts#L47-L51", "type": { "kind": "array_of", "value": { @@ -146839,104 +146967,6 @@ ], "specLocation": "inference/_types/Services.ts#L46-L58" }, - { - "kind": "interface", - "description": "InferenceResult is an aggregation of mutually exclusive variants", - "name": { - "name": "InferenceResult", - "namespace": "inference._types" - }, - "properties": [ - { - "name": "text_embedding_bytes", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "TextEmbeddingByteResult", - "namespace": "inference._types" - } - } - } - }, - { - "name": "text_embedding_bits", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "TextEmbeddingByteResult", - "namespace": "inference._types" - } - } - } - }, - { - "name": "text_embedding", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "TextEmbeddingResult", - "namespace": "inference._types" - } - } - } - }, - { - "name": "sparse_embedding", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "SparseEmbeddingResult", - "namespace": "inference._types" - } - } - } - }, - { - "name": "completion", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "CompletionResult", - "namespace": "inference._types" - } - } - } - }, - { - "name": "rerank", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "RankedDocument", - "namespace": "inference._types" - } - } - } - } - ], - "specLocation": "inference/_types/Results.ts#L79-L90", - "variants": { - "kind": "container" - } - }, { "kind": "interface", "description": "The rerank result object representing a single ranked document\nid: the original index of the document in the request\nrelevance_score: the relevance_score of the document relative to the query\ntext: Optional, the text of the document, if requested", @@ -146979,7 +147009,7 @@ } } ], - "specLocation": "inference/_types/Results.ts#L67-L77" + "specLocation": "inference/_types/Results.ts#L91-L101" }, { "kind": "interface", @@ -147003,6 +147033,31 @@ ], "specLocation": "inference/_types/Services.ts#L96-L101" }, + { + "kind": "interface", + "description": "Defines the response for a rerank request.", + "name": { + "name": "RerankedInferenceResult", + "namespace": "inference._types" + }, + "properties": [ + { + "name": "rerank", + "required": true, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "RankedDocument", + "namespace": "inference._types" + } + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L103-L108" + }, { "kind": "type_alias", "name": { @@ -147014,6 +147069,31 @@ "kind": "user_defined_value" } }, + { + "kind": "interface", + "description": "The response format for the sparse embedding request.", + "name": { + "name": "SparseEmbeddingInferenceResult", + "namespace": "inference._types" + }, + "properties": [ + { + "name": "sparse_embedding", + "required": true, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "SparseEmbeddingResult", + "namespace": "inference._types" + } + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L40-L45" + }, { "kind": "interface", "name": { @@ -147087,13 +147167,16 @@ 
}, { "name": "completion" + }, + { + "name": "chat_completion" } ], "name": { "name": "TaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L20-L28" + "specLocation": "inference/_types/TaskType.ts#L20-L29" }, { "kind": "interface", @@ -147115,7 +147198,63 @@ } } ], - "specLocation": "inference/_types/Results.ts#L46-L51" + "specLocation": "inference/_types/Results.ts#L53-L58" + }, + { + "kind": "interface", + "description": "TextEmbeddingInferenceResult is an aggregation of mutually exclusive text_embedding variants", + "name": { + "name": "TextEmbeddingInferenceResult", + "namespace": "inference._types" + }, + "properties": [ + { + "name": "text_embedding_bytes", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "TextEmbeddingByteResult", + "namespace": "inference._types" + } + } + } + }, + { + "name": "text_embedding_bits", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "TextEmbeddingByteResult", + "namespace": "inference._types" + } + } + } + }, + { + "name": "text_embedding", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "TextEmbeddingResult", + "namespace": "inference._types" + } + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L67-L75", + "variants": { + "kind": "container" + } }, { "kind": "interface", @@ -147137,72 +147276,145 @@ } } ], - "specLocation": "inference/_types/Results.ts#L53-L58" + "specLocation": "inference/_types/Results.ts#L60-L65" }, { - "kind": "request", - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "no_body" + "kind": "interface", + "description": "A list of tools that the model can call.", + "name": { + "name": "CompletionTool", + "namespace": "inference.chat_completion_unified" }, - "description": "Delete an inference endpoint", - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" + "properties": [ + { + "description": "The type of tool.", + "name": "type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The function definition.", + "name": "function", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "CompletionToolFunction", + "namespace": "inference.chat_completion_unified" + } + } } - }, + ], + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L215-L227" + }, + { + "kind": "interface", + "description": "Controls which tool is called by the model.", "name": { - "name": "Request", - "namespace": "inference.delete" + "name": "CompletionToolChoice", + "namespace": "inference.chat_completion_unified" }, - "path": [ + "properties": [ { - "description": "The task type", - "name": "task_type", - "required": false, + "description": "The type of the tool.", + "name": "type", + "required": true, "type": { "kind": "instance_of", "type": { - "name": "TaskType", - "namespace": "inference._types" + "name": "string", + "namespace": "_builtins" } } }, { - "description": "The inference identifier.", - "name": "inference_id", + "description": "The tool choice function.", + "name": "function", "required": true, "type": { "kind": "instance_of", "type": { - "name": "Id", - "namespace": "_types" + "name": "CompletionToolChoiceFunction", + "namespace": "inference.chat_completion_unified" } } } ], - "query": [ + 
"specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L178-L190" + }, + { + "kind": "interface", + "description": "The tool choice function.", + "name": { + "name": "CompletionToolChoiceFunction", + "namespace": "inference.chat_completion_unified" + }, + "properties": [ { - "description": "When true, the endpoint is not deleted and a list of ingest processors which reference this endpoint is returned.", - "name": "dry_run", + "description": "The name of the function to call.", + "name": "name", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L167-L176" + }, + { + "kind": "interface", + "description": "The completion tool function definition.", + "name": { + "name": "CompletionToolFunction", + "namespace": "inference.chat_completion_unified" + }, + "properties": [ + { + "description": "A description of what the function does.\nThis is used by the model to choose when and how to call the function.", + "name": "description", "required": false, - "serverDefault": false, "type": { "kind": "instance_of", "type": { - "name": "boolean", + "name": "string", "namespace": "_builtins" } } }, { - "description": "When true, the inference endpoint is forcefully deleted even if it is still being used by ingest processors or semantic text fields.", - "name": "force", + "description": "The name of the function.", + "name": "name", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The parameters the functional accepts. This should be formatted as a JSON object.", + "name": "parameters", + "required": false, + "type": { + "kind": "user_defined_value" + } + }, + { + "description": "Whether to enable schema adherence when generating the function call.", + "name": "strict", "required": false, - "serverDefault": false, "type": { "kind": "instance_of", "type": { @@ -147212,25 +147424,169 @@ } } ], - "specLocation": "inference/delete/DeleteRequest.ts#L24-L66" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L192-L213" }, { - "kind": "response", - "body": { - "kind": "value", - "value": { - "kind": "instance_of", + "kind": "type_alias", + "codegenNames": [ + "string", + "object" + ], + "name": { + "name": "CompletionToolType", + "namespace": "inference.chat_completion_unified" + }, + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L89-L92", + "type": { + "kind": "union_of", + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "instance_of", + "type": { + "name": "CompletionToolChoice", + "namespace": "inference.chat_completion_unified" + } + } + ] + } + }, + { + "kind": "interface", + "description": "An object style representation of a single portion of a conversation.", + "name": { + "name": "ContentObject", + "namespace": "inference.chat_completion_unified" + }, + "properties": [ + { + "description": "The text content.", + "name": "text", + "required": true, "type": { - "name": "DeleteInferenceEndpointResult", - "namespace": "inference._types" + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The type of content.", + "name": "type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } } } + ], + 
"specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L94-L106" + }, + { + "kind": "interface", + "description": "An object representing part of the conversation.", + "name": { + "name": "Message", + "namespace": "inference.chat_completion_unified" }, + "properties": [ + { + "description": "The content of the message.", + "name": "content", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "MessageContent", + "namespace": "inference.chat_completion_unified" + } + } + }, + { + "description": "The role of the message author.", + "name": "role", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The tool call that this message is responding to.", + "name": "tool_call_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + }, + { + "description": "The tool calls generated by the model.", + "name": "tool_calls", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "ToolCall", + "namespace": "inference.chat_completion_unified" + } + } + } + } + ], + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L145-L165" + }, + { + "kind": "type_alias", + "codegenNames": [ + "string", + "object" + ], "name": { - "name": "Response", - "namespace": "inference.delete" + "name": "MessageContent", + "namespace": "inference.chat_completion_unified" }, - "specLocation": "inference/delete/DeleteResponse.ts#L22-L24" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L140-L143", + "type": { + "kind": "union_of", + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "ContentObject", + "namespace": "inference.chat_completion_unified" + } + } + } + ] + } }, { "kind": "request", @@ -147238,9 +147594,116 @@ "CommonQueryParameters" ], "body": { - "kind": "no_body" + "kind": "properties", + "properties": [ + { + "description": "A list of objects representing the conversation.", + "name": "messages", + "required": true, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "Message", + "namespace": "inference.chat_completion_unified" + } + } + } + }, + { + "description": "The ID of the model to use.", + "name": "model", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", + "name": "max_completion_tokens", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "long", + "namespace": "_types" + } + } + }, + { + "description": "A sequence of strings to control when the model should stop generating additional tokens.", + "name": "stop", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + }, + { + "description": "The sampling temperature to use.", + "name": "temperature", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "Controls which tool is called by the model.", + "name": "tool_choice", + "required": false, + 
"type": { + "kind": "instance_of", + "type": { + "name": "CompletionToolType", + "namespace": "inference.chat_completion_unified" + } + } + }, + { + "description": "A list of tools that the model can call.", + "name": "tools", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "CompletionTool", + "namespace": "inference.chat_completion_unified" + } + } + } + }, + { + "description": "Nucleus sampling, an alternative to sampling with temperature.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + } + ] }, - "description": "Get an inference endpoint", + "description": "Perform chat completion inference", "inherits": { "type": { "name": "RequestBase", @@ -147249,63 +147712,138 @@ }, "name": { "name": "Request", - "namespace": "inference.get" + "namespace": "inference.chat_completion_unified" }, "path": [ { - "description": "The task type", - "name": "task_type", - "required": false, + "description": "The inference Id", + "name": "inference_id", + "required": true, "type": { "kind": "instance_of", "type": { - "name": "TaskType", - "namespace": "inference._types" + "name": "Id", + "namespace": "_types" } } - }, + } + ], + "query": [ { - "description": "The inference Id", - "name": "inference_id", + "description": "Specifies the amount of time to wait for the inference request to complete.", + "name": "timeout", "required": false, + "serverDefault": "30s", "type": { "kind": "instance_of", "type": { - "name": "Id", + "name": "Duration", "namespace": "_types" } } } ], - "query": [], - "specLocation": "inference/get/GetRequest.ts#L24-L56" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L26-L87" }, { "kind": "response", "body": { - "kind": "properties", - "properties": [ - { - "name": "endpoints", - "required": true, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpointInfo", - "namespace": "inference._types" - } - } - } + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "StreamResult", + "namespace": "_types" } - ] + } }, "name": { "name": "Response", - "namespace": "inference.get" + "namespace": "inference.chat_completion_unified" }, - "specLocation": "inference/get/GetResponse.ts#L22-L26" + "specLocation": "inference/chat_completion_unified/UnifiedResponse.ts#L22-L24" + }, + { + "kind": "interface", + "description": "A tool call generated by the model.", + "name": { + "name": "ToolCall", + "namespace": "inference.chat_completion_unified" + }, + "properties": [ + { + "description": "The identifier of the tool call.", + "name": "id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + }, + { + "description": "The function that the model called.", + "name": "function", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ToolCallFunction", + "namespace": "inference.chat_completion_unified" + } + } + }, + { + "description": "The type of the tool call.", + "name": "type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L122-L138" + }, + { + "kind": "interface", + "description": "The function that the model called.", + "name": { + "name": "ToolCallFunction", + "namespace": 
"inference.chat_completion_unified" + }, + "properties": [ + { + "description": "The arguments to call the function with in JSON format.", + "name": "arguments", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the function to call.", + "name": "name", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L108-L120" }, { "kind": "request", @@ -147316,19 +147854,7 @@ "kind": "properties", "properties": [ { - "description": "The query input, which is required only for the `rerank` task.\nIt is not required for other tasks.", - "name": "query", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.", + "description": "Inference input.\nEither a string or an array of strings.", "name": "input", "required": true, "type": { @@ -147355,7 +147881,7 @@ } }, { - "description": "Task settings for the individual inference request.\nThese settings are specific to the task type you specified and override the task settings specified when initializing the service.", + "description": "Optional task settings", "name": "task_settings", "required": false, "type": { @@ -147368,7 +147894,7 @@ } ] }, - "description": "Perform inference on the service.\n\nThis API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. 
However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", + "description": "Perform completion inference on the service", "inherits": { "type": { "name": "RequestBase", @@ -147377,11 +147903,79 @@ }, "name": { "name": "Request", - "namespace": "inference.inference" + "namespace": "inference.completion" }, "path": [ { - "description": "The type of inference task that the model performs.", + "description": "The inference Id", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [ + { + "description": "Specifies the amount of time to wait for the inference request to complete.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/completion/CompletionRequest.ts#L25-L63" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "CompletionInferenceResult", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.completion" + }, + "specLocation": "inference/completion/CompletionResponse.ts#L22-L24" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "no_body" + }, + "description": "Delete an inference endpoint", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.delete" + }, + "path": [ + { + "description": "The task type", "name": "task_type", "required": false, "type": { @@ -147393,7 +147987,7 @@ } }, { - "description": "The unique identifier for the inference endpoint.", + "description": "The inference identifier.", "name": "inference_id", "required": true, "type": { @@ -147407,20 +148001,33 @@ ], "query": [ { - "description": "The amount of time to wait for the inference request to complete.", - "name": "timeout", + "description": "When true, the endpoint is not deleted and a list of ingest processors which reference this endpoint is returned.", + "name": "dry_run", "required": false, - "serverDefault": "30s", + "serverDefault": false, "type": { "kind": "instance_of", "type": { - "name": "Duration", - "namespace": "_types" + "name": "boolean", + "namespace": "_builtins" + } + } + }, + { + "description": "When true, the inference endpoint is forcefully deleted even if it is still being used by ingest processors or semantic text fields.", + "name": "force", + "required": false, + "serverDefault": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" } } } ], - "specLocation": "inference/inference/InferenceRequest.ts#L26-L89" + "specLocation": "inference/delete/DeleteRequest.ts#L24-L66" }, { "kind": "response", @@ -147429,16 +148036,91 @@ "value": { "kind": "instance_of", "type": { - "name": "InferenceResult", + "name": "DeleteInferenceEndpointResult", "namespace": "inference._types" } } }, "name": { "name": "Response", - "namespace": "inference.inference" + "namespace": "inference.delete" }, - "specLocation": "inference/inference/InferenceResponse.ts#L22-L24" + "specLocation": "inference/delete/DeleteResponse.ts#L22-L24" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + 
"kind": "no_body" + }, + "description": "Get an inference endpoint", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.get" + }, + "path": [ + { + "description": "The task type", + "name": "task_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskType", + "namespace": "inference._types" + } + } + }, + { + "description": "The inference Id", + "name": "inference_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/get/GetRequest.ts#L24-L56" + }, + { + "kind": "response", + "body": { + "kind": "properties", + "properties": [ + { + "name": "endpoints", + "required": true, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + } + } + ] + }, + "name": { + "name": "Response", + "namespace": "inference.get" + }, + "specLocation": "inference/get/GetResponse.ts#L22-L26" }, { "kind": "request", @@ -147729,7 +148411,19 @@ "kind": "properties", "properties": [ { - "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\nNOTE: Inference endpoints for the completion task type currently only support a single string as input.", + "description": "Query input.", + "name": "query", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.", "name": "input", "required": true, "type": { @@ -147754,10 +148448,22 @@ } ] } + }, + { + "description": "Task settings for the individual inference request.\nThese settings are specific to the task type you specified and override the task settings specified when initializing the service.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskSettings", + "namespace": "inference._types" + } + } } ] }, - "description": "Perform streaming inference.\nGet real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). 
You must use a client that supports streaming.", + "description": "Perform reranking inference on the service", "inherits": { "type": { "name": "RequestBase", "namespace": "_types" @@ -147766,7 +148472,7 @@ }, "name": { "name": "Request", - "namespace": "inference.stream_inference" + "namespace": "inference.rerank" }, "path": [ { @@ -147780,22 +148486,24 @@ "namespace": "_types" } } - }, + } + ], + "query": [ { - "description": "The type of task that the model performs.", - "name": "task_type", + "description": "The amount of time to wait for the inference request to complete.", + "name": "timeout", "required": false, + "serverDefault": "30s", "type": { "kind": "instance_of", "type": { - "name": "TaskType", - "namespace": "inference._types" + "name": "Duration", + "namespace": "_types" } } } ], - "query": [], - "specLocation": "inference/stream_inference/StreamInferenceRequest.ts#L24-L67" + "specLocation": "inference/rerank/RerankRequest.ts#L25-L72" }, { "kind": "response", "body": { "kind": "value", "value": { "kind": "instance_of", "type": { - "name": "StreamResult", - "namespace": "_types" + "name": "RerankedInferenceResult", + "namespace": "inference._types" } } }, "name": { "name": "Response", - "namespace": "inference.stream_inference" + "namespace": "inference.rerank" }, - "specLocation": "inference/stream_inference/StreamInferenceResponse.ts#L22-L24" + "specLocation": "inference/rerank/RerankResponse.ts#L22-L24" }, { - "kind": "interface", - "description": "A list of tools that the model can call.", - "name": { - "name": "CompletionTool", - "namespace": "inference.unified_inference" - }, - "properties": [ - { - "description": "The type of tool.", - "name": "type", - "required": true, - "type": { - "kind": "instance_of", + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "Inference input.\nEither a string or an array of strings.", + "name": "input", + "required": true, "type": { - "name": "string", - "namespace": "_builtins" + "kind": "union_of", + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ] } - } - }, - { - "description": "The function definition.", - "name": "function", - "required": true, - "type": { - "kind": "instance_of", + }, + { + "description": "Optional task settings", + "name": "task_settings", + "required": false, "type": { - "name": "CompletionToolFunction", - "namespace": "inference.unified_inference" + "kind": "instance_of", + "type": { + "name": "TaskSettings", + "namespace": "inference._types" + } } } + ] }, + "description": "Perform sparse embedding inference on the service", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" } - ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L223-L235" - }, - { - "kind": "interface", - "description": "Controls which tool is called by the model.", + }, "name": { - "name": "CompletionToolChoice", - "namespace": "inference.unified_inference" + "name": "Request", + "namespace": "inference.sparse_embedding" }, - "properties": [ - { - "description": "The type of the tool.", - "name": "type", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, + "path": [ { + 
"description": "The inference Id", + "name": "inference_id", "required": true, "type": { "kind": "instance_of", "type": { - "name": "CompletionToolChoiceFunction", - "namespace": "inference.unified_inference" + "name": "Id", + "namespace": "_types" } } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L186-L198" - }, - { - "kind": "interface", - "description": "The tool choice function.", - "name": { - "name": "CompletionToolChoiceFunction", - "namespace": "inference.unified_inference" - }, - "properties": [ + "query": [ { - "description": "The name of the function to call.", - "name": "name", - "required": true, + "description": "Specifies the amount of time to wait for the inference request to complete.", + "name": "timeout", + "required": false, + "serverDefault": "30s", "type": { "kind": "instance_of", "type": { - "name": "string", - "namespace": "_builtins" + "name": "Duration", + "namespace": "_types" } } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L175-L184" + "specLocation": "inference/sparse_embedding/SparseEmbeddingRequest.ts#L25-L63" }, { - "kind": "interface", - "description": "The completion tool function definition.", - "name": { - "name": "CompletionToolFunction", - "namespace": "inference.unified_inference" - }, - "properties": [ - { - "description": "A description of what the function does.\nThis is used by the model to choose when and how to call the function.", - "name": "description", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The name of the function.", - "name": "name", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The parameters the functional accepts. 
This should be formatted as a JSON object.", - "name": "parameters", - "required": false, - "type": { - "kind": "user_defined_value" - } - }, - { - "description": "Whether to enable schema adherence when generating the function call.", - "name": "strict", - "required": false, + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", "type": { - "kind": "instance_of", - "type": { - "name": "boolean", - "namespace": "_builtins" - } + "name": "SparseEmbeddingInferenceResult", + "namespace": "inference._types" } } - ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L200-L221" + }, + "name": { + "name": "Response", + "namespace": "inference.sparse_embedding" + }, + "specLocation": "inference/sparse_embedding/SparseEmbeddingResponse.ts#L22-L24" }, { - "kind": "type_alias", - "codegenNames": [ - "string", - "object" + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" ], - "name": { - "name": "CompletionToolType", - "namespace": "inference.unified_inference" - }, - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L97-L100", - "type": { - "kind": "union_of", - "items": [ + "body": { + "kind": "properties", + "properties": [ { - "kind": "instance_of", + "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\nNOTE: Inference endpoints for the completion task type currently only support a single string as input.", + "name": "input", + "required": true, "type": { - "name": "string", - "namespace": "_builtins" + "kind": "union_of", + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ] } }, { - "kind": "instance_of", + "description": "Optional task settings", + "name": "task_settings", + "required": false, "type": { - "name": "CompletionToolChoice", - "namespace": "inference.unified_inference" + "kind": "instance_of", + "type": { + "name": "TaskSettings", + "namespace": "inference._types" + } } } ] - } - }, - { - "kind": "interface", - "description": "An object style representation of a single portion of a conversation.", - "name": { - "name": "ContentObject", - "namespace": "inference.unified_inference" }, - "properties": [ - { - "description": "The text content.", - "name": "text", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The type of content.", - "name": "type", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } + "description": "Perform streaming inference.\nGet real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. 
However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming.", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" } - ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L102-L114" - }, - { - "kind": "interface", - "description": "An object representing part of the conversation.", + }, "name": { - "name": "Message", - "namespace": "inference.unified_inference" + "name": "Request", + "namespace": "inference.stream_completion" }, - "properties": [ - { - "description": "The content of the message.", - "name": "content", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "MessageContent", - "namespace": "inference.unified_inference" - } - } - }, + "path": [ { - "description": "The role of the message author.", - "name": "role", + "description": "The unique identifier for the inference endpoint.", + "name": "inference_id", "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The tool call that this message is responding to.", - "name": "tool_call_id", - "required": false, "type": { "kind": "instance_of", "type": { @@ -148072,58 +148704,28 @@ "namespace": "_types" } } - }, - { - "description": "The tool calls generated by the model.", - "name": "tool_calls", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "ToolCall", - "namespace": "inference.unified_inference" - } - } - } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L153-L173" + "query": [], + "specLocation": "inference/stream_completion/StreamInferenceRequest.ts#L24-L63" }, { - "kind": "type_alias", - "codegenNames": [ - "string", - "object" - ], + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "StreamResult", + "namespace": "_types" + } + } + }, "name": { - "name": "MessageContent", - "namespace": "inference.unified_inference" + "name": "Response", + "namespace": "inference.stream_completion" }, - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L148-L151", - "type": { - "kind": "union_of", - "items": [ - { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - }, - { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "ContentObject", - "namespace": "inference.unified_inference" - } - } - } - ] - } + "specLocation": "inference/stream_completion/StreamInferenceResponse.ts#L22-L24" }, { "kind": "request", @@ -148134,113 +148736,47 @@ "kind": "properties", "properties": [ { - "description": "A list of objects representing the conversation.", - "name": "messages", + "description": "Inference input.\nEither a string or an array of strings.", + "name": "input", "required": true, "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "Message", - "namespace": "inference.unified_inference" - } - } - } - }, - { - "description": "The ID of the model to use.", - "name": "model", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The upper 
bound limit for the number of tokens that can be generated for a completion request.", - "name": "max_completion_tokens", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "long", - "namespace": "_types" - } - } - }, - { - "description": "A sequence of strings to control when the model should stop generating additional tokens.", - "name": "stop", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - }, - { - "description": "The sampling temperature to use.", - "name": "temperature", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } - }, - { - "description": "Controls which tool is called by the model.", - "name": "tool_choice", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "CompletionToolType", - "namespace": "inference.unified_inference" - } - } - }, - { - "description": "A list of tools that the model can call.", - "name": "tools", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "CompletionTool", - "namespace": "inference.unified_inference" + "kind": "union_of", + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } } - } + ] } }, { - "description": "Nucleus sampling, an alternative to sampling with temperature.", - "name": "top_p", + "description": "Optional task settings", + "name": "task_settings", "required": false, "type": { "kind": "instance_of", "type": { - "name": "float", - "namespace": "_types" + "name": "TaskSettings", + "namespace": "inference._types" } } } ] }, - "description": "Perform inference on the service using the Unified Schema", + "description": "Perform text embedding inference on the service", "inherits": { "type": { "name": "RequestBase", @@ -148249,21 +148785,9 @@ }, "name": { "name": "Request", - "namespace": "inference.unified_inference" + "namespace": "inference.text_embedding" }, "path": [ - { - "description": "The task type", - "name": "task_type", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "TaskType", - "namespace": "inference._types" - } - } - }, { "description": "The inference Id", "name": "inference_id", @@ -148292,7 +148816,7 @@ } } ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L27-L95" + "specLocation": "inference/text_embedding/TextEmbeddingRequest.ts#L25-L63" }, { "kind": "response", @@ -148301,98 +148825,16 @@ "value": { "kind": "instance_of", "type": { - "name": "StreamResult", - "namespace": "_types" + "name": "TextEmbeddingInferenceResult", + "namespace": "inference._types" } } }, "name": { "name": "Response", - "namespace": "inference.unified_inference" - }, - "specLocation": "inference/unified_inference/UnifiedResponse.ts#L22-L24" - }, - { - "kind": "interface", - "description": "A tool call generated by the model.", - "name": { - "name": "ToolCall", - "namespace": "inference.unified_inference" + "namespace": "inference.text_embedding" }, - "properties": [ - { - "description": "The identifier of the tool call.", - "name": "id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - }, - { - "description": "The function that the 
model called.", - "name": "function", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "ToolCallFunction", - "namespace": "inference.unified_inference" - } - } - }, - { - "description": "The type of the tool call.", - "name": "type", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L130-L146" - }, - { - "kind": "interface", - "description": "The function that the model called.", - "name": { - "name": "ToolCallFunction", - "namespace": "inference.unified_inference" - }, - "properties": [ - { - "description": "The arguments to call the function with in JSON format.", - "name": "arguments", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The name of the function to call.", - "name": "name", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ], - "specLocation": "inference/unified_inference/UnifiedRequest.ts#L116-L128" + "specLocation": "inference/text_embedding/TextEmbeddingResponse.ts#L22-L24" }, { "kind": "request", diff --git a/output/schema/validation-errors.json b/output/schema/validation-errors.json index 7404da1355..f2ed749089 100644 --- a/output/schema/validation-errors.json +++ b/output/schema/validation-errors.json @@ -355,13 +355,6 @@ ], "response": [] }, - "inference.update": { - "request": [ - "/_inference/{inference_id}/_update: different http methods in the json spec", - "/_inference/{task_type}/{inference_id}/_update: different http methods in the json spec" - ], - "response": [] - }, "ingest.delete_geoip_database": { "request": [ "Request: query parameter 'master_timeout' does not exist in the json spec", diff --git a/output/typescript/types.ts b/output/typescript/types.ts index e4372eb0ba..6cc6cccff2 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13038,6 +13038,10 @@ export interface IndicesValidateQueryResponse { error?: string } +export interface InferenceCompletionInferenceResult { + completion: InferenceCompletionResult[] +} + export interface InferenceCompletionResult { result: string } @@ -13069,15 +13073,6 @@ export interface InferenceInferenceEndpointInfo extends InferenceInferenceEndpoi task_type: InferenceTaskType } -export interface InferenceInferenceResult { - text_embedding_bytes?: InferenceTextEmbeddingByteResult[] - text_embedding_bits?: InferenceTextEmbeddingByteResult[] - text_embedding?: InferenceTextEmbeddingResult[] - sparse_embedding?: InferenceSparseEmbeddingResult[] - completion?: InferenceCompletionResult[] - rerank?: InferenceRankedDocument[] -} - export interface InferenceRankedDocument { index: integer relevance_score: float @@ -13088,8 +13083,16 @@ export interface InferenceRateLimitSetting { requests_per_minute?: integer } +export interface InferenceRerankedInferenceResult { + rerank: InferenceRankedDocument[] +} + export type InferenceServiceSettings = any +export interface InferenceSparseEmbeddingInferenceResult { + sparse_embedding: InferenceSparseEmbeddingResult[] +} + export interface InferenceSparseEmbeddingResult { embedding: InferenceSparseVector } @@ -13098,16 +13101,98 @@ export type InferenceSparseVector = Record export type InferenceTaskSettings = any -export type InferenceTaskType = 'sparse_embedding' | 'text_embedding' | 'rerank' | 'completion' 
+export type InferenceTaskType = 'sparse_embedding' | 'text_embedding' | 'rerank' | 'completion' | 'chat_completion' export interface InferenceTextEmbeddingByteResult { embedding: InferenceDenseByteVector } +export interface InferenceTextEmbeddingInferenceResult { + text_embedding_bytes?: InferenceTextEmbeddingByteResult[] + text_embedding_bits?: InferenceTextEmbeddingByteResult[] + text_embedding?: InferenceTextEmbeddingResult[] +} + export interface InferenceTextEmbeddingResult { embedding: InferenceDenseVector } +export interface InferenceChatCompletionUnifiedCompletionTool { + type: string + function: InferenceChatCompletionUnifiedCompletionToolFunction +} + +export interface InferenceChatCompletionUnifiedCompletionToolChoice { + type: string + function: InferenceChatCompletionUnifiedCompletionToolChoiceFunction +} + +export interface InferenceChatCompletionUnifiedCompletionToolChoiceFunction { + name: string +} + +export interface InferenceChatCompletionUnifiedCompletionToolFunction { + description?: string + name: string + parameters?: any + strict?: boolean +} + +export type InferenceChatCompletionUnifiedCompletionToolType = string | InferenceChatCompletionUnifiedCompletionToolChoice + +export interface InferenceChatCompletionUnifiedContentObject { + text: string + type: string +} + +export interface InferenceChatCompletionUnifiedMessage { + content?: InferenceChatCompletionUnifiedMessageContent + role: string + tool_call_id?: Id + tool_calls?: InferenceChatCompletionUnifiedToolCall[] +} + +export type InferenceChatCompletionUnifiedMessageContent = string | InferenceChatCompletionUnifiedContentObject[] + +export interface InferenceChatCompletionUnifiedRequest extends RequestBase { + inference_id: Id + timeout?: Duration + body?: { + messages: InferenceChatCompletionUnifiedMessage[] + model?: string + max_completion_tokens?: long + stop?: string[] + temperature?: float + tool_choice?: InferenceChatCompletionUnifiedCompletionToolType + tools?: InferenceChatCompletionUnifiedCompletionTool[] + top_p?: float + } +} + +export type InferenceChatCompletionUnifiedResponse = StreamResult + +export interface InferenceChatCompletionUnifiedToolCall { + id: Id + function: InferenceChatCompletionUnifiedToolCallFunction + type: string +} + +export interface InferenceChatCompletionUnifiedToolCallFunction { + arguments: string + name: string +} + +export interface InferenceCompletionRequest extends RequestBase { + inference_id: Id + timeout?: Duration + body?: { + input: string | string[] + task_settings?: InferenceTaskSettings + } +} + +export type InferenceCompletionResponse = InferenceCompletionInferenceResult + export interface InferenceDeleteRequest extends RequestBase { task_type?: InferenceTaskType inference_id: Id @@ -13126,19 +13211,6 @@ export interface InferenceGetResponse { endpoints: InferenceInferenceEndpointInfo[] } -export interface InferenceInferenceRequest extends RequestBase { - task_type?: InferenceTaskType - inference_id: Id - timeout?: Duration - body?: { - query?: string - input: string | string[] - task_settings?: InferenceTaskSettings - } -} - -export type InferenceInferenceResponse = InferenceInferenceResult - export interface InferencePutRequest extends RequestBase { task_type?: InferenceTaskType inference_id: Id @@ -13171,81 +13243,49 @@ export interface InferencePutWatsonxWatsonxServiceSettings { export type InferencePutWatsonxWatsonxTaskType = 'text_embedding' -export interface InferenceStreamInferenceRequest extends RequestBase { +export interface InferenceRerankRequest 
extends RequestBase { inference_id: Id - task_type?: InferenceTaskType + timeout?: Duration body?: { + query: string input: string | string[] + task_settings?: InferenceTaskSettings } } -export type InferenceStreamInferenceResponse = StreamResult - -export interface InferenceUnifiedInferenceCompletionTool { - type: string - function: InferenceUnifiedInferenceCompletionToolFunction -} - -export interface InferenceUnifiedInferenceCompletionToolChoice { - type: string - function: InferenceUnifiedInferenceCompletionToolChoiceFunction -} - -export interface InferenceUnifiedInferenceCompletionToolChoiceFunction { - name: string -} +export type InferenceRerankResponse = InferenceRerankedInferenceResult -export interface InferenceUnifiedInferenceCompletionToolFunction { - description?: string - name: string - parameters?: any - strict?: boolean +export interface InferenceSparseEmbeddingRequest extends RequestBase { + inference_id: Id + timeout?: Duration + body?: { + input: string | string[] + task_settings?: InferenceTaskSettings + } } -export type InferenceUnifiedInferenceCompletionToolType = string | InferenceUnifiedInferenceCompletionToolChoice - -export interface InferenceUnifiedInferenceContentObject { - text: string - type: string -} +export type InferenceSparseEmbeddingResponse = InferenceSparseEmbeddingInferenceResult -export interface InferenceUnifiedInferenceMessage { - content?: InferenceUnifiedInferenceMessageContent - role: string - tool_call_id?: Id - tool_calls?: InferenceUnifiedInferenceToolCall[] +export interface InferenceStreamCompletionRequest extends RequestBase { + inference_id: Id + body?: { + input: string | string[] + task_settings?: InferenceTaskSettings + } } -export type InferenceUnifiedInferenceMessageContent = string | InferenceUnifiedInferenceContentObject[] +export type InferenceStreamCompletionResponse = StreamResult -export interface InferenceUnifiedInferenceRequest extends RequestBase { - task_type?: InferenceTaskType +export interface InferenceTextEmbeddingRequest extends RequestBase { inference_id: Id timeout?: Duration body?: { - messages: InferenceUnifiedInferenceMessage[] - model?: string - max_completion_tokens?: long - stop?: string[] - temperature?: float - tool_choice?: InferenceUnifiedInferenceCompletionToolType - tools?: InferenceUnifiedInferenceCompletionTool[] - top_p?: float + input: string | string[] + task_settings?: InferenceTaskSettings } } -export type InferenceUnifiedInferenceResponse = StreamResult - -export interface InferenceUnifiedInferenceToolCall { - id: Id - function: InferenceUnifiedInferenceToolCallFunction - type: string -} - -export interface InferenceUnifiedInferenceToolCallFunction { - arguments: string - name: string -} +export type InferenceTextEmbeddingResponse = InferenceTextEmbeddingInferenceResult export interface InferenceUpdateRequest extends RequestBase { inference_id: Id diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index f81f846892..621b3ab937 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -321,6 +321,7 @@ inference-api-put,https://www.elastic.co/guide/en/elasticsearch/reference/{branc inference-api-put-watsonx,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-watsonx-ai.html inference-api-stream,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/stream-inference-api.html inference-api-update,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/update-inference-api.html 
+inference-api-chat-completion,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/chat-completion-inference-api.html inference-processor,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/inference-processor.html info-api,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/info-api.html ingest,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/ingest.html diff --git a/specification/_json_spec/inference.chat_completion_unified.json b/specification/_json_spec/inference.chat_completion_unified.json new file mode 100644 index 0000000000..3fb9338dc5 --- /dev/null +++ b/specification/_json_spec/inference.chat_completion_unified.json @@ -0,0 +1,31 @@ +{ + "inference.chat_completion_unified": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/chat-completion-inference.html", + "description": "Perform chat completion inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["text/event-stream"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/chat_completion/{inference_id}/_stream", + "methods": ["POST"], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git a/specification/_json_spec/inference.inference.json b/specification/_json_spec/inference.completion.json similarity index 55% rename from specification/_json_spec/inference.inference.json rename to specification/_json_spec/inference.completion.json index 6afa0a8ce6..f1543c4678 100644 --- a/specification/_json_spec/inference.inference.json +++ b/specification/_json_spec/inference.completion.json @@ -1,8 +1,8 @@ { - "inference.inference": { + "inference.completion": { "documentation": { "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", - "description": "Perform inference" + "description": "Perform completion inference" }, "stability": "experimental", "visibility": "public", @@ -13,7 +13,7 @@ "url": { "paths": [ { - "path": "/_inference/{inference_id}", + "path": "/_inference/completion/{inference_id}", "methods": ["POST"], "parts": { "inference_id": { @@ -21,20 +21,6 @@ "description": "The inference Id" } } - }, - { - "path": "/_inference/{task_type}/{inference_id}", - "methods": ["POST"], - "parts": { - "task_type": { - "type": "string", - "description": "The task type" - }, - "inference_id": { - "type": "string", - "description": "The inference Id" - } - } } ] }, diff --git a/specification/_json_spec/inference.rerank.json b/specification/_json_spec/inference.rerank.json new file mode 100644 index 0000000000..ac9601852a --- /dev/null +++ b/specification/_json_spec/inference.rerank.json @@ -0,0 +1,31 @@ +{ + "inference.rerank": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "description": "Perform reranking inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/rerank/{inference_id}", + "methods": ["POST"], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git a/specification/_json_spec/inference.sparse_embedding.json 
b/specification/_json_spec/inference.sparse_embedding.json new file mode 100644 index 0000000000..48e9748cf0 --- /dev/null +++ b/specification/_json_spec/inference.sparse_embedding.json @@ -0,0 +1,31 @@ +{ + "inference.sparse_embedding": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "description": "Perform sparse embedding inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/sparse_embedding/{inference_id}", + "methods": ["POST"], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git a/specification/_json_spec/inference.stream_inference.json b/specification/_json_spec/inference.stream_completion.json similarity index 58% rename from specification/_json_spec/inference.stream_inference.json rename to specification/_json_spec/inference.stream_completion.json index c8d0501e2e..2a579c7995 100644 --- a/specification/_json_spec/inference.stream_inference.json +++ b/specification/_json_spec/inference.stream_completion.json @@ -1,5 +1,5 @@ { - "inference.stream_inference": { + "inference.stream_completion": { "documentation": { "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-stream-inference-api.html", "description": "Perform streaming inference" @@ -13,7 +13,7 @@ "url": { "paths": [ { - "path": "/_inference/{inference_id}/_stream", + "path": "/_inference/completion/{inference_id}/_stream", "methods": ["POST"], "parts": { "inference_id": { @@ -21,20 +21,6 @@ "description": "The inference Id" } } - }, - { - "path": "/_inference/{task_type}/{inference_id}/_stream", - "methods": ["POST"], - "parts": { - "task_type": { - "type": "string", - "description": "The task type" - }, - "inference_id": { - "type": "string", - "description": "The inference Id" - } - } } ] }, diff --git a/specification/_json_spec/inference.text_embedding.json b/specification/_json_spec/inference.text_embedding.json new file mode 100644 index 0000000000..2f7c43c38a --- /dev/null +++ b/specification/_json_spec/inference.text_embedding.json @@ -0,0 +1,31 @@ +{ + "inference.text_embedding": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "description": "Perform text embedding inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/text_embedding/{inference_id}", + "methods": ["POST"], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git a/specification/_json_spec/inference.unified_inference.json b/specification/_json_spec/inference.unified_inference.json deleted file mode 100644 index 84182d19f8..0000000000 --- a/specification/_json_spec/inference.unified_inference.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "inference.unified_inference": { - "documentation": { - "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/unified-inference-api.html", - "description": "Perform inference using the Unified Schema" - }, - "stability": "stable", - "visibility": "public", - "headers": { - 
"accept": ["text/event-stream"], - "content_type": ["application/json"] - }, - "url": { - "paths": [ - { - "path": "/_inference/{inference_id}/_unified", - "methods": ["POST"], - "parts": { - "inference_id": { - "type": "string", - "description": "The inference Id" - } - } - }, - { - "path": "/_inference/{task_type}/{inference_id}/_unified", - "methods": ["POST"], - "parts": { - "task_type": { - "type": "string", - "description": "The task type" - }, - "inference_id": { - "type": "string", - "description": "The inference Id" - } - } - } - ] - }, - "body": { - "description": "The inference payload" - } - } -} diff --git a/specification/inference/_types/Results.ts b/specification/inference/_types/Results.ts index 6cf6ba25c6..72c6e08adc 100644 --- a/specification/inference/_types/Results.ts +++ b/specification/inference/_types/Results.ts @@ -37,6 +37,13 @@ export class SparseEmbeddingResult { embedding: SparseVector } +/** + * The response format for the sparse embedding request. + */ +export class SparseEmbeddingInferenceResult { + sparse_embedding: Array +} + /** * Text Embedding results containing bytes are represented as Dense * Vectors of bytes. @@ -57,6 +64,16 @@ export class TextEmbeddingResult { embedding: DenseVector } +/** + * TextEmbeddingInferenceResult is an aggregation of mutually exclusive text_embedding variants + * @variants container + */ +export class TextEmbeddingInferenceResult { + text_embedding_bytes?: Array + text_embedding_bits?: Array + text_embedding?: Array +} + /** * The completion result object */ @@ -64,6 +81,13 @@ export class CompletionResult { result: string } +/** + * Defines the completion result. + */ +export class CompletionInferenceResult { + completion: Array +} + /** * The rerank result object representing a single ranked document * id: the original index of the document in the request @@ -77,16 +101,10 @@ export class RankedDocument { } /** - * InferenceResult is an aggregation of mutually exclusive variants - * @variants container + * Defines the response for a rerank request. */ -export class InferenceResult { - text_embedding_bytes?: Array - text_embedding_bits?: Array - text_embedding?: Array - sparse_embedding?: Array - completion?: Array - rerank?: Array +export class RerankedInferenceResult { + rerank: Array } /** diff --git a/specification/inference/_types/TaskType.ts b/specification/inference/_types/TaskType.ts index 8bdc9f3727..7299480cbe 100644 --- a/specification/inference/_types/TaskType.ts +++ b/specification/inference/_types/TaskType.ts @@ -24,5 +24,6 @@ export enum TaskType { sparse_embedding, text_embedding, rerank, - completion + completion, + chat_completion } diff --git a/specification/inference/unified_inference/UnifiedRequest.ts b/specification/inference/chat_completion_unified/UnifiedRequest.ts similarity index 92% rename from specification/inference/unified_inference/UnifiedRequest.ts rename to specification/inference/chat_completion_unified/UnifiedRequest.ts index 268b543ebc..ac460afa95 100644 --- a/specification/inference/unified_inference/UnifiedRequest.ts +++ b/specification/inference/chat_completion_unified/UnifiedRequest.ts @@ -17,7 +17,6 @@ * under the License. 
*/ -import { TaskType } from '@inference/_types/TaskType' import { UserDefinedValue } from '@spec_utils/UserDefinedValue' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' @@ -25,27 +24,20 @@ import { float, long } from '@_types/Numeric' import { Duration } from '@_types/Time' /** - * Perform inference on the service using the Unified Schema - * @rest_spec_name inference.unified_inference + * Perform chat completion inference + * @rest_spec_name inference.chat_completion_unified * @availability stack since=8.18.0 stability=stable visibility=public * @availability serverless stability=stable visibility=public + * @doc_id inference-api-chat-completion */ export interface Request extends RequestBase { urls: [ { - path: '/_inference/{inference_id}/_unified' - methods: ['POST'] - }, - { - path: '/_inference/{task_type}/{inference_id}/_unified' + path: '/_inference/chat_completion/{inference_id}/_stream' methods: ['POST'] } ] path_parts: { - /** - * The task type - */ - task_type?: TaskType /** * The inference Id */ diff --git a/specification/inference/unified_inference/UnifiedResponse.ts b/specification/inference/chat_completion_unified/UnifiedResponse.ts similarity index 100% rename from specification/inference/unified_inference/UnifiedResponse.ts rename to specification/inference/chat_completion_unified/UnifiedResponse.ts diff --git a/specification/inference/completion/CompletionRequest.ts b/specification/inference/completion/CompletionRequest.ts new file mode 100644 index 0000000000..cbcf938a15 --- /dev/null +++ b/specification/inference/completion/CompletionRequest.ts @@ -0,0 +1,63 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { TaskSettings } from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { Duration } from '@_types/Time' + +/** + * Perform completion inference on the service + * @rest_spec_name inference.completion + * @availability stack since=8.11.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @doc_id inference-api-post + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/completion/{inference_id}' + methods: ['POST'] + } + ] + path_parts: { + /** + * The inference Id + */ + inference_id: Id + } + query_parameters: { + /** + * Specifies the amount of time to wait for the inference request to complete. + * @server_default 30s + */ + timeout?: Duration + } + body: { + /** + * Inference input. + * Either a string or an array of strings. 
+ */ + input: string | Array<string> + /** + * Optional task settings + */ + task_settings?: TaskSettings + } +} diff --git a/specification/inference/completion/CompletionResponse.ts b/specification/inference/completion/CompletionResponse.ts new file mode 100644 index 0000000000..f852232d0f --- /dev/null +++ b/specification/inference/completion/CompletionResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { CompletionInferenceResult } from '@inference/_types/Results' + +export class Response { + body: CompletionInferenceResult +} diff --git a/specification/inference/inference/InferenceRequest.ts b/specification/inference/rerank/RerankRequest.ts similarity index 61% rename from specification/inference/inference/InferenceRequest.ts rename to specification/inference/rerank/RerankRequest.ts index c646fd5356..666356025f 100644 --- a/specification/inference/inference/InferenceRequest.ts +++ b/specification/inference/rerank/RerankRequest.ts @@ -18,21 +18,13 @@ */ import { TaskSettings } from '@inference/_types/Services' -import { TaskType } from '@inference/_types/TaskType' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' import { Duration } from '@_types/Time' /** - * Perform inference on the service. - * - * This API enables you to use machine learning models to perform specific tasks on data that you provide as an input. - * It returns a response with the results of the tasks. - * The inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API. - * - * > info - * > The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs. 
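With the completion request and response files above, the wire contract for `inference.completion` is fully pinned down: POST to `/_inference/completion/{inference_id}`, a body whose `input` is a string or an array of strings plus optional `task_settings`, and a `CompletionInferenceResult` whose `completion` array is always present (the old generic request, whose description ends here, is re-scoped to rerank below). A sketch of that exchange over plain `fetch`; the `ES_URL` constant and the omitted authorization header are placeholders, not part of the spec:

```ts
// Wire-level sketch of POST /_inference/completion/{inference_id}.
// Path, body, and response shape are taken from the spec files in this diff.
interface CompletionResult { result: string }
interface CompletionInferenceResult { completion: CompletionResult[] }

const ES_URL = 'https://localhost:9200' // placeholder cluster address

async function complete(inferenceId: string, input: string | string[]): Promise<string[]> {
  const res = await fetch(`${ES_URL}/_inference/completion/${encodeURIComponent(inferenceId)}?timeout=30s`, {
    method: 'POST',
    headers: { 'content-type': 'application/json' }, // add authorization as your deployment requires
    body: JSON.stringify({ input }),
  })
  if (!res.ok) throw new Error(`completion inference failed: ${res.status}`)
  const body = (await res.json()) as CompletionInferenceResult
  return body.completion.map(c => c.result)
}
```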
- * @rest_spec_name inference.inference + * Perform reranking inference on the service + * @rest_spec_name inference.rerank * @availability stack since=8.11.0 stability=stable visibility=public * @availability serverless stability=stable visibility=public * @cluster_privileges monitor_inference @@ -41,19 +33,11 @@ import { Duration } from '@_types/Time' export interface Request extends RequestBase { urls: [ { - path: '/_inference/{inference_id}' - methods: ['POST'] - }, - { - path: '/_inference/{task_type}/{inference_id}' + path: '/_inference/rerank/{inference_id}' methods: ['POST'] } ] path_parts: { - /** - * The type of inference task that the model performs. - */ - task_type?: TaskType /** * The unique identifier for the inference endpoint. */ @@ -68,10 +52,9 @@ export interface Request extends RequestBase { } body: { /** - * The query input, which is required only for the `rerank` task. - * It is not required for other tasks. + * Query input. */ - query?: string + query: string /** * The text on which you want to perform the inference task. * It can be a single string or an array. diff --git a/specification/inference/inference/InferenceResponse.ts b/specification/inference/rerank/RerankResponse.ts similarity index 89% rename from specification/inference/inference/InferenceResponse.ts rename to specification/inference/rerank/RerankResponse.ts index 842d9a4f27..f0a4b48a67 100644 --- a/specification/inference/inference/InferenceResponse.ts +++ b/specification/inference/rerank/RerankResponse.ts @@ -17,8 +17,8 @@ * under the License. */ -import { InferenceResult } from '@inference/_types/Results' +import { RerankedInferenceResult } from '@inference/_types/Results' export class Response { - body: InferenceResult + body: RerankedInferenceResult } diff --git a/specification/inference/sparse_embedding/SparseEmbeddingRequest.ts b/specification/inference/sparse_embedding/SparseEmbeddingRequest.ts new file mode 100644 index 0000000000..90ce321816 --- /dev/null +++ b/specification/inference/sparse_embedding/SparseEmbeddingRequest.ts @@ -0,0 +1,63 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import { TaskSettings } from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { Duration } from '@_types/Time' + +/** + * Perform sparse embedding inference on the service + * @rest_spec_name inference.sparse_embedding + * @availability stack since=8.11.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @doc_id inference-api-post + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/sparse_embedding/{inference_id}' + methods: ['POST'] + } + ] + path_parts: { + /** + * The inference Id + */ + inference_id: Id + } + query_parameters: { + /** + * Specifies the amount of time to wait for the inference request to complete. + * @server_default 30s + */ + timeout?: Duration + } + body: { + /** + * Inference input. + * Either a string or an array of strings. + */ + input: string | Array<string> + /** + * Optional task settings + */ + task_settings?: TaskSettings + } +} diff --git a/specification/inference/sparse_embedding/SparseEmbeddingResponse.ts b/specification/inference/sparse_embedding/SparseEmbeddingResponse.ts new file mode 100644 index 0000000000..3a7e0de59a --- /dev/null +++ b/specification/inference/sparse_embedding/SparseEmbeddingResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { SparseEmbeddingInferenceResult } from '@inference/_types/Results' + +export class Response { + body: SparseEmbeddingInferenceResult +} diff --git a/specification/inference/stream_inference/StreamInferenceRequest.ts b/specification/inference/stream_completion/StreamInferenceRequest.ts similarity index 88% rename from specification/inference/stream_inference/StreamInferenceRequest.ts rename to specification/inference/stream_completion/StreamInferenceRequest.ts index 0bb675c5fb..ae83157ffb 100644 --- a/specification/inference/stream_inference/StreamInferenceRequest.ts +++ b/specification/inference/stream_completion/StreamInferenceRequest.ts @@ -17,7 +17,7 @@ * under the License. */ -import { TaskType } from '@inference/_types/TaskType' +import { TaskSettings } from '@inference/_types/Services' import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' @@ -29,7 +29,7 @@ import { Id } from '@_types/common' * IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. 
However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs. * * This API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming. - * @rest_spec_name inference.stream_inference + * @rest_spec_name inference.stream_completion * @availability stack since=8.16.0 stability=stable visibility=public * @cluster_privileges monitor_inference * @doc_id inference-api-stream @@ -37,11 +37,7 @@ import { Id } from '@_types/common' export interface Request extends RequestBase { urls: [ { - path: '/_inference/{inference_id}/_stream' - methods: ['POST'] - }, - { - path: '/_inference/{task_type}/{inference_id}/_stream' + path: '/_inference/completion/{inference_id}/_stream' methods: ['POST'] } ] @@ -50,10 +46,6 @@ export interface Request extends RequestBase { * The unique identifier for the inference endpoint. */ inference_id: Id - /** - * The type of task that the model performs. - */ - task_type?: TaskType } body: { /** @@ -63,5 +55,9 @@ export interface Request extends RequestBase { * NOTE: Inference endpoints for the completion task type currently only support a single string as input. */ input: string | string[] + /** + * Optional task settings + */ + task_settings?: TaskSettings } } diff --git a/specification/inference/stream_inference/StreamInferenceResponse.ts b/specification/inference/stream_completion/StreamInferenceResponse.ts similarity index 100% rename from specification/inference/stream_inference/StreamInferenceResponse.ts rename to specification/inference/stream_completion/StreamInferenceResponse.ts diff --git a/specification/inference/stream_inference/examples/request/StreamInferenceRequestExample1.yaml b/specification/inference/stream_completion/examples/request/StreamInferenceRequestExample1.yaml similarity index 100% rename from specification/inference/stream_inference/examples/request/StreamInferenceRequestExample1.yaml rename to specification/inference/stream_completion/examples/request/StreamInferenceRequestExample1.yaml diff --git a/specification/inference/text_embedding/TextEmbeddingRequest.ts b/specification/inference/text_embedding/TextEmbeddingRequest.ts new file mode 100644 index 0000000000..f707cb997e --- /dev/null +++ b/specification/inference/text_embedding/TextEmbeddingRequest.ts @@ -0,0 +1,63 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import { TaskSettings } from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { Duration } from '@_types/Time' + +/** + * Perform text embedding inference on the service + * @rest_spec_name inference.text_embedding + * @availability stack since=8.11.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @doc_id inference-api-post + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/text_embedding/{inference_id}' + methods: ['POST'] + } + ] + path_parts: { + /** + * The inference Id + */ + inference_id: Id + } + query_parameters: { + /** + * Specifies the amount of time to wait for the inference request to complete. + * @server_default 30s + */ + timeout?: Duration + } + body: { + /** + * Inference input. + * Either a string or an array of strings. + */ + input: string | Array + /** + * Optional task settings + */ + task_settings?: TaskSettings + } +} diff --git a/specification/inference/text_embedding/TextEmbeddingResponse.ts b/specification/inference/text_embedding/TextEmbeddingResponse.ts new file mode 100644 index 0000000000..c5cb85bd66 --- /dev/null +++ b/specification/inference/text_embedding/TextEmbeddingResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { TextEmbeddingInferenceResult } from '@inference/_types/Results' + +export class Response { + body: TextEmbeddingInferenceResult +} diff --git a/specification/inference/update/UpdateInferenceRequest.ts b/specification/inference/update/UpdateInferenceRequest.ts index b1ea359fe6..ef216c9207 100644 --- a/specification/inference/update/UpdateInferenceRequest.ts +++ b/specification/inference/update/UpdateInferenceRequest.ts @@ -39,11 +39,11 @@ export interface Request extends RequestBase { urls: [ { path: '/_inference/{inference_id}/_update' - methods: ['POST'] + methods: ['PUT'] }, { path: '/_inference/{task_type}/{inference_id}/_update' - methods: ['POST'] + methods: ['PUT'] } ] path_parts: {