Skip to content

Commit af6a0b3

Browse files
authored
Adds custom inference service API docs (#4852)
* Adds custom inference service docs. * Adds response documentation. * Adds request params docs. * Fixes code style. * Fixes data type. * Adds json_spec. * Fixes typo. * Adds doc_id to the table.csv file. * Makes it prettier. * Adds examples. * Format fix. * Addresses feedback. * Adds more parameters and explanations. * Completes json_parser. * Addresses feedback. * Format fix. * Addresses more feedback.
1 parent 5690689 commit af6a0b3

14 files changed

+586
-12
lines changed

specification/_doc_ids/table.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ inference-api-put-anthropic,https://www.elastic.co/docs/api/doc/elasticsearch/op
358358
inference-api-put-azureaistudio,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureaistudio,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-azure-ai-studio.html,
359359
inference-api-put-azureopenai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-azureopenai,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-azure-openai.html,
360360
inference-api-put-cohere,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-cohere,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-cohere.html,
361+
inference-api-put-custom,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom,https://www.elastic.co/guide/en/elasticsearch/reference/8.19/infer-service-custom.html,
361362
inference-api-put-deepseek,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-deepseek,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-deepseek.html,
362363
inference-api-put-eis,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-eis,,
363364
inference-api-put-elasticsearch,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-elasticsearch,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-elasticsearch.html,
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"inference.put_custom": {
3+
"documentation": {
4+
"url": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom",
5+
"description": "Configure a custom inference endpoint"
6+
},
7+
"stability": "stable",
8+
"visibility": "public",
9+
"headers": {
10+
"accept": ["application/json"],
11+
"content_type": ["application/json"]
12+
},
13+
"url": {
14+
"paths": [
15+
{
16+
"path": "/_inference/{task_type}/{custom_inference_id}",
17+
"methods": ["PUT"],
18+
"parts": {
19+
"task_type": {
20+
"type": "string",
21+
"description": "The task type"
22+
},
23+
"custom_inference_id": {
24+
"type": "string",
25+
"description": "The inference Id"
26+
}
27+
}
28+
}
29+
]
30+
},
31+
"body": {
32+
"description": "The inference endpoint's task and service settings"
33+
}
34+
}
35+
}

specification/inference/_types/CommonTypes.ts

Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -756,6 +756,256 @@ export class CohereTaskSettings {
756756
truncate?: CohereTruncateType
757757
}
758758

759+
export class CustomServiceSettings {
760+
/**
761+
* Specifies the HTTP header parameters – such as `Authorization` or `Content-Type` – that are required to access the custom service.
762+
* For example:
763+
* ```
764+
* "headers":{
765+
* "Authorization": "Bearer ${api_key}",
766+
* "Content-Type": "application/json;charset=utf-8"
767+
* }
768+
* ```
769+
*/
770+
headers?: UserDefinedValue
771+
/**
772+
* Specifies the input type translation values that are used to replace the `${input_type}` template in the request body.
773+
* For example:
774+
* ```
775+
* "input_type": {
776+
* "translation": {
777+
* "ingest": "do_ingest",
778+
* "search": "do_search"
779+
* },
780+
* "default": "a_default"
781+
* },
782+
* ```
783+
* If the subsequent inference requests come from a search context, the `search` key will be used and the template will be replaced with `do_search`.
784+
* If they come from an ingest context, `do_ingest` is used. If they come from a different context that is not specified, the default value is used. If no default is specified, an empty string is used.
785+
* The keys of `translation` can be:
786+
* * `classification`
787+
* * `clustering`
788+
* * `ingest`
789+
* * `search`
790+
*/
791+
input_type?: UserDefinedValue
792+
/**
793+
* Specifies the query parameters as a list of tuples. The arrays inside the `query_parameters` must have two items, a key and a value.
794+
* For example:
795+
* ```
796+
* "query_parameters":[
797+
* ["param_key", "some_value"],
798+
* ["param_key", "another_value"],
799+
* ["other_key", "other_value"]
800+
* ]
801+
* ```
802+
* If the base URL is `https://www.elastic.co`, it results in: `https://www.elastic.co?param_key=some_value&param_key=another_value&other_key=other_value`.
803+
*/
804+
query_parameters?: UserDefinedValue
805+
/**
806+
* The request configuration object.
807+
*/
808+
request: CustomRequestParams
809+
/**
810+
* The response configuration object.
811+
*/
812+
response: CustomResponseParams
813+
/**
814+
* Specifies secret parameters, like `api_key` or `api_token`, that are required to access the custom service.
815+
* For example:
816+
* ```
817+
* "secret_parameters":{
818+
* "api_key":"<api_key>"
819+
* }
820+
* ```
821+
*/
822+
secret_parameters: UserDefinedValue
823+
/**
824+
* The URL endpoint to use for the requests.
825+
*/
826+
url?: string
827+
}
828+
829+
export class CustomRequestParams {
830+
/**
831+
* The body structure of the request. It requires passing in the string-escaped result of the JSON format HTTP request body.
832+
* For example:
833+
* ```
834+
* "request": {"content": "{\"input\":${input}}"}
835+
* ```
836+
* > info
837+
* > The content string needs to be a single line except when using the Kibana console.
838+
*/
839+
content: string
840+
}
841+
842+
export class CustomResponseParams {
843+
/**
844+
* Specifies the JSON parser that is used to parse the response from the custom service.
845+
* Different task types require different json_parser parameters.
846+
* For example:
847+
* ```
848+
* # text_embedding
849+
* # For a response like this:
850+
*
851+
* {
852+
* "object": "list",
853+
* "data": [
854+
* {
855+
* "object": "embedding",
856+
* "index": 0,
857+
* "embedding": [
858+
* 0.014539449,
859+
* -0.015288644
860+
* ]
861+
* }
862+
* ],
863+
* "model": "text-embedding-ada-002-v2",
864+
* "usage": {
865+
* "prompt_tokens": 8,
866+
* "total_tokens": 8
867+
* }
868+
* }
869+
*
870+
* # the json_parser definition should look like this:
871+
*
872+
* "response":{
873+
* "json_parser":{
874+
* "text_embeddings":"$.data[*].embedding[*]"
875+
* }
876+
* }
877+
*
878+
* # sparse_embedding
879+
* # For a response like this:
880+
*
881+
* {
882+
* "request_id": "75C50B5B-E79E-4930-****-F48DBB392231",
883+
* "latency": 22,
884+
* "usage": {
885+
* "token_count": 11
886+
* },
887+
* "result": {
888+
* "sparse_embeddings": [
889+
* {
890+
* "index": 0,
891+
* "embedding": [
892+
* {
893+
* "token_id": 6,
894+
* "weight": 0.101
895+
* },
896+
* {
897+
* "token_id": 163040,
898+
* "weight": 0.28417
899+
* }
900+
* ]
901+
* }
902+
* ]
903+
* }
904+
* }
905+
*
906+
* # the json_parser definition should look like this:
907+
*
908+
* "response":{
909+
* "json_parser":{
910+
* "token_path":"$.result.sparse_embeddings[*].embedding[*].token_id",
911+
* "weight_path":"$.result.sparse_embeddings[*].embedding[*].weight"
912+
* }
913+
* }
914+
*
915+
* # rerank
916+
* # For a response like this:
917+
*
918+
* {
919+
* "results": [
920+
* {
921+
* "index": 3,
922+
* "relevance_score": 0.999071,
923+
* "document": "abc"
924+
* },
925+
* {
926+
* "index": 4,
927+
* "relevance_score": 0.7867867,
928+
* "document": "123"
929+
* },
930+
* {
931+
* "index": 0,
932+
* "relevance_score": 0.32713068,
933+
* "document": "super"
934+
* }
935+
* ],
936+
* }
937+
*
938+
* # the json_parser definition should look like this:
939+
*
940+
* "response":{
941+
* "json_parser":{
942+
* "reranked_index":"$.results[*].index", // optional
942+
* "relevance_score":"$.results[*].relevance_score",
943+
* "document_text":"$.results[*].document" // optional
945+
* }
946+
* }
947+
*
948+
* # completion
949+
* # For a response like this:
950+
*
951+
* {
952+
* "id": "chatcmpl-B9MBs8CjcvOU2jLn4n570S5qMJKcT",
953+
* "object": "chat.completion",
954+
* "created": 1741569952,
955+
* "model": "gpt-4.1-2025-04-14",
956+
* "choices": [
957+
* {
958+
* "index": 0,
959+
* "message": {
960+
* "role": "assistant",
961+
* "content": "Hello! How can I assist you today?",
962+
* "refusal": null,
963+
* "annotations": []
964+
* },
965+
* "logprobs": null,
966+
* "finish_reason": "stop"
967+
* }
968+
* ]
969+
* }
970+
*
971+
* # the json_parser definition should look like this:
972+
*
973+
* "response":{
974+
* "json_parser":{
975+
* "completion_result":"$.choices[*].message.content"
976+
* }
977+
* }
978+
*/
979+
json_parser: UserDefinedValue
980+
}
981+
982+
export enum CustomTaskType {
983+
text_embedding,
984+
sparse_embedding,
985+
rerank,
986+
completion
987+
}
988+
989+
export enum CustomServiceType {
990+
custom
991+
}
992+
993+
export class CustomTaskSettings {
994+
/**
995+
* Specifies parameters that are required to run the custom service. The parameters depend on the model your custom service uses.
996+
* For example:
997+
* ```
998+
* "task_settings":{
999+
* "parameters":{
1000+
* "input_type":"query",
1001+
* "return_token":true
1002+
* }
1003+
* }
1004+
* ```
1005+
*/
1006+
parameters?: UserDefinedValue
1007+
}
1008+
7591009
export class EisServiceSettings {
7601010
/**
7611011
* The name of the model to use for the inference task.

specification/inference/_types/Services.ts

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import {
2727
TaskTypeAzureAIStudio,
2828
TaskTypeAzureOpenAI,
2929
TaskTypeCohere,
30+
TaskTypeCustom,
3031
TaskTypeDeepSeek,
3132
TaskTypeElasticsearch,
3233
TaskTypeELSER,
@@ -75,18 +76,6 @@ export class InferenceEndpointInfo extends InferenceEndpoint {
7576
*/
7677
task_type: TaskType
7778
}
78-
79-
export class InferenceEndpointInfoJinaAi extends InferenceEndpoint {
80-
/**
81-
* The inference Id
82-
*/
83-
inference_id: string
84-
/**
85-
* The task type
86-
*/
87-
task_type: TaskTypeJinaAi
88-
}
89-
9079
export class InferenceEndpointInfoAlibabaCloudAI extends InferenceEndpoint {
9180
/**
9281
* The inference Id
@@ -153,6 +142,16 @@ export class InferenceEndpointInfoCohere extends InferenceEndpoint {
153142
task_type: TaskTypeCohere
154143
}
155144

145+
export class InferenceEndpointInfoCustom extends InferenceEndpoint {
146+
/**
147+
* The inference Id
148+
*/
149+
inference_id: string
150+
/**
151+
* The task type
152+
*/
153+
task_type: TaskTypeCustom
154+
}
156155
export class InferenceEndpointInfoDeepSeek extends InferenceEndpoint {
157156
/**
158157
* The inference Id
@@ -219,6 +218,17 @@ export class InferenceEndpointInfoHuggingFace extends InferenceEndpoint {
219218
task_type: TaskTypeHuggingFace
220219
}
221220

221+
export class InferenceEndpointInfoJinaAi extends InferenceEndpoint {
222+
/**
223+
* The inference Id
224+
*/
225+
inference_id: string
226+
/**
227+
* The task type
228+
*/
229+
task_type: TaskTypeJinaAi
230+
}
231+
222232
export class InferenceEndpointInfoMistral extends InferenceEndpoint {
223233
/**
224234
* The inference Id

specification/inference/_types/TaskType.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,13 @@ export enum TaskTypeCohere {
6565
completion
6666
}
6767

68+
export enum TaskTypeCustom {
69+
text_embedding,
70+
sparse_embedding,
71+
rerank,
72+
completion
73+
}
74+
6875
export enum TaskTypeDeepSeek {
6976
completion,
7077
chat_completion

0 commit comments

Comments
 (0)