Skip to content

Commit c16b9d0

Browse files
Update llama specification
1 parent 4a49a4a commit c16b9d0

File tree

9 files changed

+789
-96
lines changed

9 files changed

+789
-96
lines changed

output/openapi/elasticsearch-openapi.json

Lines changed: 187 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

output/openapi/elasticsearch-serverless-openapi.json

Lines changed: 187 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

output/schema/schema.json

Lines changed: 413 additions & 48 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

output/typescript/types.ts

Lines changed: 0 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

specification/inference/_types/CommonTypes.ts

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1560,7 +1560,7 @@ export class LlamaServiceSettings {
15601560
/**
15611561
* The URL endpoint of the Llama stack endpoint.
15621562
* URL must contain:
1563-
* * For `text_embedding` task - `/v1/openai/v1/embeddings`.
1563+
* * For `text_embedding` task - `/v1/inference/embeddings`.
15641564
* * For `completion` and `chat_completion` tasks - `/v1/openai/v1/chat/completions`.
15651565
*/
15661566
url: string
@@ -1573,26 +1573,10 @@ export class LlamaServiceSettings {
15731573
* @ext_doc_id llama-api-models
15741574
*/
15751575
model_id: string
1576-
/**
1577-
* A valid API key for accessing Llama stack endpoint that is going to be sent as part of Bearer authentication header.
1578-
* This field is optional because Llama stack doesn't provide authentication by default.
1579-
*
1580-
* IMPORTANT: You need to provide the API key only once, during the inference model creation.
1581-
* The get inference endpoint API does not retrieve your API key.
1582-
* After creating the inference model, you cannot change the associated API key.
1583-
* If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.
1584-
*/
1585-
api_key?: string
15861576
/**
15871577
* For a `text_embedding` task, the maximum number of tokens per input before chunking occurs.
15881578
*/
15891579
max_input_tokens?: integer
1590-
/**
1591-
* For a `text_embedding` task, the number of dimensions the resulting output embeddings must have.
1592-
* It is supported only in `text-embedding-3` and later models. If it is not set by user, it defaults to the model returned dimensions.
1593-
* If model returns embeddings with a different number of dimensions, error is returned.
1594-
*/
1595-
dimensions?: integer
15961580
/**
15971581
* For a `text_embedding` task, the similarity measure. One of cosine, dot_product, l2_norm.
15981582
*/
@@ -1604,14 +1588,6 @@ export class LlamaServiceSettings {
16041588
rate_limit?: RateLimitSetting
16051589
}
16061590

1607-
export class LlamaTaskSettings {
1608-
/**
1609-
* For a `completion` or `text_embedding` task, specify the user issuing the request.
1610-
* This information can be used for abuse detection.
1611-
*/
1612-
user?: string
1613-
}
1614-
16151591
export enum LlamaTaskType {
16161592
text_embedding,
16171593
completion,

specification/inference/put_llama/PutLlamaRequest.ts

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ import { Duration } from '@_types/Time'
2323
import {
2424
LlamaServiceSettings,
2525
LlamaServiceType,
26-
LlamaTaskSettings,
2726
LlamaTaskType
2827
} from '@inference/_types/CommonTypes'
2928
import { InferenceChunkingSettings } from '@inference/_types/Services'
@@ -76,10 +75,5 @@ export interface Request extends RequestBase {
7675
* Settings used to install the inference model. These settings are specific to the `llama` service.
7776
*/
7877
service_settings: LlamaServiceSettings
79-
/**
80-
* Settings to configure the inference task.
81-
* These settings are specific to the task type you specified.
82-
*/
83-
task_settings?: LlamaTaskSettings
8478
}
8579
}

specification/inference/put_llama/examples/request/PutLlamaRequestExample1.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,8 @@ value: |-
66
{
77
"service": "llama",
88
"service_settings": {
9-
"url": "http://localhost:8321/v1/openai/v1/embeddings"
9+
"url": "http://localhost:8321/v1/inference/embeddings",
1010
"dimensions": 384,
11-
"api_key": "llama-api-key",
1211
"model_id": "all-MiniLM-L6-v2"
1312
}
1413
}

specification/inference/put_llama/examples/request/PutLlamaRequestExample2.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ value: |-
77
"service": "llama",
88
"service_settings": {
99
"url": "http://localhost:8321/v1/openai/v1/chat/completions"
10-
"api_key": "llama-api-key",
1110
"model_id": "llama3.2:3b"
1211
}
1312
}

specification/inference/put_llama/examples/request/PutLlamaRequestExample3.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ value: |-
77
"service": "llama",
88
"service_settings": {
99
"url": "http://localhost:8321/v1/openai/v1/chat/completions"
10-
"api_key": "llama-api-key",
1110
"model_id": "llama3.2:3b"
1211
}
1312
}

0 commit comments

Comments (0)