Skip to content

Commit b59776d

Browse files
authored
Add chutes provider (#42)
* feat: add Chutes AI provider with Qwen, GLM and Kimi models
* feat: update model pricing and add new DeepSeek V3.1 and Qwen3 models
1 parent 07a9f24 commit b59776d

File tree

3 files changed

+252
-0
lines changed

3 files changed

+252
-0
lines changed
Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
{
2+
"name": "Chutes",
3+
"id": "chutes",
4+
"type": "openai",
5+
"api_key": "$CHUTES_API_KEY",
6+
"api_endpoint": "https://llm.chutes.ai/v1",
7+
"default_large_model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8",
8+
"default_small_model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8",
9+
"models": [
10+
{
11+
"id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8",
12+
"name": "Qwen3 Coder 480B A35B Instruct (FP8)",
13+
"cost_per_1m_in": 0.2,
14+
"cost_per_1m_out": 0.8,
15+
"context_window": 262000,
16+
"default_max_tokens": 32768,
17+
"can_reason": true,
18+
"has_reasoning_efforts": true,
19+
"default_reasoning_efforts": "medium",
20+
"supports_attachments": true
21+
},
22+
{
23+
"id": "zai-org/GLM-4.5-FP8",
24+
"name": "GLM 4.5 FP8",
25+
"cost_per_1m_in": 0.0,
26+
"cost_per_1m_out": 0.0,
27+
"context_window": 98000,
28+
"default_max_tokens": 32768,
29+
"can_reason": true,
30+
"has_reasoning_efforts": true,
31+
"default_reasoning_efforts": "medium",
32+
"supports_attachments": true
33+
},
34+
{
35+
"id": "moonshotai/Kimi-K2-Instruct-75k",
36+
"name": "Kimi K2 Instruct",
37+
"cost_per_1m_in": 0.15,
38+
"cost_per_1m_out": 0.59,
39+
"context_window": 75000,
40+
"default_max_tokens": 32768,
41+
"can_reason": true,
42+
"has_reasoning_efforts": true,
43+
"default_reasoning_efforts": "medium",
44+
"supports_attachments": true
45+
},
46+
{
47+
"id": "deepseek-ai/DeepSeek-R1-0528",
48+
"name": "DeepSeek R1 0528",
49+
"cost_per_1m_in": 0.18,
50+
"cost_per_1m_out": 0.72,
51+
"context_window": 75000,
52+
"default_max_tokens": 32768,
53+
"can_reason": true,
54+
"has_reasoning_efforts": true,
55+
"default_reasoning_efforts": "medium",
56+
"supports_attachments": true
57+
},
58+
{
59+
"id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
60+
"name": "DeepSeek R1 0528 Qwen3 8B",
61+
"cost_per_1m_in": 0.02,
62+
"cost_per_1m_out": 0.07,
63+
"context_window": 32768,
64+
"default_max_tokens": 8192,
65+
"can_reason": false,
66+
"has_reasoning_efforts": false,
67+
"supports_attachments": true
68+
},
69+
{
70+
"id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
71+
"name": "DeepSeek R1 Distill Llama 70B",
72+
"cost_per_1m_in": 0.03,
73+
"cost_per_1m_out": 0.14,
74+
"context_window": 65536,
75+
"default_max_tokens": 8192,
76+
"can_reason": false,
77+
"has_reasoning_efforts": false,
78+
"supports_attachments": true
79+
},
80+
{
81+
"id": "tngtech/DeepSeek-R1T-Chimera",
82+
"name": "DeepSeek R1T Chimera",
83+
"cost_per_1m_in": 0.18,
84+
"cost_per_1m_out": 0.72,
85+
"context_window": 131072,
86+
"default_max_tokens": 32768,
87+
"can_reason": true,
88+
"has_reasoning_efforts": true,
89+
"default_reasoning_efforts": "medium",
90+
"supports_attachments": true
91+
},
92+
{
93+
"id": "tngtech/DeepSeek-TNG-R1T2-Chimera",
94+
"name": "DeepSeek TNG R1T2 Chimera",
95+
"cost_per_1m_in": 0.20,
96+
"cost_per_1m_out": 0.80,
97+
"context_window": 262144,
98+
"default_max_tokens": 65536,
99+
"can_reason": true,
100+
"has_reasoning_efforts": true,
101+
"default_reasoning_efforts": "high",
102+
"supports_attachments": true
103+
},
104+
{
105+
"id": "deepseek-ai/DeepSeek-V3-0324",
106+
"name": "DeepSeek V3 0324",
107+
"cost_per_1m_in": 0.18,
108+
"cost_per_1m_out": 0.72,
109+
"context_window": 75000,
110+
"default_max_tokens": 32768,
111+
"can_reason": true,
112+
"has_reasoning_efforts": true,
113+
"default_reasoning_efforts": "medium",
114+
"supports_attachments": true
115+
},
116+
{
117+
"id": "chutesai/Devstral-Small-2505",
118+
"name": "Devstral Small 2505",
119+
"cost_per_1m_in": 0.02,
120+
"cost_per_1m_out": 0.08,
121+
"context_window": 32768,
122+
"default_max_tokens": 8192,
123+
"can_reason": false,
124+
"has_reasoning_efforts": false,
125+
"supports_attachments": true
126+
},
127+
{
128+
"id": "zai-org/GLM-4.5-Air",
129+
"name": "GLM 4.5 Air",
130+
"cost_per_1m_in": 0.0,
131+
"cost_per_1m_out": 0.0,
132+
"context_window": 131072,
133+
"default_max_tokens": 32768,
134+
"can_reason": true,
135+
"has_reasoning_efforts": true,
136+
"default_reasoning_efforts": "medium",
137+
"supports_attachments": true
138+
},
139+
{
140+
"id": "openai/gpt-oss-120b",
141+
"name": "GPT OSS 120B",
142+
"cost_per_1m_in": 0.10,
143+
"cost_per_1m_out": 0.41,
144+
"context_window": 131072,
145+
"default_max_tokens": 32768,
146+
"can_reason": true,
147+
"has_reasoning_efforts": true,
148+
"default_reasoning_efforts": "medium",
149+
"supports_attachments": true
150+
},
151+
{
152+
"id": "chutesai/Mistral-Small-3.2-24B-Instruct-2506",
153+
"name": "Mistral Small 3.2 24B Instruct 2506",
154+
"cost_per_1m_in": 0.02,
155+
"cost_per_1m_out": 0.08,
156+
"context_window": 32768,
157+
"default_max_tokens": 8192,
158+
"can_reason": false,
159+
"has_reasoning_efforts": false,
160+
"supports_attachments": true
161+
},
162+
{
163+
"id": "Qwen/Qwen3-235B-A22B-Instruct-2507",
164+
"name": "Qwen3 235B A22B Instruct 2507",
165+
"cost_per_1m_in": 0.08,
166+
"cost_per_1m_out": 0.31,
167+
"context_window": 32768,
168+
"default_max_tokens": 8192,
169+
"can_reason": false,
170+
"has_reasoning_efforts": false,
171+
"supports_attachments": true
172+
},
173+
{
174+
"id": "Qwen/Qwen3-30B-A3B",
175+
"name": "Qwen3 30B A3B",
176+
"cost_per_1m_in": 0.02,
177+
"cost_per_1m_out": 0.08,
178+
"context_window": 32768,
179+
"default_max_tokens": 8192,
180+
"can_reason": false,
181+
"has_reasoning_efforts": false,
182+
"supports_attachments": true
183+
},
184+
{
185+
"id": "Qwen/Qwen3-235B-A22B-Thinking-2507",
186+
"name": "Qwen3 235B A22B Thinking 2507",
187+
"cost_per_1m_in": 0.08,
188+
"cost_per_1m_out": 0.31,
189+
"context_window": 32768,
190+
"default_max_tokens": 8192,
191+
"can_reason": true,
192+
"has_reasoning_efforts": true,
193+
"default_reasoning_efforts": "high",
194+
"supports_attachments": true
195+
},
196+
{
197+
"id": "deepseek-ai/DeepSeek-V3.1",
198+
"name": "DeepSeek V3.1",
199+
"cost_per_1m_in": 0.20,
200+
"cost_per_1m_out": 0.80,
201+
"context_window": 163840,
202+
"default_max_tokens": 32768,
203+
"can_reason": false,
204+
"has_reasoning_efforts": false,
205+
"supports_attachments": true
206+
},
207+
{
208+
"id": "deepseek-ai/DeepSeek-V3.1:THINKING",
209+
"name": "DeepSeek V3.1 Reasoning",
210+
"cost_per_1m_in": 0.20,
211+
"cost_per_1m_out": 0.80,
212+
"context_window": 163840,
213+
"default_max_tokens": 32768,
214+
"can_reason": true,
215+
"has_reasoning_efforts": true,
216+
"default_reasoning_efforts": "medium",
217+
"supports_attachments": true
218+
},
219+
{
220+
"id": "Qwen/Qwen3-30B-A3B-Instruct-2507",
221+
"name": "Qwen3 30B A3B Instruct 2507",
222+
"cost_per_1m_in": 0.05,
223+
"cost_per_1m_out": 0.20,
224+
"context_window": 262144,
225+
"default_max_tokens": 32768,
226+
"can_reason": false,
227+
"has_reasoning_efforts": false,
228+
"supports_attachments": true
229+
},
230+
{
231+
"id": "Qwen/Qwen3-Coder-30B-A3B-Instruct",
232+
"name": "Qwen3 Coder 30B A3B Instruct",
233+
"cost_per_1m_in": 0.00,
234+
"cost_per_1m_out": 0.00,
235+
"context_window": 262144,
236+
"default_max_tokens": 32768,
237+
"can_reason": false,
238+
"has_reasoning_efforts": false,
239+
"supports_attachments": true
240+
}
241+
]
242+
}

internal/providers/providers.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ var cerebrasConfig []byte
4848
//go:embed configs/venice.json
4949
var veniceConfig []byte
5050

51+
//go:embed configs/chutes.json
52+
var chutesConfig []byte
53+
5154
//go:embed configs/deepseek.json
5255
var deepSeekConfig []byte
5356

@@ -68,6 +71,7 @@ var providerRegistry = []ProviderFunc{
6871
lambdaProvider,
6972
cerebrasProvider,
7073
veniceProvider,
74+
chutesProvider,
7175
deepSeekProvider,
7276
}
7377

@@ -141,6 +145,10 @@ func veniceProvider() catwalk.Provider {
141145
return loadProviderFromConfig(veniceConfig)
142146
}
143147

148+
func chutesProvider() catwalk.Provider {
149+
return loadProviderFromConfig(chutesConfig)
150+
}
151+
144152
func deepSeekProvider() catwalk.Provider {
145153
return loadProviderFromConfig(deepSeekConfig)
146154
}

pkg/catwalk/provider.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ const (
3131
InferenceProviderLambda InferenceProvider = "lambda"
3232
InferenceProviderCerebras InferenceProvider = "cerebras"
3333
InferenceProviderVenice InferenceProvider = "venice"
34+
InferenceProviderChutes InferenceProvider = "chutes"
3435
)
3536

3637
// Provider represents an AI provider configuration.
@@ -78,5 +79,6 @@ func KnownProviders() []InferenceProvider {
7879
InferenceProviderLambda,
7980
InferenceProviderCerebras,
8081
InferenceProviderVenice,
82+
InferenceProviderChutes,
8183
}
8284
}

0 commit comments

Comments (0)