-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
155 lines (135 loc) · 5.13 KB
/
main.py
File metadata and controls
155 lines (135 loc) · 5.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
from flask import Flask, request, Response, jsonify
import requests
import os
import time
from functools import lru_cache
from flask_cors import CORS

# Flask app exposing an OpenAI-compatible proxy in front of an Azure AI
# endpoint. CORS is wide open (any origin) so browser-based clients can
# call the proxy directly; Api-Key is allowed through for clients that
# send their own key header.
app = Flask(__name__)
CORS(app, resources={
    r"/*": {
        "origins": "*",
        "methods": ["GET", "POST", "OPTIONS"],
        "allow_headers": ["Content-Type", "Authorization", "Accept", "Origin", "Api-Key"],
    },
})

# Configuration
# Configure with your specific Azure endpoint and model
AZURE_MODEL_NAME = "DeepSeek-R1"  # Must match exactly what Azure expects
AZURE_BASE_URL = os.getenv("AZURE_ENDPOINT", "https://<SERVER_NAME>.services.ai.azure.com")
AZURE_API_KEY = os.getenv("AZURE_API_KEY", "<API_KEY>")  # Set this in your environment
# Proxy chat-completion requests to Azure under OpenAI-compatible routes
@app.route('/openai/deployments/<path:model>/chat/completions', methods=['POST'])
@app.route('/v1/chat/completions/deployments/<path:model>/chat/completions', methods=['POST'])
def chat_completion(model):
    """Proxy an OpenAI-style chat completion request to the Azure endpoint.

    The deployment name captured from the URL is ignored; the request body's
    "model" field is overwritten with AZURE_MODEL_NAME so Azure receives the
    exact name it expects. Azure's reply is streamed back to the client.

    Args:
        model: Deployment path segment from the route (unused).

    Returns:
        A streaming Flask Response mirroring Azure's reply, or a JSON error
        with status 400 (bad/missing JSON body) or 500 (upstream failure).
    """
    azure_url = f"{AZURE_BASE_URL}/models/chat/completions"

    # Azure authenticates via the api-key header; "extra-parameters:
    # pass-through" asks the endpoint to forward unrecognised OpenAI params
    # to the model instead of rejecting them.
    headers = {
        "Content-Type": "application/json",
        "api-key": AZURE_API_KEY,
        "extra-parameters": "pass-through"
    }

    # silent=True yields None instead of raising on malformed or missing
    # JSON; the explicit None check also covers a literal JSON `null` body,
    # which the original code would have crashed on when splatting **body.
    original_body = request.get_json(silent=True)
    if original_body is None:
        return jsonify({"error": "Invalid JSON body"}), 400

    # Force the model name Azure expects, keeping every other client field.
    modified_body = {
        **original_body,
        "model": AZURE_MODEL_NAME,
    }

    try:
        azure_response = requests.post(
            azure_url,
            headers=headers,
            params={"api-version": "2024-05-01-preview"},
            json=modified_body,
            stream=True
        )
        azure_response.raise_for_status()
    except requests.exceptions.RequestException as e:
        return jsonify({"error": str(e)}), 500

    def generate():
        # Relay the upstream body in fixed-size chunks without buffering
        # the whole (possibly streamed SSE) reply in memory.
        for chunk in azure_response.iter_content(chunk_size=1024):
            yield chunk

    response_headers = dict(azure_response.headers)
    # These headers describe Azure's wire format, not ours: requests has
    # already decoded Content-Encoding, and the re-chunked stream makes the
    # upstream Content-Length / Transfer-Encoding values wrong.
    response_headers.pop("Content-Encoding", None)
    response_headers.pop("Transfer-Encoding", None)
    response_headers.pop("Content-Length", None)

    return Response(
        generate(),
        status=azure_response.status_code,
        headers=response_headers,
        content_type=azure_response.headers.get('Content-Type', 'application/json')
    )
@app.route('/favicon.ico')
def favicon():
    """Answer favicon probes with an empty 204 so browsers don't log 404s."""
    empty_reply = Response(status=204)
    return empty_reply
# NOTE(review): caching was disabled here (a commented-out @lru_cache(maxsize=1));
# re-enable deliberately if the model list is static for the process lifetime.
def get_azure_models():
    """Fetch the raw model list from the Azure endpoint.

    Returns:
        The list under the response's 'data' key, or [] on any request
        failure (logged) so callers can still render an empty model list.
    """
    try:
        response = requests.get(
            f"{AZURE_BASE_URL}/openai/models?api-version=2024-10-21",
            headers={"api-key": AZURE_API_KEY},
            timeout=5
        )
        response.raise_for_status()
        return response.json().get('data', [])
    except requests.exceptions.RequestException as e:
        app.logger.error(f"Azure model fetch failed: {str(e)}")
        return []
def map_azure_model(azure_model):
    """Map an Azure model record to an OpenAI-compatible /v1/models entry.

    Args:
        azure_model: dict as returned by Azure's model-list API. Missing
            fields fall back to safe defaults ('unknown' id, current time,
            capability flags off).

    Returns:
        dict shaped like an OpenAI model object, including one permission
        entry derived from the Azure capability flags.
    """
    capabilities = azure_model.get('capabilities', {})
    # Resolve id and timestamp once; the original indexed azure_model['id']
    # directly in the permission id, raising KeyError on records without an
    # 'id' even though the top-level id had an 'unknown' fallback.
    model_id = azure_model.get('id', 'unknown')
    created = azure_model.get('created_at', int(time.time()))
    return {
        "id": model_id,
        "object": "model",
        "created": created,
        "owned_by": "azure-ai",
        "permission": [{
            "id": f"modelperm-{model_id.lower()}",
            "object": "model_permission",
            "created": created,
            "allow_create_engine": False,
            "allow_sampling": capabilities.get('inference', False),
            "allow_logprobs": True,
            "allow_search_indices": False,
            "allow_view": True,
            "allow_fine_tuning": capabilities.get('fine_tune', False),
            "organization": "*",
            "group": None,
            "is_blocking": False
        }],
        # 'model' names the base model when Azure reports a fine-tune.
        "root": azure_model.get('model', model_id),
        "parent": azure_model.get('model', None)
    }
@app.route('/openai/deployments/<path:model>/models', methods=['GET'])
@app.route('/v1/models', methods=['GET'])
@app.route('/v1/chat/completions/models', methods=['GET'])
def list_models():
    """Return the Azure model catalogue as an OpenAI /v1/models listing."""
    try:
        mapped = [map_azure_model(entry) for entry in get_azure_models()]
        listing = {"object": "list", "data": mapped}
        return jsonify(listing)
    except Exception as exc:
        app.logger.error(f"Model list error: {str(exc)}")
        failure = {
            "error": {
                "message": "Failed to fetch models",
                "type": "server_error",
                "code": 500
            }
        }
        return jsonify(failure), 500
if __name__ == '__main__':
    # Dev entry point: serve HTTPS on localhost:8085 with a locally
    # generated cert/key pair (e.g. from mkcert). NOTE(review): debug=True
    # enables the Werkzeug debugger — do not run this mode in production.
    app.run(host='localhost', port=8085, debug=True, ssl_context=('localhost.pem', 'localhost-key.pem'))