-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
155 lines (135 loc) · 5.13 KB
/
main.py
File metadata and controls
155 lines (135 loc) · 5.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
from flask import Flask, request, Response, jsonify
import requests
import os
import time
from functools import lru_cache
from flask_cors import CORS

# Flask app exposing an OpenAI-compatible proxy in front of an Azure AI
# endpoint. CORS is wide open (any origin) so browser-based clients can
# call the proxy directly; Api-Key is allowed through for clients that
# send their own key header.
app = Flask(__name__)
CORS(app, resources={
    r"/*": {
        "origins": "*",
        "methods": ["GET", "POST", "OPTIONS"],
        "allow_headers": ["Content-Type", "Authorization", "Accept", "Origin", "Api-Key"],
    },
})

# Configuration
# Configure with your specific Azure endpoint and model
AZURE_MODEL_NAME = "DeepSeek-R1"  # Must match exactly what Azure expects
AZURE_BASE_URL = os.getenv("AZURE_ENDPOINT", "https://<SERVER_NAME>.services.ai.azure.com")
AZURE_API_KEY = os.getenv("AZURE_API_KEY", "<API_KEY>")  # Set this in your environment
# Proxy chat-completion requests to Azure under OpenAI-compatible routes
@app.route('/openai/deployments/<path:model>/chat/completions', methods=['POST'])
@app.route('/v1/chat/completions/deployments/<path:model>/chat/completions', methods=['POST'])
def chat_completion(model):
    """Proxy an OpenAI-style chat completion request to the Azure endpoint.

    The deployment name captured from the URL is ignored; the request body's
    "model" field is overwritten with AZURE_MODEL_NAME so Azure receives the
    exact name it expects. Azure's reply is streamed back to the client.

    Args:
        model: Deployment path segment from the route (unused).

    Returns:
        A streaming Flask Response mirroring Azure's reply, or a JSON error
        with status 400 (bad/missing JSON body) or 500 (upstream failure).
    """
    azure_url = f"{AZURE_BASE_URL}/models/chat/completions"

    # Azure authenticates via the api-key header; "extra-parameters:
    # pass-through" asks the endpoint to forward unrecognised OpenAI params
    # to the model instead of rejecting them.
    headers = {
        "Content-Type": "application/json",
        "api-key": AZURE_API_KEY,
        "extra-parameters": "pass-through"
    }

    # silent=True yields None instead of raising on malformed or missing
    # JSON; the explicit None check also covers a literal JSON `null` body,
    # which the original code would have crashed on when splatting **body.
    original_body = request.get_json(silent=True)
    if original_body is None:
        return jsonify({"error": "Invalid JSON body"}), 400

    # Force the model name Azure expects, keeping every other client field.
    modified_body = {
        **original_body,
        "model": AZURE_MODEL_NAME,
    }

    try:
        azure_response = requests.post(
            azure_url,
            headers=headers,
            params={"api-version": "2024-05-01-preview"},
            json=modified_body,
            stream=True
        )
        azure_response.raise_for_status()
    except requests.exceptions.RequestException as e:
        return jsonify({"error": str(e)}), 500

    def generate():
        # Relay the upstream body in fixed-size chunks without buffering
        # the whole (possibly streamed SSE) reply in memory.
        for chunk in azure_response.iter_content(chunk_size=1024):
            yield chunk

    response_headers = dict(azure_response.headers)
    # These headers describe Azure's wire format, not ours: requests has
    # already decoded Content-Encoding, and the re-chunked stream makes the
    # upstream Content-Length / Transfer-Encoding values wrong.
    response_headers.pop("Content-Encoding", None)
    response_headers.pop("Transfer-Encoding", None)
    response_headers.pop("Content-Length", None)

    return Response(
        generate(),
        status=azure_response.status_code,
        headers=response_headers,
        content_type=azure_response.headers.get('Content-Type', 'application/json')
    )
@app.route('/favicon.ico')
def favicon():
    """Answer favicon probes with an empty 204 so browsers don't log 404s."""
    empty_reply = Response(status=204)
    return empty_reply
# NOTE(review): caching was disabled here (a commented-out @lru_cache(maxsize=1));
# re-enable deliberately if the model list is static for the process lifetime.
def get_azure_models():
    """Fetch the raw model list from the Azure endpoint.

    Returns:
        The list under the response's 'data' key, or [] on any request
        failure (logged) so callers can still render an empty model list.
    """
    try:
        response = requests.get(
            f"{AZURE_BASE_URL}/openai/models?api-version=2024-10-21",
            headers={"api-key": AZURE_API_KEY},
            timeout=5
        )
        response.raise_for_status()
        return response.json().get('data', [])
    except requests.exceptions.RequestException as e:
        app.logger.error(f"Azure model fetch failed: {str(e)}")
        return []
def map_azure_model(azure_model):
    """Map an Azure model record to an OpenAI-compatible /v1/models entry.

    Args:
        azure_model: dict as returned by Azure's model-list API. Missing
            fields fall back to safe defaults ('unknown' id, current time,
            capability flags off).

    Returns:
        dict shaped like an OpenAI model object, including one permission
        entry derived from the Azure capability flags.
    """
    capabilities = azure_model.get('capabilities', {})
    # Resolve id and timestamp once; the original indexed azure_model['id']
    # directly in the permission id, raising KeyError on records without an
    # 'id' even though the top-level id had an 'unknown' fallback.
    model_id = azure_model.get('id', 'unknown')
    created = azure_model.get('created_at', int(time.time()))
    return {
        "id": model_id,
        "object": "model",
        "created": created,
        "owned_by": "azure-ai",
        "permission": [{
            "id": f"modelperm-{model_id.lower()}",
            "object": "model_permission",
            "created": created,
            "allow_create_engine": False,
            "allow_sampling": capabilities.get('inference', False),
            "allow_logprobs": True,
            "allow_search_indices": False,
            "allow_view": True,
            "allow_fine_tuning": capabilities.get('fine_tune', False),
            "organization": "*",
            "group": None,
            "is_blocking": False
        }],
        # 'model' names the base model when Azure reports a fine-tune.
        "root": azure_model.get('model', model_id),
        "parent": azure_model.get('model', None)
    }
@app.route('/openai/deployments/<path:model>/models', methods=['GET'])
@app.route('/v1/models', methods=['GET'])
@app.route('/v1/chat/completions/models', methods=['GET'])
def list_models():
    """Return the Azure model catalogue as an OpenAI /v1/models listing."""
    try:
        mapped = [map_azure_model(entry) for entry in get_azure_models()]
        listing = {"object": "list", "data": mapped}
        return jsonify(listing)
    except Exception as exc:
        app.logger.error(f"Model list error: {str(exc)}")
        failure = {
            "error": {
                "message": "Failed to fetch models",
                "type": "server_error",
                "code": 500
            }
        }
        return jsonify(failure), 500
if __name__ == '__main__':
    # Dev entry point: serve HTTPS on localhost:8085 with a locally
    # generated cert/key pair (e.g. from mkcert). NOTE(review): debug=True
    # enables the Werkzeug debugger — do not run this mode in production.
    app.run(host='localhost', port=8085, debug=True, ssl_context=('localhost.pem', 'localhost-key.pem'))