from fastapi import FastAPI, HTTPException, Header, Depends
from pydantic import BaseModel
import requests
import os
import json
from typing import Optional, Dict
app = FastAPI(title="CygnisAI Studio API")
# --- CONFIGURATION ---
HF_TOKEN = os.environ.get("HF_TOKEN")
CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")
# Mapping to NON-GATED, widely available models
MODELS = {
    "google/gemma-3-27b-it": "google/gemma-2-9b-it",
    "openai/gpt-oss-120b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
    "Qwen/Qwen3-VL-8B-Thinking": "Qwen/Qwen2.5-7B-Instruct",
    "XiaomiMiMo/MiMo-V2-Flash": "microsoft/Phi-3.5-mini-instruct",
    "deepseek-ai/DeepSeek-V3.2": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
    "meta-llama/Llama-4-Scout-17B-16E-Instruct": "meta-llama/Llama-3.2-3B-Instruct",
    "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    # Default: Qwen 2.5 (very robust and usually available)
    "default": "Qwen/Qwen2.5-7B-Instruct"
}
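# Illustrative lookups against the mapping above (aliases taken straight from MODELS;
# any model name not listed falls back to the "default" entry):
#   MODELS.get("google/gemma-3-27b-it", MODELS["default"])  -> "google/gemma-2-9b-it"
#   MODELS.get("some/unknown-model", MODELS["default"])     -> "Qwen/Qwen2.5-7B-Instruct"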
# Last-resort fallback model (Microsoft Phi-3.5 is very lightweight and usually available)
SAFETY_NET_MODEL = "microsoft/Phi-3.5-mini-instruct"
# Single base URL for the HF router
HF_ROUTER_BASE = "https://router.huggingface.co/hf-inference/models"
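# For illustration, the two URL shapes built from HF_ROUTER_BASE in call_hf_api below
# (the model id is only an example):
#   https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-7B-Instruct/v1/chat/completions
#   https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-7B-Instruct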
class ChatRequest(BaseModel):
    question: str
    model: Optional[str] = "default"
    system_prompt: Optional[str] = None
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 1024
class ChatResponse(BaseModel):
    answer: str
    model_used: str
    sources: list = []
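# Illustrative request/response bodies for the two models above (the values are made up;
# only the field names and defaults come from ChatRequest / ChatResponse):
#   request:  {"question": "Hello!", "model": "default", "temperature": 0.7, "max_tokens": 1024}
#   response: {"answer": "Hi there.", "model_used": "Qwen/Qwen2.5-7B-Instruct", "sources": []}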
async def verify_api_key(authorization: str = Header(None)):
    if not authorization:
        print("⚠️ Missing Authorization header")
        # Don't block, to keep debugging easy, but log it and skip further parsing
        return
    try:
        scheme, token = authorization.split()
        if scheme.lower() != 'bearer':
            raise HTTPException(status_code=401, detail="Invalid authentication scheme")
        if token != CYGNIS_API_KEY:
            print(f"⚠️ Invalid API Key: {token}")
            # raise HTTPException(status_code=403, detail="Invalid API Key")  # Commented out for debugging
    except ValueError:
        pass  # Let it through for now
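# Example of the header the dependency above expects (the key shown is just the demo
# default of CYGNIS_API_KEY defined earlier, not a real credential):
#   Authorization: Bearer cgn_live_stable_demo_api_key_012345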
@app.get("/")
def read_root():
    return {"status": "online", "service": "CygnisAI Studio API", "hf_token_set": bool(HF_TOKEN)}
def call_hf_api(model_id, messages, req):
    """Helper that calls the HF API, trying the Chat endpoint first and falling back to the standard one."""
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json"
    }
    # 1. Try the Chat Completions API
    hf_chat_url = f"{HF_ROUTER_BASE}/{model_id}/v1/chat/completions"
    payload_chat = {
        "model": model_id,
        "messages": messages,
        "max_tokens": req.max_tokens,
        "temperature": req.temperature,
        "stream": False
    }
    print(f"🚀 Calling HF Chat API: {hf_chat_url}")
    # Timeout added so a hung upstream call cannot block the worker indefinitely
    response = requests.post(hf_chat_url, headers=headers, json=payload_chat, timeout=120)
    # 2. Fall back to the standard Inference API
    if response.status_code in [404, 405]:
        print(f"🔄 Fallback to standard inference API (Status {response.status_code})")
        api_url = f"{HF_ROUTER_BASE}/{model_id}"
        # Flatten the chat messages into a single tagged prompt string
        prompt_str = ""
        for msg in messages:
            role = msg['role']
            content = msg['content']
            if role == 'system': prompt_str += f"<|system|>\n{content}\n"
            elif role == 'user': prompt_str += f"<|user|>\n{content}\n"
            elif role == 'assistant': prompt_str += f"<|assistant|>\n{content}\n"
        prompt_str += "<|assistant|>\n"
        payload_standard = {
            "inputs": prompt_str,
            "parameters": {
                "max_new_tokens": req.max_tokens,
                "temperature": req.temperature,
                "return_full_text": False
            }
        }
        print(f"🚀 Calling HF Standard API: {api_url}")
        response = requests.post(api_url, headers=headers, json=payload_standard, timeout=120)
    return response
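# For illustration, the prompt string the fallback path builds for a single
# system + user exchange (the message text is invented; the tags come from the loop above):
#   <|system|>
#   You are a helpful assistant.
#   <|user|>
#   Hello!
#   <|assistant|>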
@app.post("/api/ask", response_model=ChatResponse)
async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)):
    print(f"📩 Received request: {req.question[:50]}...")
    if not HF_TOKEN:
        print("❌ CRITICAL: HF_TOKEN is missing!")
        # Mock response instead of crash
        return {
            "answer": "Configuration Error: HF_TOKEN is missing on the server.",
            "model_used": "error-handler",
            "sources": []
        }
    model_id = MODELS.get(req.model, MODELS["default"])
    print(f"🤖 Routing request to: {model_id}")
    messages = []
    if req.system_prompt:
        messages.append({"role": "system", "content": req.system_prompt})
    messages.append({"role": "user", "content": req.question})
    try:
        # First attempt
        response = call_hf_api(model_id, messages, req)
        # On failure, switch to the safety net model
        if response.status_code != 200:
            print(f"⚠️ Primary model failed ({response.status_code}). Switching to SAFETY NET: {SAFETY_NET_MODEL}")
            model_id = SAFETY_NET_MODEL
            response = call_hf_api(SAFETY_NET_MODEL, messages, req)
        # If everything fails, return a mock response (ULTIMATE FALLBACK)
        if response.status_code != 200:
            print(f"❌ ALL MODELS FAILED. Returning mock response. Last error: {response.text}")
            return {
                "answer": "I'm sorry, my reasoning servers are currently overloaded or unreachable. I cannot handle your request right now. Please try again in a few minutes.",
                "model_used": "fallback-mock",
                "sources": []
            }
        data = response.json()
        answer = ""
        if "choices" in data and len(data["choices"]) > 0:
            answer = data["choices"][0]["message"]["content"]
        elif isinstance(data, list) and len(data) > 0 and "generated_text" in data[0]:
            answer = data[0]["generated_text"]
        elif "generated_text" in data:
            answer = data["generated_text"]
        else:
            print(f"⚠️ Unknown response format: {data}")
            answer = "Error: Could not parse model response."
        return {
            "answer": answer,
            "model_used": model_id,
            "sources": []
        }
    except Exception as e:
        print(f"❌ Internal Exception: {str(e)}")
        # Mock response on crash
        return {
            "answer": "An unexpected internal error occurred. My apologies.",
            "model_used": "exception-handler",
            "sources": []
        }
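# Minimal client-side sketch of calling the endpoint above, assuming the app is served
# locally on port 7860 (as in the __main__ block below) and the demo CYGNIS_API_KEY
# default; adjust the URL and key for a real deployment:
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/api/ask",
#       headers={"Authorization": "Bearer cgn_live_stable_demo_api_key_012345"},
#       json={"question": "Hello!", "model": "default"},
#       timeout=120,
#   )
#   data = resp.json()
#   print(data["model_used"], data["answer"])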
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)