from fastapi import FastAPI, HTTPException, Header, Depends
from pydantic import BaseModel
import requests
import os
import json
from typing import Optional, Dict
app = FastAPI(title="CygnisAI Studio API")
# --- CONFIGURATION ---
HF_TOKEN = os.environ.get("HF_TOKEN")
CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")
# Mapping to NON-GATED and POPULAR models
MODELS = {
    "google/gemma-3-27b-it": "google/gemma-2-9b-it",
    "openai/gpt-oss-120b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
    "Qwen/Qwen3-VL-8B-Thinking": "Qwen/Qwen2.5-7B-Instruct",
    "XiaomiMiMo/MiMo-V2-Flash": "microsoft/Phi-3.5-mini-instruct",
    "deepseek-ai/DeepSeek-V3.2": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
    "meta-llama/Llama-4-Scout-17B-16E-Instruct": "meta-llama/Llama-3.2-3B-Instruct",
    "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    # Default: Qwen 2.5 (very robust and usually available)
    "default": "Qwen/Qwen2.5-7B-Instruct"
}
# Ultimate fallback model (Microsoft Phi 3.5 is very lightweight and usually available)
SAFETY_NET_MODEL = "microsoft/Phi-3.5-mini-instruct"
# Single base URL for the HF router
HF_ROUTER_BASE = "https://router.huggingface.co/hf-inference/models"
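# For reference (illustrative only), the default model composes into endpoints such as:
#   https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-7B-Instruct/v1/chat/completions
#   https://router.huggingface.co/hf-inference/models/Qwen/Qwen2.5-7B-Instruct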
class ChatRequest(BaseModel):
    question: str
    model: Optional[str] = "default"
    system_prompt: Optional[str] = None
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 1024
class ChatResponse(BaseModel):
    answer: str
    model_used: str
    sources: list = []
async def verify_api_key(authorization: str = Header(None)):
    if not authorization:
        print("⚠️ Missing Authorization header")
        # We don't block here (to make debugging easier), but we do log it
        return
    try:
        scheme, token = authorization.split()
        if scheme.lower() != 'bearer':
            raise HTTPException(status_code=401, detail="Invalid authentication scheme")
        if token != CYGNIS_API_KEY:
            print(f"⚠️ Invalid API Key: {token}")
            # raise HTTPException(status_code=403, detail="Invalid API Key")  # Commented out for debugging
    except ValueError:
        pass  # Let it through for now
@app.get("/")
def read_root():
    return {"status": "online", "service": "CygnisAI Studio API", "hf_token_set": bool(HF_TOKEN)}
def call_hf_api(model_id, messages, req):
    """Helper that calls the HF API, handling both the Chat and Standard endpoints."""
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json"
    }
    # 1. Try the Chat (OpenAI-compatible) API first
    hf_chat_url = f"{HF_ROUTER_BASE}/{model_id}/v1/chat/completions"
    payload_chat = {
        "model": model_id,
        "messages": messages,
        "max_tokens": req.max_tokens,
        "temperature": req.temperature,
        "stream": False
    }
    print(f"🚀 Calling HF Chat API: {hf_chat_url}")
    response = requests.post(hf_chat_url, headers=headers, json=payload_chat)
    # 2. Fall back to the standard inference API
    if response.status_code in [404, 405]:
        print(f"🔄 Fallback to standard inference API (Status {response.status_code})")
        api_url = f"{HF_ROUTER_BASE}/{model_id}"
        prompt_str = ""
        for msg in messages:
            role = msg['role']
            content = msg['content']
            if role == 'system':
                prompt_str += f"<|system|>\n{content}\n"
            elif role == 'user':
                prompt_str += f"<|user|>\n{content}\n"
            elif role == 'assistant':
                prompt_str += f"<|assistant|>\n{content}\n"
        prompt_str += "<|assistant|>\n"
        payload_standard = {
            "inputs": prompt_str,
            "parameters": {
                "max_new_tokens": req.max_tokens,
                "temperature": req.temperature,
                "return_full_text": False
            }
        }
        print(f"🚀 Calling HF Standard API: {api_url}")
        response = requests.post(api_url, headers=headers, json=payload_standard)
    return response
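# For reference (a sketch based on the parsing in ask_model below, not an exhaustive spec),
# the upstream responses we expect look roughly like:
#   Chat API:      {"choices": [{"message": {"role": "assistant", "content": "..."}}]}
#   Standard API:  [{"generated_text": "..."}]  or  {"generated_text": "..."}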
@app.post("/api/ask", response_model=ChatResponse)
async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)):
    print(f"📩 Received request: {req.question[:50]}...")
    if not HF_TOKEN:
        print("❌ CRITICAL: HF_TOKEN is missing!")
        # Mock response instead of crash
        return {
            "answer": "Configuration Error: HF_TOKEN is missing on the server.",
            "model_used": "error-handler",
            "sources": []
        }
    model_id = MODELS.get(req.model, MODELS["default"])
    print(f"🤖 Routing request to: {model_id}")
    messages = []
    if req.system_prompt:
        messages.append({"role": "system", "content": req.system_prompt})
    messages.append({"role": "user", "content": req.question})
    try:
        # First attempt
        response = call_hf_api(model_id, messages, req)
        # On failure, switch to the safety-net model
        if response.status_code != 200:
            print(f"⚠️ Primary model failed ({response.status_code}). Switching to SAFETY NET: {SAFETY_NET_MODEL}")
            model_id = SAFETY_NET_MODEL
            response = call_hf_api(SAFETY_NET_MODEL, messages, req)
        # If everything fails, return a mock response (ULTIMATE FALLBACK)
        if response.status_code != 200:
            print(f"❌ ALL MODELS FAILED. Returning mock response. Last error: {response.text}")
            return {
                "answer": "I'm sorry, my reasoning servers are currently overloaded or unreachable. I can't process your request right now. Please try again in a few minutes.",
                "model_used": "fallback-mock",
                "sources": []
            }
        data = response.json()
        answer = ""
        if "choices" in data and len(data["choices"]) > 0:
            answer = data["choices"][0]["message"]["content"]
        elif isinstance(data, list) and len(data) > 0 and "generated_text" in data[0]:
            answer = data[0]["generated_text"]
        elif "generated_text" in data:
            answer = data["generated_text"]
        else:
            print(f"⚠️ Unknown response format: {data}")
            answer = "Error: Could not parse model response."
        return {
            "answer": answer,
            "model_used": model_id,
            "sources": []
        }
    except Exception as e:
        print(f"❌ Internal Exception: {str(e)}")
        # Mock response on crash
        return {
            "answer": "An unexpected internal error occurred. My apologies.",
            "model_used": "exception-handler",
            "sources": []
        }
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
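
# --- Example client call (illustrative sketch only; the URL assumes the local uvicorn run
# above on port 7860, and the key assumes the demo CYGNIS_API_KEY default — adjust both
# for a real deployment) ---
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/api/ask",
#       headers={"Authorization": "Bearer cgn_live_stable_demo_api_key_012345"},
#       json={"question": "Hello, who are you?", "model": "default"},
#       timeout=120,
#   )
#   print(resp.json()["answer"])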