# main.py
from fastapi import FastAPI, HTTPException, Header, Depends
from pydantic import BaseModel
import requests
import os
from typing import Optional

app = FastAPI(title="CygnisAI Studio API")

# --- CONFIGURATION ---
# HF token used to call the models (set it in the Space Secrets)
HF_TOKEN = os.environ.get("HF_TOKEN")

# Static API key securing YOUR API (set it in the Space Secrets).
# The default below is meant for local testing only:
CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")
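
# The key is checked as a standard Bearer token, so a client sends, e.g.:
#   Authorization: Bearer cgn_live_stable_demo_api_key_012345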

# Mapping from the requested model names to the actual Hugging Face endpoints
MODELS = {
    "google/gemma-3-27b-it": "google/gemma-2-27b-it",
    "openai/gpt-oss-120b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
    "Qwen/Qwen3-VL-8B-Thinking": "Qwen/Qwen2.5-72B-Instruct",  # corrected to Qwen 2.5
    "XiaomiMiMo/MiMo-V2-Flash": "Xiaomi/MIMO",
    "deepseek-ai/DeepSeek-V3.2": "deepseek-ai/DeepSeek-V3",
    "meta-llama/Llama-4-Scout-17B-16E-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
    "default": "meta-llama/Meta-Llama-3-8B-Instruct",
}
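
# Any identifier not present above falls back to the "default" entry:
#   MODELS.get("some/unknown-model", MODELS["default"])
#   -> "meta-llama/Meta-Llama-3-8B-Instruct"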

# Base URL of the HF inference router
HF_INFERENCE_BASE = "https://router.huggingface.co/hf-inference/models"
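
# The chat URL built below has the shape
#   {HF_INFERENCE_BASE}/<model_id>/v1/chat/completions
# e.g. https://router.huggingface.co/hf-inference/models/meta-llama/Meta-Llama-3-8B-Instruct/v1/chat/completions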

# --- SCHEMAS ---
class ChatRequest(BaseModel):
    question: str
    model: Optional[str] = "default"
    system_prompt: Optional[str] = None
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 1024


class ChatResponse(BaseModel):
    answer: str
    model_used: str
    sources: list = []
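
# Example request/response bodies (illustrative values only):
#   request:  {"question": "What is FastAPI?", "model": "default", "temperature": 0.7}
#   response: {"answer": "...", "model_used": "meta-llama/Meta-Llama-3-8B-Instruct", "sources": []}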

# --- SECURITY ---
async def verify_api_key(authorization: str = Header(None)):
    if not authorization:
        # For debugging we could allow requests without a header when running
        # locally or when the key is not enforced, but in production it is
        # better to be strict, so we log the error and reject the request.
        print("⚠️ Missing Authorization header")
        raise HTTPException(status_code=401, detail="Missing Authorization header")
    try:
        scheme, token = authorization.split()
    except ValueError:
        raise HTTPException(status_code=401, detail="Invalid authorization header format")
    if scheme.lower() != "bearer":
        raise HTTPException(status_code=401, detail="Invalid authentication scheme")
    if token != CYGNIS_API_KEY:
        # Do not echo the received key back into the logs
        print("⚠️ Invalid API Key")
        raise HTTPException(status_code=403, detail="Invalid API Key")
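
# FastAPI evaluates this dependency before the endpoint body runs: a failed
# check raises and short-circuits the request, so endpoints only execute for
# callers presenting a valid key.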

# --- ENDPOINTS ---
@app.get("/")
def read_root():
    return {"status": "online", "service": "CygnisAI Studio API", "hf_token_set": bool(HF_TOKEN)}

@app.post("/api/ask", response_model=ChatResponse)
async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)):
    print(f"📩 Received request: {req.question[:50]}...")

    if not HF_TOKEN:
        print("❌ CRITICAL: HF_TOKEN is missing in environment variables!")
        raise HTTPException(status_code=500, detail="Server misconfiguration: HF_TOKEN is missing.")

    # 1. Model selection
    model_id = MODELS.get(req.model, MODELS["default"])
    print(f"🤖 Routing request to: {model_id}")

    # 2. Prompt construction
    messages = []
    if req.system_prompt:
        messages.append({"role": "system", "content": req.system_prompt})
    messages.append({"role": "user", "content": req.question})

    payload = {
        "model": model_id,
        "messages": messages,
        "max_tokens": req.max_tokens,
        "temperature": req.temperature,
        "stream": False,
    }
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    }
    try:
        # 3. Call Hugging Face (OpenAI-compatible chat endpoint)
        hf_chat_url = f"{HF_INFERENCE_BASE}/{model_id}/v1/chat/completions"
        print(f"🚀 Calling HF API: {hf_chat_url}")
        response = requests.post(hf_chat_url, headers=headers, json=payload, timeout=120)

        # Fallback if the OpenAI-compatible endpoint is not supported for this model (404)
        if response.status_code == 404:
            print("🔄 Fallback to standard inference API (404 on chat endpoint)")
            api_url = f"https://api-inference.huggingface.co/models/{model_id}"
            prompt_str = (
                f"System: {req.system_prompt}\nUser: {req.question}\nAssistant:"
                if req.system_prompt
                else f"User: {req.question}\nAssistant:"
            )
            payload_standard = {
                "inputs": prompt_str,
                "parameters": {
                    "max_new_tokens": req.max_tokens,
                    "temperature": req.temperature,
                    "return_full_text": False,
                },
            }
            response = requests.post(api_url, headers=headers, json=payload_standard, timeout=120)

        if response.status_code != 200:
            print(f"❌ HF Error ({response.status_code}): {response.text}")
            # Forward the exact HF error to ease debugging
            raise HTTPException(status_code=502, detail=f"HF Error: {response.text}")

        data = response.json()

        # Parse the response: chat-completions shape first, then the two
        # shapes returned by the standard inference API
        answer = ""
        if "choices" in data and len(data["choices"]) > 0:
            answer = data["choices"][0]["message"]["content"]
        elif isinstance(data, list) and len(data) > 0 and "generated_text" in data[0]:
            answer = data[0]["generated_text"]
        elif "generated_text" in data:
            answer = data["generated_text"]
        else:
            print(f"⚠️ Unknown response format: {data}")
            answer = "Error: Could not parse model response."

        return {
            "answer": answer,
            "model_used": model_id,
            "sources": [],
        }
    except HTTPException:
        # Re-raise the 502 above as-is instead of rewrapping it as a generic 500
        raise
    except Exception as e:
        print(f"❌ Internal Exception: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}")
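
# Full request sketch (placeholder Space URL; the model name is routed through
# MODELS, so "deepseek-ai/DeepSeek-V3.2" actually hits deepseek-ai/DeepSeek-V3):
#   curl -X POST https://<your-space>.hf.space/api/ask \
#        -H "Authorization: Bearer $CYGNIS_API_KEY" \
#        -H "Content-Type: application/json" \
#        -d '{"question": "Summarize FastAPI in one sentence.",
#             "model": "deepseek-ai/DeepSeek-V3.2",
#             "system_prompt": "You are concise.",
#             "temperature": 0.5}'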

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
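
# Run locally with `python main.py`, or equivalently:
#   uvicorn main:app --host 0.0.0.0 --port 7860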