from fastapi import FastAPI, HTTPException, Header, Depends
from pydantic import BaseModel
import requests
import os
from typing import Optional

app = FastAPI(title="CygnisAI Studio API")

# --- CONFIGURATION ---
# HF token used to call the models (set it in the Space Secrets)
HF_TOKEN = os.environ.get("HF_TOKEN")

# Static API key protecting THIS API (set it in the Space Secrets)
# The default value is for local testing only:
CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")

# Mapping of requested model names to the Hugging Face endpoints actually used
MODELS = {
    "google/gemma-3-27b-it": "google/gemma-2-27b-it",
    "openai/gpt-oss-120b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
    "Qwen/Qwen3-VL-8B-Thinking": "Qwen/Qwen2.5-72B-Instruct",  # mapped to Qwen 2.5
    "XiaomiMiMo/MiMo-V2-Flash": "Xiaomi/MIMO",
    "deepseek-ai/DeepSeek-V3.2": "deepseek-ai/DeepSeek-V3",
    "meta-llama/Llama-4-Scout-17B-16E-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
    "default": "meta-llama/Meta-Llama-3-8B-Instruct",
}

# Base URL of the HF inference router
HF_INFERENCE_BASE = "https://router.huggingface.co/hf-inference/models"

# --- SCHEMAS ---
class ChatRequest(BaseModel):
    question: str
    model: Optional[str] = "default"
    system_prompt: Optional[str] = None
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 1024

class ChatResponse(BaseModel):
    answer: str
    model_used: str
    sources: list = []

# --- SECURITY ---
async def verify_api_key(authorization: str = Header(None)):
    """Validate the 'Authorization: Bearer <key>' header against CYGNIS_API_KEY."""
    if not authorization:
        # We could allow requests without a header for local debugging,
        # but for production it is better to be strict: log and reject.
        print("⚠️ Missing Authorization header")
        raise HTTPException(status_code=401, detail="Missing Authorization header")
    try:
        scheme, token = authorization.split()
        if scheme.lower() != "bearer":
            raise HTTPException(status_code=401, detail="Invalid authentication scheme")
        if token != CYGNIS_API_KEY:
            print(f"⚠️ Invalid API Key: {token}")
            raise HTTPException(status_code=403, detail="Invalid API Key")
    except ValueError:
        raise HTTPException(status_code=401, detail="Invalid authorization header format")
    return True

# --- ENDPOINTS ---
@app.get("/")
def read_root():
    return {"status": "online", "service": "CygnisAI Studio API", "hf_token_set": bool(HF_TOKEN)}

@app.post("/api/ask", response_model=ChatResponse)
async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)):
    print(f"📩 Received request: {req.question[:50]}...")

    if not HF_TOKEN:
        print("❌ CRITICAL: HF_TOKEN is missing in environment variables!")
        raise HTTPException(status_code=500, detail="Server misconfiguration: HF_TOKEN is missing.")

    # 1. Model selection
    model_id = MODELS.get(req.model, MODELS["default"])
    print(f"🤖 Routing request to: {model_id}")

    # 2. Prompt construction
    messages = []
    if req.system_prompt:
        messages.append({"role": "system", "content": req.system_prompt})
    messages.append({"role": "user", "content": req.question})

    payload = {
        "model": model_id,
        "messages": messages,
        "max_tokens": req.max_tokens,
        "temperature": req.temperature,
        "stream": False,
    }
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    }

    try:
        # 3. Call Hugging Face (OpenAI-compatible chat endpoint)
        hf_chat_url = f"{HF_INFERENCE_BASE}/{model_id}/v1/chat/completions"
        print(f"🚀 Calling HF API: {hf_chat_url}")
        response = requests.post(hf_chat_url, headers=headers, json=payload, timeout=120)

        # Fallback to the standard Inference API if the chat endpoint is not supported for this model (404)
        if response.status_code == 404:
            print("🔄 Fallback to standard inference API (404 on chat endpoint)")
            api_url = f"https://api-inference.huggingface.co/models/{model_id}"
            prompt_str = (
                f"System: {req.system_prompt}\nUser: {req.question}\nAssistant:"
                if req.system_prompt
                else f"User: {req.question}\nAssistant:"
            )
            payload_standard = {
                "inputs": prompt_str,
                "parameters": {
                    "max_new_tokens": req.max_tokens,
                    "temperature": req.temperature,
                    "return_full_text": False,
                },
            }
            response = requests.post(api_url, headers=headers, json=payload_standard, timeout=120)

        if response.status_code != 200:
            print(f"❌ HF Error ({response.status_code}): {response.text}")
            # Forward the exact HF error for easier debugging
            raise HTTPException(status_code=502, detail=f"HF Error: {response.text}")

        data = response.json()

        # Parse the response (OpenAI-style or standard Inference API format)
        answer = ""
        if "choices" in data and len(data["choices"]) > 0:
            answer = data["choices"][0]["message"]["content"]
        elif isinstance(data, list) and len(data) > 0 and "generated_text" in data[0]:
            answer = data[0]["generated_text"]
        elif "generated_text" in data:
            answer = data["generated_text"]
        else:
            print(f"⚠️ Unknown response format: {data}")
            answer = "Error: Could not parse model response."

        return {
            "answer": answer,
            "model_used": model_id,
            "sources": [],
        }

    except HTTPException:
        # Re-raise HTTPExceptions (e.g. the 502 above) without wrapping them in a 500
        raise
    except Exception as e:
        print(f"❌ Internal Exception: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}")


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
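
# Example client call — a minimal sketch assuming the API runs locally on port 7860
# with the demo key above (adjust the host and key for a deployed Space):
#
#   import requests
#   r = requests.post(
#       "http://localhost:7860/api/ask",
#       headers={"Authorization": "Bearer cgn_live_stable_demo_api_key_012345"},
#       json={"question": "Hello!", "model": "default"},
#   )
#   print(r.json()["answer"])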