Spaces:

Simonc-44
/

API

Running

App Files Files Community

Simonc-44 committed 23 days ago

Commit

330b19d

verified ·

1 Parent(s): 19e3e0a

Update main.py

Browse files

Files changed (1) hide show

main.py +23 -40

main.py CHANGED Viewed

@@ -8,18 +8,13 @@ from typing import Optional, Dict
 app = FastAPI(title="CygnisAI Studio API")
 # --- CONFIGURATION ---
-# Token HF pour appeler les modèles (à configurer dans les Secrets du Space)
 HF_TOKEN = os.environ.get("HF_TOKEN")
-# Clé API statique pour sécuriser VOTRE API (à configurer dans les Secrets du Space)
-# Par défaut pour le test local :
 CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")
-# Mapping des modèles demandés vers les endpoints réels Hugging Face
 MODELS = {
     "google/gemma-3-27b-it": "google/gemma-2-27b-it",
     "openai/gpt-oss-120b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
-    "Qwen/Qwen3-VL-8B-Thinking": "Qwen/Qwen2.5-72B-Instruct", # Correction Qwen 2.5
     "XiaomiMiMo/MiMo-V2-Flash": "Xiaomi/MIMO",
     "deepseek-ai/DeepSeek-V3.2": "deepseek-ai/DeepSeek-V3",
     "meta-llama/Llama-4-Scout-17B-16E-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",
@@ -27,10 +22,9 @@ MODELS = {
     "default": "meta-llama/Meta-Llama-3-8B-Instruct"
 }
-# URL de base du routeur d'inférence HF
-HF_INFERENCE_BASE = "https://router.huggingface.co/hf-inference/models"
-# --- SCHEMAS ---
 class ChatRequest(BaseModel):
     question: str
     model: Optional[str] = "default"
@@ -43,29 +37,20 @@ class ChatResponse(BaseModel):
     model_used: str
     sources: list = []
-# --- SECURITE ---
 async def verify_api_key(authorization: str = Header(None)):
     if not authorization:
-        # Pour le debug, on autorise sans header si on est en local ou si la clé n'est pas forcée
-        # Mais pour la prod, il vaut mieux être strict.
-        # Ici, on log juste l'erreur.
         print("⚠️ Missing Authorization header")
         raise HTTPException(status_code=401, detail="Missing Authorization header")
     try:
         scheme, token = authorization.split()
         if scheme.lower() != 'bearer':
             raise HTTPException(status_code=401, detail="Invalid authentication scheme")
         if token != CYGNIS_API_KEY:
             print(f"⚠️ Invalid API Key: {token}")
             raise HTTPException(status_code=403, detail="Invalid API Key")
     except ValueError:
         raise HTTPException(status_code=401, detail="Invalid authorization header format")
-# --- ENDPOINTS ---
 @app.get("/")
 def read_root():
     """Return a small status document for the service root.

     The payload reports a fixed "online" status, the service name, and
     whether HF_TOKEN holds a truthy value (the token itself is not returned).
     """
     token_configured = bool(HF_TOKEN)
     status_payload = {
         "status": "online",
         "service": "CygnisAI Studio API",
         "hf_token_set": token_configured,
     }
     return status_payload
@@ -75,44 +60,44 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
     print(f"📩 Received request: {req.question[:50]}...")
     if not HF_TOKEN:
-        print("❌ CRITICAL: HF_TOKEN is missing in environment variables!")
         raise HTTPException(status_code=500, detail="Server misconfiguration: HF_TOKEN is missing.")
-    # 1. Sélection du modèle
     model_id = MODELS.get(req.model, MODELS["default"])
     print(f"🤖 Routing request to: {model_id}")
-    # 2. Construction du prompt
     messages = []
     if req.system_prompt:
         messages.append({"role": "system", "content": req.system_prompt})
     messages.append({"role": "user", "content": req.question})
-    payload = {
-        "model": model_id,
-        "messages": messages,
-        "max_tokens": req.max_tokens,
-        "temperature": req.temperature,
-        "stream": False
-    }
     headers = {
         "Authorization": f"Bearer {HF_TOKEN}",
         "Content-Type": "application/json"
     }
     try:
-        # 3. Appel à Hugging Face (Endpoint compatible OpenAI)
-        hf_chat_url = f"{HF_INFERENCE_BASE}/{model_id}/v1/chat/completions"
-        print(f"🚀 Calling HF API: {hf_chat_url}")
-        response = requests.post(hf_chat_url, headers=headers, json=payload)
-        # Fallback si le endpoint OpenAI n'est pas supporté pour ce modèle (404)
-        if response.status_code == 404:
-             print("🔄 Fallback to standard inference API (404 on chat endpoint)")
-             api_url = f"https://api-inference.huggingface.co/models/{model_id}"
              prompt_str = f"System: {req.system_prompt}\nUser: {req.question}\nAssistant:" if req.system_prompt else f"User: {req.question}\nAssistant:"
@@ -128,12 +113,10 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
         if response.status_code != 200:
             print(f"❌ HF Error ({response.status_code}): {response.text}")
-            # On renvoie l'erreur exacte de HF pour le debug
             raise HTTPException(status_code=502, detail=f"HF Error: {response.text}")
         data = response.json()
-        # Parsing de la réponse
         answer = ""
         if "choices" in data and len(data["choices"]) > 0:
             answer = data["choices"][0]["message"]["content"]

 app = FastAPI(title="CygnisAI Studio API")
 # --- CONFIGURATION ---
 HF_TOKEN = os.environ.get("HF_TOKEN")
 CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")
 MODELS = {
     "google/gemma-3-27b-it": "google/gemma-2-27b-it",
     "openai/gpt-oss-120b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
+    "Qwen/Qwen3-VL-8B-Thinking": "Qwen/Qwen2.5-72B-Instruct",
     "XiaomiMiMo/MiMo-V2-Flash": "Xiaomi/MIMO",
     "deepseek-ai/DeepSeek-V3.2": "deepseek-ai/DeepSeek-V3",
     "meta-llama/Llama-4-Scout-17B-16E-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "default": "meta-llama/Meta-Llama-3-8B-Instruct"
 }
+# New single base URL, used for both the chat endpoint and the fallback endpoint
+HF_ROUTER_BASE = "https://router.huggingface.co/hf-inference/models"
 class ChatRequest(BaseModel):
     question: str
     model: Optional[str] = "default"
     model_used: str
     sources: list = []
 async def verify_api_key(authorization: str = Header(None)):
     """Validate the ``Authorization`` header against ``CYGNIS_API_KEY``.

     The header must have exactly two whitespace-separated parts: the scheme
     ``bearer`` (case-insensitive) followed by the API key.

     Args:
         authorization: Raw value of the ``Authorization`` request header,
             or ``None`` when the header is absent.

     Raises:
         HTTPException: 401 when the header is missing, does not split into
             exactly ``<scheme> <token>``, or uses a scheme other than
             ``bearer``; 403 when the token does not match ``CYGNIS_API_KEY``.
     """
     # Function-scope import so the block does not depend on a file-level
     # import that may not exist.
     import hmac

     if not authorization:
         print("⚠️ Missing Authorization header")
         raise HTTPException(status_code=401, detail="Missing Authorization header")

     # Only the tuple unpacking can raise ValueError (wrong number of parts),
     # so it is the only statement kept inside the try body.
     try:
         scheme, token = authorization.split()
     except ValueError:
         raise HTTPException(status_code=401, detail="Invalid authorization header format") from None

     if scheme.lower() != 'bearer':
         raise HTTPException(status_code=401, detail="Invalid authentication scheme")

     # Constant-time comparison avoids leaking how much of the key matched.
     # Compare bytes because compare_digest rejects non-ASCII str values.
     key_matches = hmac.compare_digest(
         token.encode("utf-8"),
         CYGNIS_API_KEY.encode("utf-8"),
     )
     if not key_matches:
         # Do not log the presented token: a mistyped or foreign credential
         # would otherwise be written to the logs in clear text.
         print("⚠️ Invalid API Key")
         raise HTTPException(status_code=403, detail="Invalid API Key")
 @app.get("/")
 def read_root():
     """Return a small status document for the service root.

     The payload reports a fixed "online" status, the service name, and
     whether HF_TOKEN holds a truthy value (the token itself is not returned).
     """
     token_configured = bool(HF_TOKEN)
     status_payload = {
         "status": "online",
         "service": "CygnisAI Studio API",
         "hf_token_set": token_configured,
     }
     return status_payload
     print(f"📩 Received request: {req.question[:50]}...")
     if not HF_TOKEN:
+        print("❌ CRITICAL: HF_TOKEN is missing!")
         raise HTTPException(status_code=500, detail="Server misconfiguration: HF_TOKEN is missing.")
     model_id = MODELS.get(req.model, MODELS["default"])
     print(f"🤖 Routing request to: {model_id}")
     messages = []
     if req.system_prompt:
         messages.append({"role": "system", "content": req.system_prompt})
     messages.append({"role": "user", "content": req.question})
     headers = {
         "Authorization": f"Bearer {HF_TOKEN}",
         "Content-Type": "application/json"
     }
     try:
+        # 1. First attempt via the Chat endpoint (OpenAI-compatible)
+        # URL: https://router.huggingface.co/hf-inference/models/{model_id}/v1/chat/completions
+        hf_chat_url = f"{HF_ROUTER_BASE}/{model_id}/v1/chat/completions"
+        payload_chat = {
+            "model": model_id,
+            "messages": messages,
+            "max_tokens": req.max_tokens,
+            "temperature": req.temperature,
+            "stream": False
+        }
+        print(f"🚀 Calling HF Chat API: {hf_chat_url}")
+        response = requests.post(hf_chat_url, headers=headers, json=payload_chat)
+        # 2. Fallback via the standard Inference endpoint (if Chat fails with 404 or 405)
+        if response.status_code in [404, 405]:
+             print(f"🔄 Fallback to standard inference API (Status {response.status_code})")
+             # URL: https://router.huggingface.co/hf-inference/models/{model_id}
+             api_url = f"{HF_ROUTER_BASE}/{model_id}"
              prompt_str = f"System: {req.system_prompt}\nUser: {req.question}\nAssistant:" if req.system_prompt else f"User: {req.question}\nAssistant:"
         if response.status_code != 200:
             print(f"❌ HF Error ({response.status_code}): {response.text}")
             raise HTTPException(status_code=502, detail=f"HF Error: {response.text}")
         data = response.json()
         answer = ""
         if "choices" in data and len(data["choices"]) > 0:
             answer = data["choices"][0]["message"]["content"]