Spaces:

Simonc-44
/

API

Running

App Files Community

Simonc-44 committed 23 days ago

Commit

19e3e0a

verified ·

1 Parent(s): 7f68b18

Update main.py

Browse files

Files changed (1) hide show

main.py +28 -23

main.py CHANGED Viewed

@@ -16,16 +16,14 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
 CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")
 # Mapping des modèles demandés vers les endpoints réels Hugging Face
-# Note: J'ai mappé vers les modèles réels les plus proches car Llama 4 / Gemma 3 n'existent pas encore publiquement.
-# Vous pourrez mettre à jour ces IDs dès leur sortie.
 MODELS = {
-    "google/gemma-3-27b-it": "google/gemma-2-27b-it", # Fallback Gemma 2
-    "openai/gpt-oss-120b": "meta-llama/Meta-Llama-3.1-70B-Instruct", # Fallback Llama 3.1 70B (puissant)
-    "Qwen/Qwen3-VL-8B-Thinking": "Qwen/Qwen2-VL-7B-Instruct", # Fallback Qwen 2 VL
-    "XiaomiMiMo/MiMo-V2-Flash": "Xiaomi/MIMO", # Fallback Xiaomi
-    "deepseek-ai/DeepSeek-V3.2": "deepseek-ai/DeepSeek-V3", # Fallback V3
-    "meta-llama/Llama-4-Scout-17B-16E-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct", # Fallback Llama 3.1
-    "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF", # Fallback Nemotron
     "default": "meta-llama/Meta-Llama-3-8B-Instruct"
 }
@@ -48,6 +46,10 @@ class ChatResponse(BaseModel):
 # --- SECURITE ---
 async def verify_api_key(authorization: str = Header(None)):
     if not authorization:
         raise HTTPException(status_code=401, detail="Missing Authorization header")
     try:
@@ -56,6 +58,7 @@ async def verify_api_key(authorization: str = Header(None)):
             raise HTTPException(status_code=401, detail="Invalid authentication scheme")
         if token != CYGNIS_API_KEY:
             raise HTTPException(status_code=403, detail="Invalid API Key")
     except ValueError:
@@ -65,21 +68,22 @@ async def verify_api_key(authorization: str = Header(None)):
 @app.get("/")
 def read_root():
-    return {"status": "online", "service": "CygnisAI Studio API"}
 @app.post("/api/ask", response_model=ChatResponse)
 async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)):
     if not HF_TOKEN:
-        print("⚠️ WARNING: HF_TOKEN not set. Calls to HF will fail.")
     # 1. Sélection du modèle
     model_id = MODELS.get(req.model, MODELS["default"])
-    api_url = f"{HF_INFERENCE_BASE}/{model_id}"
     print(f"🤖 Routing request to: {model_id}")
     # 2. Construction du prompt
-    # On utilise le format standard chat template si possible, sinon raw text
     messages = []
     if req.system_prompt:
         messages.append({"role": "system", "content": req.system_prompt})
@@ -100,17 +104,16 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
     try:
         # 3. Appel à Hugging Face (Endpoint compatible OpenAI)
-        # Note: router.huggingface.co supporte souvent /v1/chat/completions
-        # Si ça échoue, on tentera l'appel direct
         hf_chat_url = f"{HF_INFERENCE_BASE}/{model_id}/v1/chat/completions"
         response = requests.post(hf_chat_url, headers=headers, json=payload)
-        # Fallback si le endpoint OpenAI n'est pas supporté pour ce modèle
         if response.status_code == 404:
-             print("🔄 Fallback to standard inference API")
-             # Pour l'API standard, on doit souvent envoyer une string unique
-             # Ceci est une simplification, idéalement on utiliserait le tokenizer du modèle
              prompt_str = f"System: {req.system_prompt}\nUser: {req.question}\nAssistant:" if req.system_prompt else f"User: {req.question}\nAssistant:"
              payload_standard = {
@@ -125,11 +128,12 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
         if response.status_code != 200:
             print(f"❌ HF Error ({response.status_code}): {response.text}")
-            raise HTTPException(status_code=502, detail=f"Model provider error: {response.text}")
         data = response.json()
-        # Parsing de la réponse (gère les deux formats possibles)
         answer = ""
         if "choices" in data and len(data["choices"]) > 0:
             answer = data["choices"][0]["message"]["content"]
@@ -138,6 +142,7 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
         elif "generated_text" in data:
             answer = data["generated_text"]
         else:
             answer = "Error: Could not parse model response."
         return {
@@ -147,8 +152,8 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
         }
     except Exception as e:
-        print(f"❌ Internal Error: {str(e)}")
-        raise HTTPException(status_code=500, detail=str(e))
 if __name__ == "__main__":
     import uvicorn

 CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")
 # Mapping des modèles demandés vers les endpoints réels Hugging Face
 MODELS = {
+    "google/gemma-3-27b-it": "google/gemma-2-27b-it",
+    "openai/gpt-oss-120b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
+    "Qwen/Qwen3-VL-8B-Thinking": "Qwen/Qwen2.5-72B-Instruct", # Correction Qwen 2.5
+    "XiaomiMiMo/MiMo-V2-Flash": "Xiaomi/MIMO",
+    "deepseek-ai/DeepSeek-V3.2": "deepseek-ai/DeepSeek-V3",
+    "meta-llama/Llama-4-Scout-17B-16E-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
     "default": "meta-llama/Meta-Llama-3-8B-Instruct"
 }
 # --- SECURITE ---
 async def verify_api_key(authorization: str = Header(None)):
     if not authorization:
+        # Pour le debug, on autorise sans header si on est en local ou si la clé n'est pas forcée
+        # Mais pour la prod, il vaut mieux être strict.
+        # Ici, on log juste l'erreur.
+        print("⚠️ Missing Authorization header")
         raise HTTPException(status_code=401, detail="Missing Authorization header")
     try:
             raise HTTPException(status_code=401, detail="Invalid authentication scheme")
         if token != CYGNIS_API_KEY:
+            print(f"⚠️ Invalid API Key: {token}")
             raise HTTPException(status_code=403, detail="Invalid API Key")
     except ValueError:
 @app.get("/")
 def read_root():
+    return {"status": "online", "service": "CygnisAI Studio API", "hf_token_set": bool(HF_TOKEN)}
 @app.post("/api/ask", response_model=ChatResponse)
 async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)):
+    print(f"📩 Received request: {req.question[:50]}...")
     if not HF_TOKEN:
+        print("❌ CRITICAL: HF_TOKEN is missing in environment variables!")
+        raise HTTPException(status_code=500, detail="Server misconfiguration: HF_TOKEN is missing.")
     # 1. Sélection du modèle
     model_id = MODELS.get(req.model, MODELS["default"])
     print(f"🤖 Routing request to: {model_id}")
     # 2. Construction du prompt
     messages = []
     if req.system_prompt:
         messages.append({"role": "system", "content": req.system_prompt})
     try:
         # 3. Appel à Hugging Face (Endpoint compatible OpenAI)
         hf_chat_url = f"{HF_INFERENCE_BASE}/{model_id}/v1/chat/completions"
+        print(f"🚀 Calling HF API: {hf_chat_url}")
         response = requests.post(hf_chat_url, headers=headers, json=payload)
+        # Fallback si le endpoint OpenAI n'est pas supporté pour ce modèle (404)
         if response.status_code == 404:
+             print("🔄 Fallback to standard inference API (404 on chat endpoint)")
+             api_url = f"https://api-inference.huggingface.co/models/{model_id}"
              prompt_str = f"System: {req.system_prompt}\nUser: {req.question}\nAssistant:" if req.system_prompt else f"User: {req.question}\nAssistant:"
              payload_standard = {
         if response.status_code != 200:
             print(f"❌ HF Error ({response.status_code}): {response.text}")
+            # On renvoie l'erreur exacte de HF pour le debug
+            raise HTTPException(status_code=502, detail=f"HF Error: {response.text}")
         data = response.json()
+        # Parsing de la réponse
         answer = ""
         if "choices" in data and len(data["choices"]) > 0:
             answer = data["choices"][0]["message"]["content"]
         elif "generated_text" in data:
             answer = data["generated_text"]
         else:
+            print(f"⚠️ Unknown response format: {data}")
             answer = "Error: Could not parse model response."
         return {
         }
     except Exception as e:
+        print(f"❌ Internal Exception: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}")
 if __name__ == "__main__":
     import uvicorn