Update main.py
main.py CHANGED

@@ -11,7 +11,8 @@ app = FastAPI(title="CygnisAI Studio API")
 HF_TOKEN = os.environ.get("HF_TOKEN")
 CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")
 
-# Mapping to models AVAILABLE on
+# Mapping to AVAILABLE and STABLE models on the Hugging Face Inference API
+# Note: free models can be unstable or still loading.
 MODELS = {
     # Gemma 2 9B (Google) - very fast and available
     "google/gemma-3-27b-it": "google/gemma-2-9b-it",
@@ -34,12 +35,15 @@ MODELS = {
     # Llama 3.1 8B (Meta) - standard
     "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "meta-llama/Meta-Llama-3.1-8B-Instruct",
 
-    # Default:
-    "default": "
+    # Default: Gemma 2B IT (very stable and fast on the free tier)
+    "default": "google/gemma-2b-it"
 }
 
-# Base URL
-
+# Base URL for the Chat endpoint (OpenAI compatible)
+HF_CHAT_BASE = "https://router.huggingface.co/hf-inference/models"
+# Base URL for the standard Inference API
+HF_INFERENCE_API_BASE = "https://api-inference.huggingface.co/models"
+
 
 class ChatRequest(BaseModel):
     question: str
@@ -94,7 +98,7 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
 
     try:
         # 1. Attempt via the Chat endpoint (OpenAI compatible)
-        hf_chat_url = f"{
+        hf_chat_url = f"{HF_CHAT_BASE}/{model_id}/v1/chat/completions"
 
         payload_chat = {
             "model": model_id,
@@ -107,12 +111,13 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
         print(f"🚀 Calling HF Chat API: {hf_chat_url}")
         response = requests.post(hf_chat_url, headers=headers, json=payload_chat)
 
-        # 2. Fallback via the standard Inference endpoint
+        # 2. Fallback via the standard Inference endpoint (if Chat fails with 404 or 405)
         if response.status_code in [404, 405]:
             print(f"🔄 Fallback to standard inference API (Status {response.status_code})")
-            api_url = f"{HF_ROUTER_BASE}/{model_id}"
 
-            #
+            # Use the correct URL for the standard Inference API
+            api_url = f"{HF_INFERENCE_API_BASE}/{model_id}"
+
             prompt_str = ""
             for msg in messages:
                 prompt_str += f"{msg['role']}: {msg['content']}\n"
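
The MODELS dict above maps the ids the Studio UI advertises to backends that are actually served on the free Inference API, with "default" as the catch-all. A minimal sketch of how such a mapping is typically resolved (the helper name resolve_model is illustrative; the resolution code itself is not shown in this diff):

# Illustrative sketch: resolve a requested model id against the mapping;
# unknown ids fall back to the "default" entry.
MODELS = {
    "google/gemma-3-27b-it": "google/gemma-2-9b-it",
    "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "default": "google/gemma-2b-it",
}

def resolve_model(requested_id: str) -> str:
    # Exact match first; otherwise the stable free-tier default.
    return MODELS.get(requested_id, MODELS["default"])

assert resolve_model("google/gemma-3-27b-it") == "google/gemma-2-9b-it"
assert resolve_model("unknown/model") == "google/gemma-2b-it"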
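
Taken together, the commit settles on a two-step pattern: try the OpenAI-compatible chat completions route on the HF router first, and fall back to the classic Inference API only when that route answers 404 or 405. Below is a self-contained sketch of that pattern under the same assumptions as the diff (the two base URLs above, HF_TOKEN in the environment); the function name query_with_fallback and the response-shape handling are illustrative, not part of the committed code:

import os
import requests

HF_CHAT_BASE = "https://router.huggingface.co/hf-inference/models"
HF_INFERENCE_API_BASE = "https://api-inference.huggingface.co/models"
HEADERS = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN')}"}

def query_with_fallback(model_id: str, messages: list) -> str:
    # 1. Try the OpenAI-compatible chat completions route first.
    chat_url = f"{HF_CHAT_BASE}/{model_id}/v1/chat/completions"
    resp = requests.post(chat_url, headers=HEADERS,
                         json={"model": model_id, "messages": messages})
    if resp.status_code not in (404, 405):
        resp.raise_for_status()
        return resp.json()["choices"][0]["message"]["content"]

    # 2. Fallback: flatten the chat history into a plain prompt and call
    #    the standard Inference API, as the committed code does.
    prompt = "".join(f"{m['role']}: {m['content']}\n" for m in messages)
    resp = requests.post(f"{HF_INFERENCE_API_BASE}/{model_id}",
                         headers=HEADERS, json={"inputs": prompt})
    resp.raise_for_status()
    data = resp.json()
    # Text-generation responses typically arrive as [{"generated_text": ...}],
    # but the shape can vary by pipeline, hence the defensive check.
    return data[0]["generated_text"] if isinstance(data, list) else str(data)

# Example call (assumes a valid HF_TOKEN):
# print(query_with_fallback("google/gemma-2b-it",
#                           [{"role": "user", "content": "Hello"}]))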