Spaces:

Simonc-44
/

API

Running

App Files Community

Simonc-44 committed 23 days ago

Commit

b1d8113

verified ·

1 Parent(s): 9a8344d

Update main.py

Browse files

Files changed (1) hide show

main.py +10 -11

main.py CHANGED Viewed

@@ -11,8 +11,7 @@ app = FastAPI(title="CygnisAI Studio API")
 HF_TOKEN = os.environ.get("HF_TOKEN")
 CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")
-# Mapping vers des modèles DISPONIBLES et STABLES sur l'API d'inférence Hugging Face
-# Note: Les modèles gratuits peuvent être instables ou en chargement.
 MODELS = {
     # Gemma 2 9B (Google) - Très rapide et dispo
     "google/gemma-3-27b-it": "google/gemma-2-9b-it",
@@ -39,11 +38,8 @@ MODELS = {
     "default": "google/gemma-2b-it"
 }
-# URL de base pour le endpoint Chat (OpenAI compatible)
-HF_CHAT_BASE = "https://router.huggingface.co/hf-inference/models"
-# URL de base pour l'API d'inférence standard
-HF_INFERENCE_API_BASE = "https://api-inference.huggingface.co/models"
 class ChatRequest(BaseModel):
     question: str
@@ -98,7 +94,8 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
     try:
         # 1. Tentative via endpoint Chat (OpenAI compatible)
-        hf_chat_url = f"{HF_CHAT_BASE}/{model_id}/v1/chat/completions"
         payload_chat = {
             "model": model_id,
@@ -115,8 +112,9 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
         if response.status_code in [404, 405]:
              print(f"🔄 Fallback to standard inference API (Status {response.status_code})")
-             # Utilisation de l'URL correcte pour l'API d'inférence standard
-             api_url = f"{HF_INFERENCE_API_BASE}/{model_id}"
              prompt_str = ""
              for msg in messages:
@@ -131,6 +129,7 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
                      "return_full_text": False
                  }
              }
              response = requests.post(api_url, headers=headers, json=payload_standard)
         if response.status_code != 200:
@@ -162,4 +161,4 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)

 HF_TOKEN = os.environ.get("HF_TOKEN")
 CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")
+# Mapping vers des modèles DISPONIBLES et STABLES sur le routeur Hugging Face
 MODELS = {
     # Gemma 2 9B (Google) - Très rapide et dispo
     "google/gemma-3-27b-it": "google/gemma-2-9b-it",
     "default": "google/gemma-2b-it"
 }
+# URL de base UNIQUE pour le routeur HF (utilisée pour Chat ET Inference standard)
+HF_ROUTER_BASE = "https://router.huggingface.co/hf-inference/models"
 class ChatRequest(BaseModel):
     question: str
     try:
         # 1. Tentative via endpoint Chat (OpenAI compatible)
+        # URL: https://router.huggingface.co/hf-inference/models/{model_id}/v1/chat/completions
+        hf_chat_url = f"{HF_ROUTER_BASE}/{model_id}/v1/chat/completions"
         payload_chat = {
             "model": model_id,
         if response.status_code in [404, 405]:
              print(f"🔄 Fallback to standard inference API (Status {response.status_code})")
+             # URL: https://router.huggingface.co/hf-inference/models/{model_id}
+             # IMPORTANT: On utilise bien le routeur ici aussi !
+             api_url = f"{HF_ROUTER_BASE}/{model_id}"
              prompt_str = ""
              for msg in messages:
                      "return_full_text": False
                  }
              }
+             print(f"🚀 Calling HF Standard API: {api_url}")
              response = requests.post(api_url, headers=headers, json=payload_standard)
         if response.status_code != 200:
 if __name__ == "__main__":
     import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)