Update main.py
Browse files
main.py
CHANGED
|
@@ -11,8 +11,7 @@ app = FastAPI(title="CygnisAI Studio API")
|
|
| 11 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 12 |
CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")
|
| 13 |
|
| 14 |
-
# Mapping vers des modèles DISPONIBLES et STABLES sur
|
| 15 |
-
# Note: Les modèles gratuits peuvent être instables ou en chargement.
|
| 16 |
MODELS = {
|
| 17 |
# Gemma 2 9B (Google) - Très rapide et dispo
|
| 18 |
"google/gemma-3-27b-it": "google/gemma-2-9b-it",
|
|
@@ -39,11 +38,8 @@ MODELS = {
|
|
| 39 |
"default": "google/gemma-2b-it"
|
| 40 |
}
|
| 41 |
|
| 42 |
-
# URL de base pour le
|
| 43 |
-
|
| 44 |
-
# URL de base pour l'API d'inférence standard
|
| 45 |
-
HF_INFERENCE_API_BASE = "https://api-inference.huggingface.co/models"
|
| 46 |
-
|
| 47 |
|
| 48 |
class ChatRequest(BaseModel):
|
| 49 |
question: str
|
|
@@ -98,7 +94,8 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
|
|
| 98 |
|
| 99 |
try:
|
| 100 |
# 1. Tentative via endpoint Chat (OpenAI compatible)
|
| 101 |
-
|
|
|
|
| 102 |
|
| 103 |
payload_chat = {
|
| 104 |
"model": model_id,
|
|
@@ -115,8 +112,9 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
|
|
| 115 |
if response.status_code in [404, 405]:
|
| 116 |
print(f"🔄 Fallback to standard inference API (Status {response.status_code})")
|
| 117 |
|
| 118 |
-
#
|
| 119 |
-
|
|
|
|
| 120 |
|
| 121 |
prompt_str = ""
|
| 122 |
for msg in messages:
|
|
@@ -131,6 +129,7 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
|
|
| 131 |
"return_full_text": False
|
| 132 |
}
|
| 133 |
}
|
|
|
|
| 134 |
response = requests.post(api_url, headers=headers, json=payload_standard)
|
| 135 |
|
| 136 |
if response.status_code != 200:
|
|
@@ -162,4 +161,4 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
|
|
| 162 |
|
| 163 |
if __name__ == "__main__":
|
| 164 |
import uvicorn
|
| 165 |
-
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
|
| 11 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 12 |
CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")
|
| 13 |
|
| 14 |
+
# Mapping vers des modèles DISPONIBLES et STABLES sur le routeur Hugging Face
|
|
|
|
| 15 |
MODELS = {
|
| 16 |
# Gemma 2 9B (Google) - Très rapide et dispo
|
| 17 |
"google/gemma-3-27b-it": "google/gemma-2-9b-it",
|
|
|
|
| 38 |
"default": "google/gemma-2b-it"
|
| 39 |
}
|
| 40 |
|
| 41 |
+
# URL de base UNIQUE pour le routeur HF (utilisée pour le Chat ET l'inférence standard)
|
| 42 |
+
HF_ROUTER_BASE = "https://router.huggingface.co/hf-inference/models"
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
class ChatRequest(BaseModel):
|
| 45 |
question: str
|
|
|
|
| 94 |
|
| 95 |
try:
|
| 96 |
# 1. Tentative via endpoint Chat (OpenAI compatible)
|
| 97 |
+
# URL: https://router.huggingface.co/hf-inference/models/{model_id}/v1/chat/completions
|
| 98 |
+
hf_chat_url = f"{HF_ROUTER_BASE}/{model_id}/v1/chat/completions"
|
| 99 |
|
| 100 |
payload_chat = {
|
| 101 |
"model": model_id,
|
|
|
|
| 112 |
if response.status_code in [404, 405]:
|
| 113 |
print(f"🔄 Fallback to standard inference API (Status {response.status_code})")
|
| 114 |
|
| 115 |
+
# URL: https://router.huggingface.co/hf-inference/models/{model_id}
|
| 116 |
+
# IMPORTANT: On utilise bien le routeur ici aussi !
|
| 117 |
+
api_url = f"{HF_ROUTER_BASE}/{model_id}"
|
| 118 |
|
| 119 |
prompt_str = ""
|
| 120 |
for msg in messages:
|
|
|
|
| 129 |
"return_full_text": False
|
| 130 |
}
|
| 131 |
}
|
| 132 |
+
print(f"🚀 Calling HF Standard API: {api_url}")
|
| 133 |
response = requests.post(api_url, headers=headers, json=payload_standard)
|
| 134 |
|
| 135 |
if response.status_code != 200:
|
|
|
|
| 161 |
|
| 162 |
if __name__ == "__main__":
|
| 163 |
import uvicorn
|
| 164 |
+
uvicorn.run(app, host="0.0.0.0", port=7860)
|