Update main.py
main.py CHANGED

@@ -11,7 +11,8 @@ app = FastAPI(title="CygnisAI Studio API")
 HF_TOKEN = os.environ.get("HF_TOKEN")
 CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")
 
-# Mapping to models AVAILABLE on
+# Mapping to AVAILABLE and STABLE models on the Hugging Face Inference API
+# Note: free models can be unstable or still loading.
 MODELS = {
     # Gemma 2 9B (Google) - very fast and available
     "google/gemma-3-27b-it": "google/gemma-2-9b-it",
@@ -34,12 +35,15 @@ MODELS = {
     # Llama 3.1 8B (Meta) - standard
     "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "meta-llama/Meta-Llama-3.1-8B-Instruct",
 
-    # Default:
-    "default": "
+    # Default: Gemma 2B IT (very stable and fast on the free tier)
+    "default": "google/gemma-2b-it"
 }
 
-# Base URL
-
+# Base URL for the Chat endpoint (OpenAI compatible)
+HF_CHAT_BASE = "https://router.huggingface.co/hf-inference/models"
+# Base URL for the standard Inference API
+HF_INFERENCE_API_BASE = "https://api-inference.huggingface.co/models"
+
 
 class ChatRequest(BaseModel):
     question: str
@@ -94,7 +98,7 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
 
     try:
         # 1. Attempt via the Chat endpoint (OpenAI compatible)
-        hf_chat_url = f"{
+        hf_chat_url = f"{HF_CHAT_BASE}/{model_id}/v1/chat/completions"
 
         payload_chat = {
             "model": model_id,
@@ -107,12 +111,13 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
         print(f"🚀 Calling HF Chat API: {hf_chat_url}")
         response = requests.post(hf_chat_url, headers=headers, json=payload_chat)
 
-        # 2. Fallback via the standard Inference endpoint
+        # 2. Fallback via the standard Inference endpoint (if Chat fails with 404 or 405)
         if response.status_code in [404, 405]:
             print(f"🔄 Fallback to standard inference API (Status {response.status_code})")
-            api_url = f"{HF_ROUTER_BASE}/{model_id}"
 
-            #
+            # Use the correct URL for the standard Inference API
+            api_url = f"{HF_INFERENCE_API_BASE}/{model_id}"
+
             prompt_str = ""
             for msg in messages:
                 prompt_str += f"{msg['role']}: {msg['content']}\n"
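
The MODELS dict above maps the ids the Studio UI advertises to backends that are actually served on the free Inference API, with "default" as the catch-all. A minimal sketch of how such a mapping is typically resolved (the helper name resolve_model is illustrative; the resolution code itself is not shown in this diff):

# Illustrative sketch: resolve a requested model id against the mapping;
# unknown ids fall back to the "default" entry.
MODELS = {
    "google/gemma-3-27b-it": "google/gemma-2-9b-it",
    "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "default": "google/gemma-2b-it",
}

def resolve_model(requested_id: str) -> str:
    # Exact match first; otherwise the stable free-tier default.
    return MODELS.get(requested_id, MODELS["default"])

assert resolve_model("google/gemma-3-27b-it") == "google/gemma-2-9b-it"
assert resolve_model("unknown/model") == "google/gemma-2b-it"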
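
Taken together, the commit settles on a two-step pattern: try the OpenAI-compatible chat completions route on the HF router first, and fall back to the classic Inference API only when that route answers 404 or 405. Below is a self-contained sketch of that pattern under the same assumptions as the diff (the two base URLs above, HF_TOKEN in the environment); the function name query_with_fallback and the response-shape handling are illustrative, not part of the committed code:

import os
import requests

HF_CHAT_BASE = "https://router.huggingface.co/hf-inference/models"
HF_INFERENCE_API_BASE = "https://api-inference.huggingface.co/models"
HEADERS = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN')}"}

def query_with_fallback(model_id: str, messages: list) -> str:
    # 1. Try the OpenAI-compatible chat completions route first.
    chat_url = f"{HF_CHAT_BASE}/{model_id}/v1/chat/completions"
    resp = requests.post(chat_url, headers=HEADERS,
                         json={"model": model_id, "messages": messages})
    if resp.status_code not in (404, 405):
        resp.raise_for_status()
        return resp.json()["choices"][0]["message"]["content"]

    # 2. Fallback: flatten the chat history into a plain prompt and call
    #    the standard Inference API, as the committed code does.
    prompt = "".join(f"{m['role']}: {m['content']}\n" for m in messages)
    resp = requests.post(f"{HF_INFERENCE_API_BASE}/{model_id}",
                         headers=HEADERS, json={"inputs": prompt})
    resp.raise_for_status()
    data = resp.json()
    # Text-generation responses typically arrive as [{"generated_text": ...}],
    # but the shape can vary by pipeline, hence the defensive check.
    return data[0]["generated_text"] if isinstance(data, list) else str(data)

# Example call (assumes a valid HF_TOKEN):
# print(query_with_fallback("google/gemma-2b-it",
#                           [{"role": "user", "content": "Hello"}]))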