Update main.py
main.py CHANGED
@@ -11,18 +11,34 @@ app = FastAPI(title="CygnisAI Studio API")
 HF_TOKEN = os.environ.get("HF_TOKEN")
 CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")
 
+# Mapping to models AVAILABLE on the free router
 MODELS = {
-    …
+    # Gemma 2 9B (Google) - very fast and available
+    "google/gemma-3-27b-it": "google/gemma-2-9b-it",
+
+    # Llama 3.1 70B (Meta) - powerful
     "openai/gpt-oss-120b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
-    …
-    …
-    "…
-    …
-    …
-    "…
+
+    # Qwen 2.5 7B (Alibaba) - excellent generalist
+    "Qwen/Qwen3-VL-8B-Thinking": "Qwen/Qwen2.5-7B-Instruct",
+
+    # Phi 3.5 (Microsoft) - lightweight
+    "XiaomiMiMo/MiMo-V2-Flash": "microsoft/Phi-3.5-mini-instruct",
+
+    # DeepSeek R1 (Distill Llama 8B) - reasoning
+    "deepseek-ai/DeepSeek-V3.2": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
+
+    # Llama 3.2 3B (Meta) - ultra fast
+    "meta-llama/Llama-4-Scout-17B-16E-Instruct": "meta-llama/Llama-3.2-3B-Instruct",
+
+    # Llama 3.1 8B (Meta) - standard
+    "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+
+    # Default: Llama 3.1 8B (the most stable on the router)
+    "default": "meta-llama/Meta-Llama-3.1-8B-Instruct"
 }
 
-# …
+# Base URL of the HF inference router
 HF_ROUTER_BASE = "https://router.huggingface.co/hf-inference/models"
 
 class ChatRequest(BaseModel):
@@ -78,7 +94,6 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
 
     try:
         # 1. Try the Chat endpoint (OpenAI compatible)
-        # URL: https://router.huggingface.co/hf-inference/models/{model_id}/v1/chat/completions
         hf_chat_url = f"{HF_ROUTER_BASE}/{model_id}/v1/chat/completions"
 
         payload_chat = {
@@ -92,14 +107,16 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
         print(f"🚀 Calling HF Chat API: {hf_chat_url}")
         response = requests.post(hf_chat_url, headers=headers, json=payload_chat)
 
-        # 2. Fallback via the standard Inference endpoint
+        # 2. Fallback via the standard Inference endpoint
         if response.status_code in [404, 405]:
             print(f"🔄 Fallback to standard inference API (Status {response.status_code})")
-
-            # URL: https://router.huggingface.co/hf-inference/models/{model_id}
             api_url = f"{HF_ROUTER_BASE}/{model_id}"
 
-            …
+            # Simple prompt formatting for the fallback
+            prompt_str = ""
+            for msg in messages:
+                prompt_str += f"{msg['role']}: {msg['content']}\n"
+            prompt_str += "assistant:"
 
             payload_standard = {
                 "inputs": prompt_str,