Simonc-44 committed on
Commit
a80c974
·
verified ·
1 Parent(s): 330b19d

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +30 -13
main.py CHANGED
@@ -11,18 +11,34 @@ app = FastAPI(title="CygnisAI Studio API")
11
  HF_TOKEN = os.environ.get("HF_TOKEN")
12
  CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")
13
 
 
14
  MODELS = {
15
- "google/gemma-3-27b-it": "google/gemma-2-27b-it",
 
 
 
16
  "openai/gpt-oss-120b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
17
- "Qwen/Qwen3-VL-8B-Thinking": "Qwen/Qwen2.5-72B-Instruct",
18
- "XiaomiMiMo/MiMo-V2-Flash": "Xiaomi/MIMO",
19
- "deepseek-ai/DeepSeek-V3.2": "deepseek-ai/DeepSeek-V3",
20
- "meta-llama/Llama-4-Scout-17B-16E-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",
21
- "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
22
- "default": "meta-llama/Meta-Llama-3-8B-Instruct"
 
 
 
 
 
 
 
 
 
 
 
 
23
  }
24
 
25
- # NOUVELLE URL DE BASE UNIQUE
26
  HF_ROUTER_BASE = "https://router.huggingface.co/hf-inference/models"
27
 
28
  class ChatRequest(BaseModel):
@@ -78,7 +94,6 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
78
 
79
  try:
80
  # 1. Tentative via endpoint Chat (OpenAI compatible)
81
- # URL: https://router.huggingface.co/hf-inference/models/{model_id}/v1/chat/completions
82
  hf_chat_url = f"{HF_ROUTER_BASE}/{model_id}/v1/chat/completions"
83
 
84
  payload_chat = {
@@ -92,14 +107,16 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
92
  print(f"🚀 Calling HF Chat API: {hf_chat_url}")
93
  response = requests.post(hf_chat_url, headers=headers, json=payload_chat)
94
 
95
- # 2. Fallback via endpoint Inference Standard (si Chat échoue avec 404 ou 405)
96
  if response.status_code in [404, 405]:
97
  print(f"🔄 Fallback to standard inference API (Status {response.status_code})")
98
-
99
- # URL: https://router.huggingface.co/hf-inference/models/{model_id}
100
  api_url = f"{HF_ROUTER_BASE}/{model_id}"
101
 
102
- prompt_str = f"System: {req.system_prompt}\nUser: {req.question}\nAssistant:" if req.system_prompt else f"User: {req.question}\nAssistant:"
 
 
 
 
103
 
104
  payload_standard = {
105
  "inputs": prompt_str,
 
11
  HF_TOKEN = os.environ.get("HF_TOKEN")
12
  CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")
13
 
14
+ # Mapping vers des modèles DISPONIBLES sur le routeur gratuit
15
  MODELS = {
16
+ # Gemma 2 9B (Google) - Très rapide et dispo
17
+ "google/gemma-3-27b-it": "google/gemma-2-9b-it",
18
+
19
+ # Llama 3.1 70B (Meta) - Puissant
20
  "openai/gpt-oss-120b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
21
+
22
+ # Qwen 2.5 7B (Alibaba) - Excellent généraliste
23
+ "Qwen/Qwen3-VL-8B-Thinking": "Qwen/Qwen2.5-7B-Instruct",
24
+
25
+ # Phi 3.5 (Microsoft) - Léger
26
+ "XiaomiMiMo/MiMo-V2-Flash": "microsoft/Phi-3.5-mini-instruct",
27
+
28
+ # DeepSeek R1 (Distill Llama 8B) - Raisonnement
29
+ "deepseek-ai/DeepSeek-V3.2": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
30
+
31
+ # Llama 3.2 3B (Meta) - Ultra rapide
32
+ "meta-llama/Llama-4-Scout-17B-16E-Instruct": "meta-llama/Llama-3.2-3B-Instruct",
33
+
34
+ # Llama 3.1 8B (Meta) - Standard
35
+ "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "meta-llama/Meta-Llama-3.1-8B-Instruct",
36
+
37
+ # Défaut : Llama 3.1 8B (Le plus stable sur le routeur)
38
+ "default": "meta-llama/Meta-Llama-3.1-8B-Instruct"
39
  }
40
 
41
+ # URL de base du routeur d'inférence HF
42
  HF_ROUTER_BASE = "https://router.huggingface.co/hf-inference/models"
43
 
44
  class ChatRequest(BaseModel):
 
94
 
95
  try:
96
  # 1. Tentative via endpoint Chat (OpenAI compatible)
 
97
  hf_chat_url = f"{HF_ROUTER_BASE}/{model_id}/v1/chat/completions"
98
 
99
  payload_chat = {
 
107
  print(f"🚀 Calling HF Chat API: {hf_chat_url}")
108
  response = requests.post(hf_chat_url, headers=headers, json=payload_chat)
109
 
110
+ # 2. Fallback via endpoint Inference Standard
111
  if response.status_code in [404, 405]:
112
  print(f"🔄 Fallback to standard inference API (Status {response.status_code})")
 
 
113
  api_url = f"{HF_ROUTER_BASE}/{model_id}"
114
 
115
+ # Formatage simple pour le fallback
116
+ prompt_str = ""
117
+ for msg in messages:
118
+ prompt_str += f"{msg['role']}: {msg['content']}\n"
119
+ prompt_str += "assistant:"
120
 
121
  payload_standard = {
122
  "inputs": prompt_str,