Simonc-44 committed
Commit 19e3e0a · verified · 1 Parent(s): 7f68b18

Update main.py

Files changed (1): main.py +28 -23
main.py CHANGED
@@ -16,16 +16,14 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
 CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")
 
 # Mapping of requested models to the real Hugging Face endpoints
-# Note: I mapped to the closest real models, since Llama 4 / Gemma 3 are not publicly available yet.
-# You can update these IDs as soon as they are released.
 MODELS = {
-    "google/gemma-3-27b-it": "google/gemma-2-27b-it",  # Fallback Gemma 2
-    "openai/gpt-oss-120b": "meta-llama/Meta-Llama-3.1-70B-Instruct",  # Fallback Llama 3.1 70B (powerful)
-    "Qwen/Qwen3-VL-8B-Thinking": "Qwen/Qwen2-VL-7B-Instruct",  # Fallback Qwen 2 VL
-    "XiaomiMiMo/MiMo-V2-Flash": "Xiaomi/MIMO",  # Fallback Xiaomi
-    "deepseek-ai/DeepSeek-V3.2": "deepseek-ai/DeepSeek-V3",  # Fallback V3
-    "meta-llama/Llama-4-Scout-17B-16E-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",  # Fallback Llama 3.1
-    "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",  # Fallback Nemotron
+    "google/gemma-3-27b-it": "google/gemma-2-27b-it",
+    "openai/gpt-oss-120b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
+    "Qwen/Qwen3-VL-8B-Thinking": "Qwen/Qwen2.5-72B-Instruct",  # Corrected to Qwen 2.5
+    "XiaomiMiMo/MiMo-V2-Flash": "Xiaomi/MIMO",
+    "deepseek-ai/DeepSeek-V3.2": "deepseek-ai/DeepSeek-V3",
+    "meta-llama/Llama-4-Scout-17B-16E-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
     "default": "meta-llama/Meta-Llama-3-8B-Instruct"
 }
@@ -48,6 +46,10 @@ class ChatResponse(BaseModel):
 # --- SECURITY ---
 async def verify_api_key(authorization: str = Header(None)):
     if not authorization:
+        # For debugging we could allow requests without a header when running locally or when no key is enforced,
+        # but for production it is better to be strict.
+        # Here we just log the error.
+        print("⚠️ Missing Authorization header")
         raise HTTPException(status_code=401, detail="Missing Authorization header")
 
     try:
@@ -56,6 +58,7 @@ async def verify_api_key(authorization: str = Header(None)):
             raise HTTPException(status_code=401, detail="Invalid authentication scheme")
 
         if token != CYGNIS_API_KEY:
+            print(f"⚠️ Invalid API Key: {token}")
             raise HTTPException(status_code=403, detail="Invalid API Key")
 
     except ValueError:
@@ -65,21 +68,22 @@ async def verify_api_key(authorization: str = Header(None)):
 
 @app.get("/")
 def read_root():
-    return {"status": "online", "service": "CygnisAI Studio API"}
+    return {"status": "online", "service": "CygnisAI Studio API", "hf_token_set": bool(HF_TOKEN)}
 
 @app.post("/api/ask", response_model=ChatResponse)
 async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)):
+    print(f"📩 Received request: {req.question[:50]}...")
+
     if not HF_TOKEN:
-        print("⚠️ WARNING: HF_TOKEN not set. Calls to HF will fail.")
+        print("❌ CRITICAL: HF_TOKEN is missing in environment variables!")
+        raise HTTPException(status_code=500, detail="Server misconfiguration: HF_TOKEN is missing.")
 
     # 1. Select the model
     model_id = MODELS.get(req.model, MODELS["default"])
-    api_url = f"{HF_INFERENCE_BASE}/{model_id}"
 
     print(f"🤖 Routing request to: {model_id}")
 
     # 2. Build the prompt
-    # Use the standard chat-template format when possible, otherwise raw text
     messages = []
     if req.system_prompt:
         messages.append({"role": "system", "content": req.system_prompt})
@@ -100,17 +104,16 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
 
     try:
         # 3. Call Hugging Face (OpenAI-compatible endpoint)
-        # Note: router.huggingface.co often supports /v1/chat/completions
-        # If that fails, we will try the direct call
         hf_chat_url = f"{HF_INFERENCE_BASE}/{model_id}/v1/chat/completions"
 
+        print(f"🚀 Calling HF API: {hf_chat_url}")
         response = requests.post(hf_chat_url, headers=headers, json=payload)
 
-        # Fallback if the OpenAI endpoint is not supported for this model
+        # Fallback if the OpenAI endpoint is not supported for this model (404)
         if response.status_code == 404:
-            print("🔄 Fallback to standard inference API")
-            # For the standard API we often have to send a single string
-            # This is a simplification; ideally we would use the model's tokenizer
+            print("🔄 Fallback to standard inference API (404 on chat endpoint)")
+            api_url = f"https://api-inference.huggingface.co/models/{model_id}"
+
             prompt_str = f"System: {req.system_prompt}\nUser: {req.question}\nAssistant:" if req.system_prompt else f"User: {req.question}\nAssistant:"
 
             payload_standard = {
@@ -125,11 +128,12 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
 
         if response.status_code != 200:
             print(f"❌ HF Error ({response.status_code}): {response.text}")
-            raise HTTPException(status_code=502, detail=f"Model provider error: {response.text}")
+            # Return the exact HF error for debugging
+            raise HTTPException(status_code=502, detail=f"HF Error: {response.text}")
 
         data = response.json()
 
-        # Parse the response (handles both possible formats)
+        # Parse the response
         answer = ""
         if "choices" in data and len(data["choices"]) > 0:
             answer = data["choices"][0]["message"]["content"]
@@ -138,6 +142,7 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
         elif "generated_text" in data:
             answer = data["generated_text"]
         else:
+            print(f"⚠️ Unknown response format: {data}")
             answer = "Error: Could not parse model response."
 
         return {
@@ -147,8 +152,8 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
         }
 
     except Exception as e:
-        print(f"❌ Internal Error: {str(e)}")
-        raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}")
+        print(f"❌ Internal Exception: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}")
 
 if __name__ == "__main__":
     import uvicorn
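
One of the comments removed in this commit notes that the hand-built "System: ... User: ... Assistant:" string is a simplification and that the model's tokenizer would ideally format the prompt. A minimal sketch of that approach, assuming the Space can install the transformers package and that model_id ships a chat template (build_prompt is a hypothetical helper, not part of this commit):

# Sketch: build prompt_str with the model's own chat template instead of
# the hand-rolled "System/User/Assistant" concatenation.
# Assumes `transformers` is installed and the model defines a chat template.
from typing import Optional
from transformers import AutoTokenizer

def build_prompt(model_id: str, question: str, system_prompt: Optional[str] = None) -> str:
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": question})
    # tokenize=False returns the formatted string; add_generation_prompt=True
    # appends the assistant-turn marker so the model starts answering.
    return tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )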
 
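For reference, calling the updated endpoint from a client looks like the sketch below. The base URL is a placeholder, the Bearer scheme is assumed from the "Invalid authentication scheme" check in verify_api_key, and the JSON fields follow the ChatRequest usage visible in the diff (question, model, system_prompt):

# Client-side sketch for POST /api/ask (base URL is hypothetical).
import requests

BASE_URL = "https://example-space.hf.space"  # placeholder Space URL
API_KEY = "cgn_live_stable_demo_api_key_012345"  # demo default from main.py

resp = requests.post(
    f"{BASE_URL}/api/ask",
    headers={"Authorization": f"Bearer {API_KEY}"},  # scheme assumed to be Bearer
    json={
        "question": "What is the capital of France?",
        "model": "google/gemma-3-27b-it",  # routed to google/gemma-2-27b-it via MODELS
        "system_prompt": "You are a concise assistant.",
    },
)
resp.raise_for_status()  # raises on 401/403 auth errors and 502 provider errors
print(resp.json())  # ChatResponse body with the parsed answer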