Simonc-44 committed on
Commit
330b19d
·
verified ·
1 Parent(s): 19e3e0a

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +23 -40
main.py CHANGED
@@ -8,18 +8,13 @@ from typing import Optional, Dict
8
  app = FastAPI(title="CygnisAI Studio API")
9
 
10
  # --- CONFIGURATION ---
11
- # Token HF pour appeler les modèles (à configurer dans les Secrets du Space)
12
  HF_TOKEN = os.environ.get("HF_TOKEN")
13
-
14
- # Clé API statique pour sécuriser VOTRE API (à configurer dans les Secrets du Space)
15
- # Par défaut pour le test local :
16
  CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")
17
 
18
- # Mapping des modèles demandés vers les endpoints réels Hugging Face
19
  MODELS = {
20
  "google/gemma-3-27b-it": "google/gemma-2-27b-it",
21
  "openai/gpt-oss-120b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
22
- "Qwen/Qwen3-VL-8B-Thinking": "Qwen/Qwen2.5-72B-Instruct", # Correction Qwen 2.5
23
  "XiaomiMiMo/MiMo-V2-Flash": "Xiaomi/MIMO",
24
  "deepseek-ai/DeepSeek-V3.2": "deepseek-ai/DeepSeek-V3",
25
  "meta-llama/Llama-4-Scout-17B-16E-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",
@@ -27,10 +22,9 @@ MODELS = {
27
  "default": "meta-llama/Meta-Llama-3-8B-Instruct"
28
  }
29
 
30
- # URL de base du routeur d'inférence HF
31
- HF_INFERENCE_BASE = "https://router.huggingface.co/hf-inference/models"
32
 
33
- # --- SCHEMAS ---
34
  class ChatRequest(BaseModel):
35
  question: str
36
  model: Optional[str] = "default"
@@ -43,29 +37,20 @@ class ChatResponse(BaseModel):
43
  model_used: str
44
  sources: list = []
45
 
46
- # --- SECURITE ---
47
  async def verify_api_key(authorization: str = Header(None)):
48
  if not authorization:
49
- # Pour le debug, on autorise sans header si on est en local ou si la clé n'est pas forcée
50
- # Mais pour la prod, il vaut mieux être strict.
51
- # Ici, on log juste l'erreur.
52
  print("⚠️ Missing Authorization header")
53
  raise HTTPException(status_code=401, detail="Missing Authorization header")
54
-
55
  try:
56
  scheme, token = authorization.split()
57
  if scheme.lower() != 'bearer':
58
  raise HTTPException(status_code=401, detail="Invalid authentication scheme")
59
-
60
  if token != CYGNIS_API_KEY:
61
  print(f"⚠️ Invalid API Key: {token}")
62
  raise HTTPException(status_code=403, detail="Invalid API Key")
63
-
64
  except ValueError:
65
  raise HTTPException(status_code=401, detail="Invalid authorization header format")
66
 
67
- # --- ENDPOINTS ---
68
-
69
  @app.get("/")
70
  def read_root():
71
  return {"status": "online", "service": "CygnisAI Studio API", "hf_token_set": bool(HF_TOKEN)}
@@ -75,44 +60,44 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
75
  print(f"📩 Received request: {req.question[:50]}...")
76
 
77
  if not HF_TOKEN:
78
- print("❌ CRITICAL: HF_TOKEN is missing in environment variables!")
79
  raise HTTPException(status_code=500, detail="Server misconfiguration: HF_TOKEN is missing.")
80
 
81
- # 1. Sélection du modèle
82
  model_id = MODELS.get(req.model, MODELS["default"])
83
-
84
  print(f"🤖 Routing request to: {model_id}")
85
 
86
- # 2. Construction du prompt
87
  messages = []
88
  if req.system_prompt:
89
  messages.append({"role": "system", "content": req.system_prompt})
90
  messages.append({"role": "user", "content": req.question})
91
 
92
- payload = {
93
- "model": model_id,
94
- "messages": messages,
95
- "max_tokens": req.max_tokens,
96
- "temperature": req.temperature,
97
- "stream": False
98
- }
99
-
100
  headers = {
101
  "Authorization": f"Bearer {HF_TOKEN}",
102
  "Content-Type": "application/json"
103
  }
104
 
105
  try:
106
- # 3. Appel à Hugging Face (Endpoint compatible OpenAI)
107
- hf_chat_url = f"{HF_INFERENCE_BASE}/{model_id}/v1/chat/completions"
 
108
 
109
- print(f"🚀 Calling HF API: {hf_chat_url}")
110
- response = requests.post(hf_chat_url, headers=headers, json=payload)
 
 
 
 
 
 
 
 
111
 
112
- # Fallback si le endpoint OpenAI n'est pas supporté pour ce modèle (404)
113
- if response.status_code == 404:
114
- print("🔄 Fallback to standard inference API (404 on chat endpoint)")
115
- api_url = f"https://api-inference.huggingface.co/models/{model_id}"
 
 
116
 
117
  prompt_str = f"System: {req.system_prompt}\nUser: {req.question}\nAssistant:" if req.system_prompt else f"User: {req.question}\nAssistant:"
118
 
@@ -128,12 +113,10 @@ async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)
128
 
129
  if response.status_code != 200:
130
  print(f"❌ HF Error ({response.status_code}): {response.text}")
131
- # On renvoie l'erreur exacte de HF pour le debug
132
  raise HTTPException(status_code=502, detail=f"HF Error: {response.text}")
133
 
134
  data = response.json()
135
 
136
- # Parsing de la réponse
137
  answer = ""
138
  if "choices" in data and len(data["choices"]) > 0:
139
  answer = data["choices"][0]["message"]["content"]
 
8
  app = FastAPI(title="CygnisAI Studio API")
9
 
10
  # --- CONFIGURATION ---
 
11
  HF_TOKEN = os.environ.get("HF_TOKEN")
 
 
 
12
  CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")
13
 
 
14
  MODELS = {
15
  "google/gemma-3-27b-it": "google/gemma-2-27b-it",
16
  "openai/gpt-oss-120b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
17
+ "Qwen/Qwen3-VL-8B-Thinking": "Qwen/Qwen2.5-72B-Instruct",
18
  "XiaomiMiMo/MiMo-V2-Flash": "Xiaomi/MIMO",
19
  "deepseek-ai/DeepSeek-V3.2": "deepseek-ai/DeepSeek-V3",
20
  "meta-llama/Llama-4-Scout-17B-16E-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",
 
22
  "default": "meta-llama/Meta-Llama-3-8B-Instruct"
23
  }
24
 
25
+ # NOUVELLE URL DE BASE UNIQUE
26
+ HF_ROUTER_BASE = "https://router.huggingface.co/hf-inference/models"
27
 
 
28
  class ChatRequest(BaseModel):
29
  question: str
30
  model: Optional[str] = "default"
 
37
  model_used: str
38
  sources: list = []
39
 
 
40
  async def verify_api_key(authorization: str = Header(None)):
41
  if not authorization:
 
 
 
42
  print("⚠️ Missing Authorization header")
43
  raise HTTPException(status_code=401, detail="Missing Authorization header")
 
44
  try:
45
  scheme, token = authorization.split()
46
  if scheme.lower() != 'bearer':
47
  raise HTTPException(status_code=401, detail="Invalid authentication scheme")
 
48
  if token != CYGNIS_API_KEY:
49
  print(f"⚠️ Invalid API Key: {token}")
50
  raise HTTPException(status_code=403, detail="Invalid API Key")
 
51
  except ValueError:
52
  raise HTTPException(status_code=401, detail="Invalid authorization header format")
53
 
 
 
54
@app.get("/")
def read_root():
    """Health-check endpoint.

    Reports that the service is up and whether the HF_TOKEN secret is
    configured (boolean only — the token value itself is never exposed).
    """
    token_configured = bool(HF_TOKEN)
    return {
        "status": "online",
        "service": "CygnisAI Studio API",
        "hf_token_set": token_configured,
    }
 
60
  print(f"📩 Received request: {req.question[:50]}...")
61
 
62
  if not HF_TOKEN:
63
+ print("❌ CRITICAL: HF_TOKEN is missing!")
64
  raise HTTPException(status_code=500, detail="Server misconfiguration: HF_TOKEN is missing.")
65
 
 
66
  model_id = MODELS.get(req.model, MODELS["default"])
 
67
  print(f"🤖 Routing request to: {model_id}")
68
 
 
69
  messages = []
70
  if req.system_prompt:
71
  messages.append({"role": "system", "content": req.system_prompt})
72
  messages.append({"role": "user", "content": req.question})
73
 
 
 
 
 
 
 
 
 
74
  headers = {
75
  "Authorization": f"Bearer {HF_TOKEN}",
76
  "Content-Type": "application/json"
77
  }
78
 
79
  try:
80
+ # 1. Tentative via endpoint Chat (OpenAI compatible)
81
+ # URL: https://router.huggingface.co/hf-inference/models/{model_id}/v1/chat/completions
82
+ hf_chat_url = f"{HF_ROUTER_BASE}/{model_id}/v1/chat/completions"
83
 
84
+ payload_chat = {
85
+ "model": model_id,
86
+ "messages": messages,
87
+ "max_tokens": req.max_tokens,
88
+ "temperature": req.temperature,
89
+ "stream": False
90
+ }
91
+
92
+ print(f"🚀 Calling HF Chat API: {hf_chat_url}")
93
+ response = requests.post(hf_chat_url, headers=headers, json=payload_chat)
94
 
95
+ # 2. Fallback via endpoint Inference Standard (si Chat échoue avec 404 ou 405)
96
+ if response.status_code in [404, 405]:
97
+ print(f"🔄 Fallback to standard inference API (Status {response.status_code})")
98
+
99
+ # URL: https://router.huggingface.co/hf-inference/models/{model_id}
100
+ api_url = f"{HF_ROUTER_BASE}/{model_id}"
101
 
102
  prompt_str = f"System: {req.system_prompt}\nUser: {req.question}\nAssistant:" if req.system_prompt else f"User: {req.question}\nAssistant:"
103
 
 
113
 
114
  if response.status_code != 200:
115
  print(f"❌ HF Error ({response.status_code}): {response.text}")
 
116
  raise HTTPException(status_code=502, detail=f"HF Error: {response.text}")
117
 
118
  data = response.json()
119
 
 
120
  answer = ""
121
  if "choices" in data and len(data["choices"]) > 0:
122
  answer = data["choices"][0]["message"]["content"]