from fastapi import FastAPI, HTTPException, Header, Depends
from pydantic import BaseModel
import requests
import os
from typing import Optional

app = FastAPI(title="CygnisAI Studio API")

# Credentials: HF_TOKEN is required for Hugging Face inference calls;
# CYGNIS_API_KEY falls back to a demo key if it is not set in the environment.
HF_TOKEN = os.environ.get("HF_TOKEN")
CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")

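# Maps the model names exposed to clients onto HF model IDs that are actually
# called for inference (apparently stand-ins available through the HF router).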
MODELS = {
    "google/gemma-3-27b-it": "google/gemma-2-9b-it",
    "openai/gpt-oss-120b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
    "Qwen/Qwen3-VL-8B-Thinking": "Qwen/Qwen2.5-7B-Instruct",
    "XiaomiMiMo/MiMo-V2-Flash": "microsoft/Phi-3.5-mini-instruct",
    "deepseek-ai/DeepSeek-V3.2": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
    "meta-llama/Llama-4-Scout-17B-16E-Instruct": "meta-llama/Llama-3.2-3B-Instruct",
    "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "default": "Qwen/Qwen2.5-7B-Instruct"
}

SAFETY_NET_MODEL = "microsoft/Phi-3.5-mini-instruct"

HF_ROUTER_BASE = "https://router.huggingface.co/hf-inference/models"

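# Request/response schemas for the /api/ask endpoint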
class ChatRequest(BaseModel):
    question: str
    model: Optional[str] = "default"
    system_prompt: Optional[str] = None
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 1024


class ChatResponse(BaseModel):
    answer: str
    model_used: str
    sources: list = []

async def verify_api_key(authorization: str = Header(None)):
    # Reject any request that does not carry a valid Bearer token
    if not authorization:
        print("⚠️ Missing Authorization header")
        raise HTTPException(status_code=401, detail="Missing Authorization header")
    try:
        scheme, token = authorization.split()
    except ValueError:
        raise HTTPException(status_code=401, detail="Malformed Authorization header")
    if scheme.lower() != 'bearer':
        raise HTTPException(status_code=401, detail="Invalid authentication scheme")
    if token != CYGNIS_API_KEY:
        print(f"⚠️ Invalid API Key: {token}")
        raise HTTPException(status_code=401, detail="Invalid API key")
    return True


@app.get("/")
def read_root():
    return {"status": "online", "service": "CygnisAI Studio API", "hf_token_set": bool(HF_TOKEN)}


def call_hf_api(model_id, messages, req):
    """Helper to call the HF API, handling both the chat and standard endpoints."""
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json"
    }

    # Try the OpenAI-compatible chat completions endpoint first
    hf_chat_url = f"{HF_ROUTER_BASE}/{model_id}/v1/chat/completions"
    payload_chat = {
        "model": model_id,
        "messages": messages,
        "max_tokens": req.max_tokens,
        "temperature": req.temperature,
        "stream": False
    }

    print(f"🚀 Calling HF Chat API: {hf_chat_url}")
    response = requests.post(hf_chat_url, headers=headers, json=payload_chat)

    # If the model has no chat endpoint, fall back to the standard inference API
    if response.status_code in [404, 405]:
        print(f"🔄 Fallback to standard inference API (Status {response.status_code})")
        api_url = f"{HF_ROUTER_BASE}/{model_id}"

        # Flatten the chat messages into a single prompt string
        prompt_str = ""
        for msg in messages:
            role = msg['role']
            content = msg['content']
            if role == 'system':
                prompt_str += f"<|system|>\n{content}\n"
            elif role == 'user':
                prompt_str += f"<|user|>\n{content}\n"
            elif role == 'assistant':
                prompt_str += f"<|assistant|>\n{content}\n"
        prompt_str += "<|assistant|>\n"

        payload_standard = {
            "inputs": prompt_str,
            "parameters": {
                "max_new_tokens": req.max_tokens,
                "temperature": req.temperature,
                "return_full_text": False
            }
        }
        print(f"🚀 Calling HF Standard API: {api_url}")
        response = requests.post(api_url, headers=headers, json=payload_standard)

    return response

@app.post("/api/ask", response_model=ChatResponse) |
|
|
async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)): |
|
|
print(f"📩 Received request: {req.question[:50]}...") |
|
|
|
|
|
if not HF_TOKEN: |
|
|
print("❌ CRITICAL: HF_TOKEN is missing!") |
|
|
|
|
|
return { |
|
|
"answer": "Configuration Error: HF_TOKEN is missing on the server.", |
|
|
"model_used": "error-handler", |
|
|
"sources": [] |
|
|
} |
|
|
|
|
|
model_id = MODELS.get(req.model, MODELS["default"]) |
|
|
print(f"🤖 Routing request to: {model_id}") |
|
|
|
|
|
messages = [] |
|
|
if req.system_prompt: |
|
|
messages.append({"role": "system", "content": req.system_prompt}) |
|
|
messages.append({"role": "user", "content": req.question}) |
|
|
|
|
|
try: |
|
|
|
|
|
response = call_hf_api(model_id, messages, req) |
|
|
|
|
|
|
|
|
if response.status_code != 200: |
|
|
print(f"⚠️ Primary model failed ({response.status_code}). Switching to SAFETY NET: {SAFETY_NET_MODEL}") |
|
|
model_id = SAFETY_NET_MODEL |
|
|
response = call_hf_api(SAFETY_NET_MODEL, messages, req) |
|
|
|
|
|
|
|
|
if response.status_code != 200: |
|
|
print(f"❌ ALL MODELS FAILED. Returning mock response. Last error: {response.text}") |
|
|
return { |
|
|
"answer": "Je suis désolé, mes serveurs de réflexion sont actuellement surchargés ou inaccessibles. Je ne peux pas traiter votre demande pour le moment. Veuillez réessayer dans quelques minutes.", |
|
|
"model_used": "fallback-mock", |
|
|
"sources": [] |
|
|
} |
|
|
|
|
|
data = response.json() |
|
|
|
|
|
answer = "" |
|
|
if "choices" in data and len(data["choices"]) > 0: |
|
|
answer = data["choices"][0]["message"]["content"] |
|
|
elif isinstance(data, list) and len(data) > 0 and "generated_text" in data[0]: |
|
|
answer = data[0]["generated_text"] |
|
|
elif "generated_text" in data: |
|
|
answer = data["generated_text"] |
|
|
else: |
|
|
print(f"⚠️ Unknown response format: {data}") |
|
|
answer = "Error: Could not parse model response." |
|
|
|
|
|
return { |
|
|
"answer": answer, |
|
|
"model_used": model_id, |
|
|
"sources": [] |
|
|
} |
|
|
|
|
|
except Exception as e: |
|
|
print(f"❌ Internal Exception: {str(e)}") |
|
|
|
|
|
return { |
|
|
"answer": "Une erreur interne inattendue s'est produite. Mes excuses.", |
|
|
"model_used": "exception-handler", |
|
|
"sources": [] |
|
|
} |
|
|
|
|
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
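
# Example client call (a minimal sketch, assuming the server runs locally on
# port 7860 and CYGNIS_API_KEY was left at its demo default):
#
#   import requests
#   resp = requests.post(
#       "http://localhost:7860/api/ask",
#       headers={"Authorization": "Bearer cgn_live_stable_demo_api_key_012345"},
#       json={"question": "Hello, who are you?", "model": "default"},
#   )
#   print(resp.json()["answer"])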