API / main.py
from fastapi import FastAPI, HTTPException, Header, Depends
from pydantic import BaseModel
import requests
import os
import json
from typing import Optional, Dict
app = FastAPI(title="CygnisAI Studio API")
# --- CONFIGURATION ---
HF_TOKEN = os.environ.get("HF_TOKEN")
CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")
# Mapping to models that are AVAILABLE and STABLE on the Hugging Face Inference API
# Note: free-tier models can be unstable or still loading.
MODELS = {
    # Gemma 2 9B (Google) - very fast and available
    "google/gemma-3-27b-it": "google/gemma-2-9b-it",
    # Llama 3.1 70B (Meta) - powerful
    "openai/gpt-oss-120b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
    # Qwen 2.5 7B (Alibaba) - excellent generalist
    "Qwen/Qwen3-VL-8B-Thinking": "Qwen/Qwen2.5-7B-Instruct",
    # Phi 3.5 (Microsoft) - lightweight
    "XiaomiMiMo/MiMo-V2-Flash": "microsoft/Phi-3.5-mini-instruct",
    # DeepSeek R1 (Distill Llama 8B) - reasoning
    "deepseek-ai/DeepSeek-V3.2": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
    # Llama 3.2 3B (Meta) - ultra fast
    "meta-llama/Llama-4-Scout-17B-16E-Instruct": "meta-llama/Llama-3.2-3B-Instruct",
    # Llama 3.1 8B (Meta) - standard
    "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    # Default: Gemma 2B IT (very stable and fast on the free tier)
    "default": "google/gemma-2b-it"
}
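# Requested names are resolved through this table (e.g. "google/gemma-3-27b-it" is served by
# "google/gemma-2-9b-it"); any name that is not listed falls back to MODELS["default"].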
# Base URL for the Chat endpoint (OpenAI-compatible)
HF_CHAT_BASE = "https://router.huggingface.co/hf-inference/models"
# Base URL for the standard Inference API
HF_INFERENCE_API_BASE = "https://api-inference.huggingface.co/models"
class ChatRequest(BaseModel):
    question: str
    model: Optional[str] = "default"
    system_prompt: Optional[str] = None
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 1024
class ChatResponse(BaseModel):
    answer: str
    model_used: str
    sources: list = []
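# Example payload shapes for /api/ask (a sketch derived from the models above; only "question"
# is required, the other request fields fall back to the defaults shown in ChatRequest):
#   request:  {"question": "Hello", "model": "default", "temperature": 0.7, "max_tokens": 1024}
#   response: {"answer": "...", "model_used": "google/gemma-2b-it", "sources": []}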
async def verify_api_key(authorization: str = Header(None)):
    if not authorization:
        print("⚠️ Missing Authorization header")
        raise HTTPException(status_code=401, detail="Missing Authorization header")
    try:
        scheme, token = authorization.split()
        if scheme.lower() != 'bearer':
            raise HTTPException(status_code=401, detail="Invalid authentication scheme")
        if token != CYGNIS_API_KEY:
            print(f"⚠️ Invalid API Key: {token}")
            raise HTTPException(status_code=403, detail="Invalid API Key")
    except ValueError:
        raise HTTPException(status_code=401, detail="Invalid authorization header format")
@app.get("/")
def read_root():
return {"status": "online", "service": "CygnisAI Studio API", "hf_token_set": bool(HF_TOKEN)}
@app.post("/api/ask", response_model=ChatResponse)
async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)):
print(f"📩 Received request: {req.question[:50]}...")
if not HF_TOKEN:
print("❌ CRITICAL: HF_TOKEN is missing!")
raise HTTPException(status_code=500, detail="Server misconfiguration: HF_TOKEN is missing.")
model_id = MODELS.get(req.model, MODELS["default"])
print(f"🤖 Routing request to: {model_id}")
messages = []
if req.system_prompt:
messages.append({"role": "system", "content": req.system_prompt})
messages.append({"role": "user", "content": req.question})
headers = {
"Authorization": f"Bearer {HF_TOKEN}",
"Content-Type": "application/json"
}
    try:
        # 1. First attempt via the Chat endpoint (OpenAI-compatible)
        hf_chat_url = f"{HF_CHAT_BASE}/{model_id}/v1/chat/completions"
        payload_chat = {
            "model": model_id,
            "messages": messages,
            "max_tokens": req.max_tokens,
            "temperature": req.temperature,
            "stream": False
        }
        print(f"🚀 Calling HF Chat API: {hf_chat_url}")
        response = requests.post(hf_chat_url, headers=headers, json=payload_chat)
        # 2. Fallback to the standard Inference API (if the Chat call fails with 404 or 405)
        if response.status_code in [404, 405]:
            print(f"🔄 Fallback to standard inference API (Status {response.status_code})")
            # Use the correct URL for the standard Inference API
            api_url = f"{HF_INFERENCE_API_BASE}/{model_id}"
            prompt_str = ""
            for msg in messages:
                prompt_str += f"{msg['role']}: {msg['content']}\n"
            prompt_str += "assistant:"
            payload_standard = {
                "inputs": prompt_str,
                "parameters": {
                    "max_new_tokens": req.max_tokens,
                    "temperature": req.temperature,
                    "return_full_text": False
                }
            }
            response = requests.post(api_url, headers=headers, json=payload_standard)
        if response.status_code != 200:
            print(f"❌ HF Error ({response.status_code}): {response.text}")
            raise HTTPException(status_code=502, detail=f"HF Error: {response.text}")
        data = response.json()
        answer = ""
        if "choices" in data and len(data["choices"]) > 0:
            answer = data["choices"][0]["message"]["content"]
        elif isinstance(data, list) and len(data) > 0 and "generated_text" in data[0]:
            answer = data[0]["generated_text"]
        elif "generated_text" in data:
            answer = data["generated_text"]
        else:
            print(f"⚠️ Unknown response format: {data}")
            answer = "Error: Could not parse model response."
        return {
            "answer": answer,
            "model_used": model_id,
            "sources": []
        }
    except HTTPException:
        # Re-raise HTTP errors (e.g. the 502 above) instead of masking them as 500s
        raise
    except Exception as e:
        print(f"❌ Internal Exception: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}")
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
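
# Minimal client sketch (an illustration, assuming the API is running locally on port 7860 with
# the demo key above; the question string is just an example):
#
#   import requests
#   resp = requests.post(
#       "http://localhost:7860/api/ask",
#       headers={"Authorization": "Bearer cgn_live_stable_demo_api_key_012345"},
#       json={"question": "What is CygnisAI Studio?", "model": "default"},
#   )
#   print(resp.json()["answer"])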