from fastapi import FastAPI, HTTPException, Header, Depends
from pydantic import BaseModel
import requests
import os
from typing import Optional

app = FastAPI(title="CygnisAI Studio API")

# --- CONFIGURATION ---
HF_TOKEN = os.environ.get("HF_TOKEN")
CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")

# Map requested model names to NON-GATED, POPULAR models
MODELS = {
    "google/gemma-3-27b-it": "google/gemma-2-9b-it",
    "openai/gpt-oss-120b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
    "Qwen/Qwen3-VL-8B-Thinking": "Qwen/Qwen2.5-7B-Instruct",
    "XiaomiMiMo/MiMo-V2-Flash": "microsoft/Phi-3.5-mini-instruct",
    "deepseek-ai/DeepSeek-V3.2": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
    "meta-llama/Llama-4-Scout-17B-16E-Instruct": "meta-llama/Llama-3.2-3B-Instruct",
    "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    # Default: Qwen 2.5 (very robust and usually available)
    "default": "Qwen/Qwen2.5-7B-Instruct"
}

# Last-resort fallback model (Microsoft Phi 3.5 is very lightweight and usually available)
SAFETY_NET_MODEL = "microsoft/Phi-3.5-mini-instruct"

# Single base URL for the HF router
HF_ROUTER_BASE = "https://router.huggingface.co/hf-inference/models"


class ChatRequest(BaseModel):
    question: str
    model: Optional[str] = "default"
    system_prompt: Optional[str] = None
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 1024


class ChatResponse(BaseModel):
    answer: str
    model_used: str
    sources: list = []


async def verify_api_key(authorization: str = Header(None)):
    if not authorization:
        # Do not block (to ease debugging), just log it
        print("⚠️ Missing Authorization header")
        return
    try:
        scheme, token = authorization.split()
        if scheme.lower() != "bearer":
            raise HTTPException(status_code=401, detail="Invalid authentication scheme")
        if token != CYGNIS_API_KEY:
            print(f"⚠️ Invalid API Key: {token}")
            # raise HTTPException(status_code=403, detail="Invalid API Key")  # Commented out for debugging
    except ValueError:
        pass  # Let it through for now


@app.get("/")
def read_root():
    return {"status": "online", "service": "CygnisAI Studio API", "hf_token_set": bool(HF_TOKEN)}


def call_hf_api(model_id, messages, req):
    """Helper that calls the HF API, trying the Chat API first with a fallback to the standard inference API."""
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json"
    }

    # 1. Try the Chat Completions API
    hf_chat_url = f"{HF_ROUTER_BASE}/{model_id}/v1/chat/completions"
    payload_chat = {
        "model": model_id,
        "messages": messages,
        "max_tokens": req.max_tokens,
        "temperature": req.temperature,
        "stream": False
    }

    print(f"🚀 Calling HF Chat API: {hf_chat_url}")
    response = requests.post(hf_chat_url, headers=headers, json=payload_chat)

    # 2. Fall back to the standard inference API
    if response.status_code in [404, 405]:
        print(f"🔄 Fallback to standard inference API (Status {response.status_code})")
        api_url = f"{HF_ROUTER_BASE}/{model_id}"

        # Flatten the chat messages into a single prompt string
        prompt_str = ""
        for msg in messages:
            role = msg["role"]
            content = msg["content"]
            if role == "system":
                prompt_str += f"<|system|>\n{content}\n"
            elif role == "user":
                prompt_str += f"<|user|>\n{content}\n"
            elif role == "assistant":
                prompt_str += f"<|assistant|>\n{content}\n"
        prompt_str += "<|assistant|>\n"

        payload_standard = {
            "inputs": prompt_str,
            "parameters": {
                "max_new_tokens": req.max_tokens,
                "temperature": req.temperature,
                "return_full_text": False
            }
        }
        print(f"🚀 Calling HF Standard API: {api_url}")
        response = requests.post(api_url, headers=headers, json=payload_standard)

    return response


@app.post("/api/ask", response_model=ChatResponse)
async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)):
    print(f"📩 Received request: {req.question[:50]}...")

    if not HF_TOKEN:
        print("❌ CRITICAL: HF_TOKEN is missing!")
        # Mock response instead of crashing
        return {
            "answer": "Configuration Error: HF_TOKEN is missing on the server.",
            "model_used": "error-handler",
            "sources": []
        }

    model_id = MODELS.get(req.model, MODELS["default"])
    print(f"🤖 Routing request to: {model_id}")

    messages = []
    if req.system_prompt:
        messages.append({"role": "system", "content": req.system_prompt})
    messages.append({"role": "user", "content": req.question})

    try:
        # First attempt
        response = call_hf_api(model_id, messages, req)

        # On failure, switch to the safety-net model
        if response.status_code != 200:
            print(f"⚠️ Primary model failed ({response.status_code}). Switching to SAFETY NET: {SAFETY_NET_MODEL}")
            model_id = SAFETY_NET_MODEL
            response = call_hf_api(SAFETY_NET_MODEL, messages, req)

        # If everything fails, return a mock response (ultimate fallback)
        if response.status_code != 200:
            print(f"❌ ALL MODELS FAILED. Returning mock response. Last error: {response.text}")
            return {
                "answer": "Je suis désolé, mes serveurs de réflexion sont actuellement surchargés ou inaccessibles. Je ne peux pas traiter votre demande pour le moment. Veuillez réessayer dans quelques minutes.",
                "model_used": "fallback-mock",
                "sources": []
            }

        data = response.json()
        answer = ""

        # Parse either the Chat Completions format or the standard inference format
        if "choices" in data and len(data["choices"]) > 0:
            answer = data["choices"][0]["message"]["content"]
        elif isinstance(data, list) and len(data) > 0 and "generated_text" in data[0]:
            answer = data[0]["generated_text"]
        elif "generated_text" in data:
            answer = data["generated_text"]
        else:
            print(f"⚠️ Unknown response format: {data}")
            answer = "Error: Could not parse model response."

        return {
            "answer": answer,
            "model_used": model_id,
            "sources": []
        }

    except Exception as e:
        print(f"❌ Internal Exception: {str(e)}")
        # Mock response on crash
        return {
            "answer": "Une erreur interne inattendue s'est produite. Mes excuses.",
            "model_used": "exception-handler",
            "sources": []
        }


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
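
# --- EXAMPLE USAGE (sketch) ---
# A minimal client-side sketch, kept as comments so it does not run with the app.
# It assumes the server is running locally on the port configured above (7860) and
# that CYGNIS_API_KEY was left at its demo default; adjust the URL, key, and payload
# to match your deployment.
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/api/ask",
#       headers={"Authorization": "Bearer cgn_live_stable_demo_api_key_012345"},
#       json={"question": "Hello, who are you?", "model": "default", "max_tokens": 256},
#   )
#   print(resp.json()["answer"])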