from fastapi import FastAPI, HTTPException, Header, Depends
from pydantic import BaseModel
import requests
import os
from typing import Optional

app = FastAPI(title="CygnisAI Studio API")

# Hugging Face token used for upstream inference calls; read from the environment.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Key that clients must present as a Bearer token. The fallback is a demo key;
# override CYGNIS_API_KEY in production.
CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")

# Public model names exposed by the API, mapped to the Hugging Face model IDs
# actually served upstream.
MODELS = {
    "google/gemma-3-27b-it": "google/gemma-2-27b-it",
    "openai/gpt-oss-120b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
    "Qwen/Qwen3-VL-8B-Thinking": "Qwen/Qwen2-VL-7B-Instruct",
    "XiaomiMiMo/MiMo-V2-Flash": "Xiaomi/MIMO",
    "deepseek-ai/DeepSeek-V3.2": "deepseek-ai/DeepSeek-V3",
    "meta-llama/Llama-4-Scout-17B-16E-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
    "default": "meta-llama/Meta-Llama-3-8B-Instruct",
}
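
# Any model name not present above falls back to the "default" entry (see ask_model).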

# Base URL for the Hugging Face Inference router.
HF_INFERENCE_BASE = "https://router.huggingface.co/hf-inference/models"


class ChatRequest(BaseModel):
    question: str
    model: Optional[str] = "default"
    system_prompt: Optional[str] = None
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 1024


class ChatResponse(BaseModel):
    answer: str
    model_used: str
    sources: list = []
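
# Illustrative request/response bodies for these models (example values only):
#   request:  {"question": "Hi", "model": "default", "temperature": 0.7, "max_tokens": 1024}
#   response: {"answer": "...", "model_used": "meta-llama/Meta-Llama-3-8B-Instruct", "sources": []}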


async def verify_api_key(authorization: str = Header(None)):
    """Validate the `Authorization: Bearer <key>` header against CYGNIS_API_KEY."""
    if not authorization:
        raise HTTPException(status_code=401, detail="Missing Authorization header")
    try:
        scheme, token = authorization.split()
    except ValueError:
        raise HTTPException(status_code=401, detail="Invalid authorization header format")
    if scheme.lower() != "bearer":
        raise HTTPException(status_code=401, detail="Invalid authentication scheme")
    if token != CYGNIS_API_KEY:
        raise HTTPException(status_code=403, detail="Invalid API Key")
    return True
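
# Protected routes attach this check via FastAPI's Depends (see /api/ask below).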


@app.get("/")
def read_root():
    return {"status": "online", "service": "CygnisAI Studio API"}


@app.post("/api/ask", response_model=ChatResponse)
async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)):
    if not HF_TOKEN:
        print("⚠️ WARNING: HF_TOKEN not set. Calls to HF will fail.")

    # Resolve the requested model name, falling back to the default mapping.
    model_id = MODELS.get(req.model, MODELS["default"])
    api_url = f"{HF_INFERENCE_BASE}/{model_id}"

    print(f"🤖 Routing request to: {model_id}")

    # Build an OpenAI-style chat message list.
    messages = []
    if req.system_prompt:
        messages.append({"role": "system", "content": req.system_prompt})
    messages.append({"role": "user", "content": req.question})

    payload = {
        "model": model_id,
        "messages": messages,
        "max_tokens": req.max_tokens,
        "temperature": req.temperature,
        "stream": False,
    }

    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    }

    try:
        # Try the OpenAI-compatible chat completions route first.
        hf_chat_url = f"{HF_INFERENCE_BASE}/{model_id}/v1/chat/completions"
        response = requests.post(hf_chat_url, headers=headers, json=payload, timeout=120)

        # Some models only serve the legacy text-generation route; retry there on 404.
        if response.status_code == 404:
            print("🔄 Fallback to standard inference API")
            # Flatten the conversation into a single prompt string.
            prompt_str = (
                f"System: {req.system_prompt}\nUser: {req.question}\nAssistant:"
                if req.system_prompt
                else f"User: {req.question}\nAssistant:"
            )
            payload_standard = {
                "inputs": prompt_str,
                "parameters": {
                    "max_new_tokens": req.max_tokens,
                    "temperature": req.temperature,
                    "return_full_text": False,
                },
            }
            response = requests.post(api_url, headers=headers, json=payload_standard, timeout=120)

        if response.status_code != 200:
            print(f"❌ HF Error ({response.status_code}): {response.text}")
            raise HTTPException(status_code=502, detail=f"Model provider error: {response.text}")

        data = response.json()

        # Parse either a chat-completions payload or a text-generation payload.
        if isinstance(data, dict) and data.get("choices"):
            answer = data["choices"][0]["message"]["content"]
        elif isinstance(data, list) and data and "generated_text" in data[0]:
            answer = data[0]["generated_text"]
        elif isinstance(data, dict) and "generated_text" in data:
            answer = data["generated_text"]
        else:
            answer = "Error: Could not parse model response."

        return {
            "answer": answer,
            "model_used": model_id,
            "sources": [],
        }

    except HTTPException:
        # Re-raise deliberate HTTP errors (e.g. the 502 above) instead of masking them as 500s.
        raise
    except Exception as e:
        print(f"❌ Internal Error: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
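
# A quick smoke test (a sketch; assumes the server runs locally on port 7860 and
# CYGNIS_API_KEY is left at the demo default):
#
#   curl -X POST http://localhost:7860/api/ask \
#     -H "Authorization: Bearer cgn_live_stable_demo_api_key_012345" \
#     -H "Content-Type: application/json" \
#     -d '{"question": "What is FastAPI?", "model": "default"}'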