import os
import pandas as pd
import numpy as np
import requests
import gradio as gr
import gdown
import pickle

# Local paths of the two Excel workbooks used by the app.
BOOKS_FILE = "book.xlsx"
THESES_FILE = "theses.xlsx"

# Google Drive file IDs of those workbooks, keyed by dataset name.
DRIVE_LINKS = {
    "books": "1FElHiASfiVLeuHWYaqd2Q5foxWRlJT-O",
    "theses": "1K2Mtze6ZdvfKUsFMCOWlRBjDq-ZnJNrv",
}

def download_from_drive(file_id, output):
    """Download a Google Drive file to a local path.

    Args:
        file_id: Google Drive file ID, interpolated into the download URL.
        output: Local path the downloaded file is written to.

    Raises:
        RuntimeError: If ``gdown.download`` returns ``None``.
    """
    url = f"https://drive.google.com/uc?export=download&id={file_id}"
    # NOTE(review): some gdown releases report a failed download by returning
    # None instead of raising -- confirm against the pinned version. Surfacing
    # it here avoids a confusing missing-file error later on.
    if gdown.download(url, output, quiet=True) is None:
        raise RuntimeError(
            f"Could not download Google Drive file {file_id!r} to {output!r}"
        )

# Download each workbook from Google Drive unless a local copy already exists.
for _dataset, _local_path in (("books", BOOKS_FILE), ("theses", THESES_FILE)):
    if os.path.exists(_local_path):
        continue
    download_from_drive(DRIVE_LINKS[_dataset], _local_path)

# Read the data
def load_data(file):
    """Read an Excel sheet and guarantee a string-typed ``Title`` column.

    Missing cells are replaced with the placeholder "غير متوافر". The
    ``Title`` column is derived from, in order of preference: an existing
    ``Title`` column, an "العنوان" column, or the first column of the sheet.

    Args:
        file: Workbook source passed straight to ``pandas.read_excel``.

    Returns:
        The loaded ``DataFrame`` with a ``Title`` column of strings.
    """
    df = pd.read_excel(file).fillna("غير متوافر")
    if "Title" in df.columns:
        title_source = df["Title"]
    elif "العنوان" in df.columns:
        title_source = df["العنوان"]
    else:
        title_source = df.iloc[:, 0]
    # Cast in every branch: a purely numeric Title column would otherwise
    # not support the ``.str`` string operations.
    df["Title"] = title_source.astype(str)
    return df

# Load both workbooks into memory once, books first and then theses.
books_df, theses_df = (load_data(path) for path in (BOOKS_FILE, THESES_FILE))

# Inference endpoint configuration; the access token is read from the
# HF_TOKEN environment variable and may be absent.
API_TOKEN = os.environ.get("HF_TOKEN")
API_URL = "https://api-inference.huggingface.co/models/aelsaeed/all-MiniLM-L6-v2-api"
# The header must be built from API_TOKEN (there is no HF_TOKEN variable in
# this module). When no token is configured the header is omitted rather
# than sending the literal string "Bearer None".
HEADERS = {"Authorization": f"Bearer {API_TOKEN}"} if API_TOKEN else {}

def get_embedding(text, timeout=30):
    """Return the embedding vector for ``text`` from the inference endpoint.

    Args:
        text: Text to embed; sent as a one-element ``inputs`` batch.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ``numpy.ndarray`` built from the first element of the JSON response.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    response = requests.post(
        API_URL, headers=HEADERS, json={"inputs": [text]}, timeout=timeout
    )
    # Report the HTTP failure itself; otherwise an error body would only
    # show up as an obscure indexing error on the parsed JSON below.
    response.raise_for_status()
    return np.array(response.json()[0])

def _title_embeddings(df, category):
    """Return one embedding per title in ``df``, cached on disk per category."""
    cache_path = f"{category}_embeddings.pkl"
    if os.path.exists(cache_path):
        # NOTE(security): pickle.load can execute arbitrary code if the cache
        # file is tampered with. The cache is assumed to be a trusted file
        # written by this function; a plain array format would be safer.
        with open(cache_path, "rb") as fh:
            cached = np.asarray(pickle.load(fh))
        # A cache built from a different version of the sheet would pair
        # scores with the wrong rows, so only reuse it when row counts match.
        if cached.ndim == 2 and cached.shape[0] == len(df):
            return cached
    embeddings = np.array([get_embedding(t) for t in df["Title"].tolist()])
    with open(cache_path, "wb") as fh:
        pickle.dump(embeddings, fh)
    return embeddings


def _render_table(results):
    """Render a DataFrame as an HTML table with every cell escaped."""
    from html import escape

    header = "".join(f"<th>{escape(str(col))}</th>" for col in results.columns)
    rows = (
        "<tr>" + "".join(f"<td>{escape(str(val))}</td>" for val in row) + "</tr>"
        for row in results.itertuples(index=False, name=None)
    )
    return (
        "<table border=1 style='border-collapse:collapse;width:100%;'>"
        + "<tr>" + header + "</tr>"
        + "".join(rows)
        + "</table>"
    )


def search(query, category, mode):
    """Search the selected dataset by title and return the result as HTML.

    Args:
        query: Search text entered by the user.
        category: ``"Books"`` selects ``books_df``; any other value selects
            ``theses_df``.
        mode: ``"نصي"`` performs a case-insensitive literal substring match
            on ``Title``; any other value ranks all rows by cosine
            similarity between title embeddings and the query embedding.

    Returns:
        An HTML table of the matching rows, or a short message when the
        query is blank or nothing matches.
    """
    if not query or not query.strip():
        return "⚠️ اكتب كلمة أو جملة للبحث"

    df = books_df if category == "Books" else theses_df

    if mode == "نصي":
        # regex=False: the query is user text, not a pattern, so input such
        # as "C++" or "(" must match literally instead of raising re.error.
        titles = df["Title"].astype(str)
        results = df[titles.str.contains(query, case=False, na=False, regex=False)]
    elif df.empty:
        # Nothing to embed or rank; fall through to the "no results" message.
        results = df
    else:
        embeddings = _title_embeddings(df, category)
        query_emb = get_embedding(query)
        norms = np.linalg.norm(embeddings, axis=1) * np.linalg.norm(query_emb)
        # A zero vector has a zero dot product as well; dividing by 1 yields
        # a score of 0 instead of NaN.
        norms = np.where(norms == 0, 1.0, norms)
        scores = np.dot(embeddings, query_emb) / norms
        results = df.iloc[np.argsort(-scores)]

    if results.empty:
        return "❌ لم يتم العثور على نتائج"
    return _render_table(results)

# Input widgets, listed in the positional order of search(query, category, mode).
_search_inputs = [
    gr.Textbox(label="اكتب كلمة البحث"),
    gr.Dropdown(["Books", "Theses"], label="الفئة"),
    gr.Radio(["نصي", "دلالي"], label="نوع البحث"),
]

# Wrap the search function in a Gradio interface and launch it.
iface = gr.Interface(
    fn=search,
    inputs=_search_inputs,
    outputs="html",
    title="البحث في المكتبة الرقمية",
)
iface.launch()