Spaces:

aelsaeed
/

LibrarySearchSpace

Running

File size: 3,252 Bytes

66bea79

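# =========================
# Install the required libraries
# NOTE(review): the `!pip` line below is IPython/notebook shell syntax and is
# not valid in a plain Python script - confirm how this file is executed.
# =========================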
!pip install -q gradio pandas numpy gdown sentence-transformers

import gdown
import pickle
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer, util
import gradio as gr
import os
import zipfile

# =========================
# Google Drive links for the data, embeddings and model archive
# =========================
files = {
    "books": "https://drive.google.com/uc?id=1v5UJy9ePQccyquFygHisFVBHS56KhlhQ",
    "theses": "https://drive.google.com/uc?id=1i2Rrr9gfcNj4QIqfQ8AX8ZlO00whbnKJ",
    "books_embeddings": "https://drive.google.com/uc?id=1uH5PVUGXYENeqGUaikQ55wtDVcU_04qC",
    "theses_embeddings": "https://drive.google.com/uc?id=1cYZtgYPzoWxePOW9B3bIdqANOI890rs2",
    "model_zip": "https://drive.google.com/uc?id=18D4xtDTKCWeGaZ3lsYW7_D79xo95daDL",
}

# =========================
# Download every file that is not already present locally
# =========================
for name, url in files.items():
    # Every entry is saved as "<name>.pkl" except the zipped model.
    output = "model.zip" if name == "model_zip" else f"{name}.pkl"
    if os.path.exists(output):
        continue
    # gdown.download may return None on failure (depending on the gdown
    # version); stop here with a clear message instead of hitting an
    # unrelated FileNotFoundError when the file is opened later.
    if gdown.download(url, output, quiet=False) is None:
        raise RuntimeError(f"Failed to download {name!r} from {url}")

# =========================
# Unpack the model archive (skipped when the target folder already exists)
# =========================
model_dir_present = os.path.exists("AI_Library_Model")
if not model_dir_present:
    archive = zipfile.ZipFile("model.zip", "r")
    # The context manager closes the archive once extraction finishes.
    with archive:
        archive.extractall("AI_Library_Model")

# =========================
# Load the catalogue data and the precomputed embeddings
# =========================
def _load_pickle(path):
    """Return the object unpickled from the file at *path*."""
    # SECURITY: pickle can execute arbitrary code while loading. These files
    # are fetched from Google Drive links, so keep using pickle only while
    # those links are fully trusted.
    with open(path, "rb") as f:
        return pickle.load(f)


books = _load_pickle("books.pkl")
theses = _load_pickle("theses.pkl")

# Tag each record with its source so it can be filtered by type later.
books["type"] = "Book"
theses["type"] = "Thesis"
library = pd.concat([books, theses], ignore_index=True)

books_emb = _load_pickle("books_embeddings.pkl")
theses_emb = _load_pickle("theses_embeddings.pkl")

# Stacked in the same order as `library` (books first, then theses) so that
# row i of the embeddings corresponds to row i of the catalogue.
library_embeddings = np.vstack([books_emb, theses_emb])

# A row-count mismatch would make searches return the wrong records, so fail
# loudly at start-up instead.
if len(library_embeddings) != len(library):
    raise ValueError(
        f"Embedding rows ({len(library_embeddings)}) do not match "
        f"catalogue rows ({len(library)})"
    )

# =========================
# Load the MiniLM model from the local folder
# =========================
# "AI_Library_Model" is the directory the model archive is extracted into.
model = SentenceTransformer("AI_Library_Model")

# =========================
# Search function
# =========================
def search_library(query, source_type, top_k=5):
    """Return the catalogue entries most similar to *query*.

    Args:
        query: Free-text search string.
        source_type: Value of the ``type`` column to search within
            ("Book" or "Thesis").
        top_k: Maximum number of rows to return.

    Returns:
        A DataFrame with the columns ``title``, ``author``, ``year`` and
        ``field``, ordered from most to least similar to the query. It is
        empty when the query is blank or no entry has the requested type.
    """
    columns = ["title", "author", "year", "field"]

    # Nothing to search for: skip the model call entirely.
    if not query or not query.strip():
        return pd.DataFrame(columns=columns)

    # A boolean mask keeps rows and embeddings aligned by position, without
    # relying on the frame's index labels being 0..n-1.
    mask = (library["type"] == source_type).to_numpy()
    if not mask.any():
        return pd.DataFrame(columns=columns)

    filtered = library[mask]
    filtered_emb = library_embeddings[mask]

    query_emb = model.encode(query)
    # cos_sim returns a (1, n) tensor; take the single row as a NumPy array.
    scores = util.cos_sim(query_emb, filtered_emb)[0].cpu().numpy()
    # argsort is ascending, so reverse it to get the best matches first.
    top_idx = np.argsort(scores)[::-1][:top_k]

    return filtered.iloc[top_idx][columns]

# =========================
# Single-tab Gradio interface
# =========================
with gr.Blocks() as demo:
    gr.Markdown("## مكتبة AI Explorer - البحث الحر المحلي")
    with gr.Row():
        query = gr.Textbox(label="اكتب سؤال البحث")
        # Preselect a source type explicitly: on Gradio versions where the
        # dropdown starts empty, a search submitted before the user picks a
        # value would otherwise pass None to search_library.
        source = gr.Dropdown(
            choices=["Book", "Thesis"],
            value="Book",
            label="نوع المصدر",
        )
        btn = gr.Button("بحث")
    output = gr.Dataframe(headers=["العنوان","المؤلف","السنة","المجال"])
    # Run the search with the textbox and dropdown values and show the
    # returned rows in the table.
    btn.click(search_library, inputs=[query, source], outputs=output)

demo.launch()