aelsaeed's picture
Upload app.py
66bea79 verified
raw
history blame
3.25 kB
# =========================
# Install the required libraries
# =========================
# `!pip install ...` is IPython/Jupyter shell syntax and is a SyntaxError when
# this file is executed as a plain Python script. Invoke pip through the
# running interpreter instead; the package list and the -q flag are unchanged.
import subprocess
import sys

# check=False mirrors the shell escape, which does not raise when pip fails;
# a genuinely missing package still surfaces at the imports that follow.
subprocess.run(
    [
        sys.executable, "-m", "pip", "install", "-q",
        "gradio", "pandas", "numpy", "gdown", "sentence-transformers",
    ],
    check=False,
)
import gdown
import pickle
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer, util
import gradio as gr
import os
import zipfile
# =========================
# Google Drive links for the data files
# =========================
# Maps a short resource name to its direct-download URL.
files = dict(
    books="https://drive.google.com/uc?id=1v5UJy9ePQccyquFygHisFVBHS56KhlhQ",
    theses="https://drive.google.com/uc?id=1i2Rrr9gfcNj4QIqfQ8AX8ZlO00whbnKJ",
    books_embeddings="https://drive.google.com/uc?id=1uH5PVUGXYENeqGUaikQ55wtDVcU_04qC",
    theses_embeddings="https://drive.google.com/uc?id=1cYZtgYPzoWxePOW9B3bIdqANOI890rs2",
    model_zip="https://drive.google.com/uc?id=18D4xtDTKCWeGaZ3lsYW7_D79xo95daDL",
)
# =========================
# Download the files
# =========================
# Every resource is saved as "<name>.pkl", except the model archive, which is
# saved as "model.zip". Anything already present on disk is left untouched.
for key in files:
    if key == "model_zip":
        target = "model.zip"
    else:
        target = f"{key}.pkl"
    if os.path.exists(target):
        continue
    gdown.download(files[key], target, quiet=False)
# =========================
# Unpack the model archive
# =========================
# Extraction is skipped whenever the target path already exists.
if not os.path.exists("AI_Library_Model"):
    with zipfile.ZipFile("model.zip", mode="r") as archive:
        archive.extractall(path="AI_Library_Model")
# =========================
# Load the records and their embeddings
# =========================
def _load_pickle(path):
    """Return the object stored in the pickle file at *path*."""
    # SECURITY: pickle.load can execute arbitrary code while deserializing.
    # These files are downloaded at startup, so the download links must only
    # ever point at sources you trust.
    with open(path, "rb") as f:
        return pickle.load(f)


books = _load_pickle("books.pkl")
theses = _load_pickle("theses.pkl")
# Tag every row with its origin so a search can be limited to one source.
books["type"] = "Book"
theses["type"] = "Thesis"
# ignore_index=True gives the combined frame a fresh 0..n-1 index.
library = pd.concat([books, theses], ignore_index=True)

books_emb = _load_pickle("books_embeddings.pkl")
theses_emb = _load_pickle("theses_embeddings.pkl")
# Stacked in the same books-then-theses order as `library`.
library_embeddings = np.vstack([books_emb, theses_emb])

# The search looks embeddings up by row position, so a size mismatch would
# silently pair records with the wrong vectors. Fail fast instead.
if len(library_embeddings) != len(library):
    raise ValueError(
        f"Embedding count ({len(library_embeddings)}) does not match "
        f"record count ({len(library)})"
    )
# =========================
# Load the MiniLM model from the local folder
# =========================
model = SentenceTransformer(model_name_or_path="AI_Library_Model")
# =========================
# Search function
# =========================
def search_library(query, source_type, top_k=5):
    """Return the library entries most similar to *query*.

    Args:
        query: Free-text search string.
        source_type: Value of the "type" column to search within
            ("Book" or "Thesis").
        top_k: Maximum number of rows to return (defaults to 5).

    Returns:
        A DataFrame with the "title", "author", "year" and "field" columns,
        ordered from highest to lowest cosine similarity. It is empty when
        the query is blank, when top_k is below 1, or when no row matches
        *source_type*.
    """
    columns = ["title", "author", "year", "field"]

    # A blank query has nothing meaningful to embed; return no rows rather
    # than arbitrary "matches". A non-positive top_k likewise selects nothing.
    if not query or not query.strip() or top_k < 1:
        return library.iloc[:0][columns]

    filtered = library[library["type"] == source_type]
    if filtered.empty:
        return filtered[columns]

    # `library` carries a fresh 0..n-1 index, so the surviving index labels
    # are also the row positions inside `library_embeddings`.
    filtered_emb = library_embeddings[filtered.index]
    query_emb = model.encode(query)
    scores = util.cos_sim(query_emb, filtered_emb)[0].cpu().numpy()
    # argsort is ascending; reverse it to rank the best matches first.
    top_idx = np.argsort(scores)[::-1][:top_k]
    return filtered.iloc[top_idx][columns]
# =========================
# Single-tab Gradio interface
# =========================
with gr.Blocks() as demo:
    gr.Markdown("## مكتبة AI Explorer - البحث الحر المحلي")
    with gr.Row():
        query = gr.Textbox(label="اكتب سؤال البحث")
        # Preselect a source explicitly: depending on the Gradio version, a
        # dropdown without a value can start unselected, which would pass
        # None as the source type and match no rows.
        source = gr.Dropdown(
            choices=["Book", "Thesis"],
            value="Book",
            label="نوع المصدر",
        )
    btn = gr.Button("بحث")
    output = gr.Dataframe(headers=["العنوان","المؤلف","السنة","المجال"])
    # Clicking the button runs the search and shows the result in the table.
    btn.click(search_library, inputs=[query, source], outputs=output)
demo.launch()