aelsaeed committed on
Commit
062b083
·
verified ·
1 Parent(s): cc686ae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -60
app.py CHANGED
@@ -1,60 +1,46 @@
1
- import gradio as gr
2
- import pandas as pd
3
- import numpy as np
4
- import pickle
5
- from sentence_transformers import SentenceTransformer, util
6
- import os
7
-
8
- # =========================
9
- # قراءة البيانات وEmbeddings من ملفات موجودة داخل Space
10
- # =========================
11
def _read_pickle(path):
    """Unpickle and return the object stored in the file at *path*."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# NOTE: pickle.load can execute arbitrary code, so these files must be
# trusted artefacts shipped with the app, never user uploads.
books = _read_pickle("books.pkl")
theses = _read_pickle("theses.pkl")

# Tag every record with its source so the merged table can be filtered
# by "Book" / "Thesis" later on.
books["type"], theses["type"] = "Book", "Thesis"
library = pd.concat((books, theses), ignore_index=True)

# Stack the embeddings in the same order as the concatenation above
# (books first, then theses).
books_emb = _read_pickle("books_embeddings.pkl")
theses_emb = _read_pickle("theses_embeddings.pkl")
library_embeddings = np.vstack((books_emb, theses_emb))

# =========================
# Load the MiniLM model from a folder that lives inside the Space
# =========================
model = SentenceTransformer("AI_Library_Model")
33
-
34
- # =========================
35
- # دالة البحث
36
- # =========================
37
def search_library(query, source_type):
    """Return the five library records of one type that best match *query*.

    Args:
        query: Free-text search string; it is encoded with the module-level
            ``model``.
        source_type: Value compared against the ``"type"`` column of
            ``library`` to select the candidate rows.

    Returns:
        The ``title``, ``author``, ``year`` and ``field`` columns of the (at
        most) five candidates with the highest cosine similarity, best first.
    """
    type_mask = library["type"] == source_type
    candidates = library[type_mask]
    # The candidates' index labels double as row positions in the stacked
    # embedding matrix.
    candidate_vectors = library_embeddings[candidates.index]

    similarity = util.cos_sim(model.encode(query), candidate_vectors)[0]
    ranking = np.argsort(similarity.cpu().numpy())[::-1]

    best_rows = candidates.iloc[ranking[:5]]
    return best_rows[["title", "author", "year", "field"]]
47
-
48
- # =========================
49
- # Gradio interface (single tab)
50
- # =========================
51
- with gr.Blocks() as demo:
52
- gr.Markdown("## مكتبة AI Explorer - البحث الحر المحلي")
53
- with gr.Row():
54
- query = gr.Textbox(label="اكتب سؤال البحث")
55
- source = gr.Dropdown(choices=["Book", "Thesis"], label="نوع المصدر")  # choices match the "type" labels search_library filters on
56
- btn = gr.Button("بحث")
57
- output = gr.Dataframe(headers=["العنوان","المؤلف","السنة","المجال"])  # column headers: title, author, year, field
58
- btn.click(search_library, inputs=[query, source], outputs=output)  # on click: call search_library(query, source) and show the result in the table
59
-
60
- demo.launch()  # start the Gradio app
 
1
+ import joblib
2
+ import gradio as gr
3
+ import numpy as np
4
+
5
+ # =========================
6
+ # تحميل البيانات
7
+ # =========================
8
# Load the book/thesis records and their precomputed embeddings from the
# working directory. joblib.load unpickles its input, so these must be
# trusted files.
# NOTE(review): "Theses.pkl" is capitalised while "theses_embeddings.pkl" is
# not -- confirm it matches the file name on disk (case-sensitive filesystems).
books, theses = joblib.load("books.pkl"), joblib.load("Theses.pkl")
books_emb, theses_emb = (
    joblib.load("books_embeddings.pkl"),
    joblib.load("theses_embeddings.pkl"),
)
12
+
13
+ # =========================
14
+ # مثال دالة بحث بسيطة
15
+ # =========================
16
+ from sklearn.metrics.pairwise import cosine_similarity
17
+
18
def _record_at(collection, position):
    """Return the record at integer *position* of a pandas object or sequence."""
    position = int(position)  # numpy integer -> plain int
    positional = getattr(collection, "iloc", None)
    # Plain ``frame[3]`` on a DataFrame is a *column* lookup; ``.iloc`` is the
    # positional row accessor. Lists and arrays have no ``.iloc`` and are
    # indexed directly.
    return collection[position] if positional is None else positional[position]


def search_library(query_embedding, top_k=5):
    """Return the ``top_k`` library records most similar to a query embedding.

    Book and thesis embeddings are stacked (books first) and ranked by cosine
    similarity against the query.

    Args:
        query_embedding: Embedding vector of the query. Anything
            ``np.asarray`` accepts; it is reshaped to a single row, so a flat
            vector and a ``(1, dim)`` array both work.
        top_k: Maximum number of records to return. A value of zero or less
            yields an empty list.

    Returns:
        A list of records taken from ``books`` / ``theses``, best match
        first. Fewer than ``top_k`` records are returned when the library
        is smaller than that.
    """
    if top_k <= 0:
        # ``arr[-0:]`` selects the whole array, so without this guard
        # ``top_k=0`` would return every record instead of none.
        return []

    # Merge all embeddings into one matrix, books first.
    all_emb = np.vstack([books_emb, theses_emb])
    query_row = np.asarray(query_embedding).reshape(1, -1)
    similarity = cosine_similarity(query_row, all_emb)[0]
    top_indices = similarity.argsort()[-top_k:][::-1]

    # Rows below this boundary of the stacked matrix belong to books; the
    # boundary is the number of book *embeddings*, since that is what was
    # stacked.
    n_books = len(books_emb)
    return [
        _record_at(books, idx) if idx < n_books
        else _record_at(theses, idx - n_books)
        for idx in top_indices
    ]
31
+
32
+ # =========================
33
+ # واجهة Gradio
34
+ # =========================
35
def search_interface(query_embedding):
    """Gradio callback: search the library with the embedding typed in the UI.

    Args:
        query_embedding: Value delivered by the input component. It is
            flattened to a 1-D float vector before searching, so a one-row
            table and a plain list of numbers are both accepted.

    Returns:
        Whatever ``search_library`` returns for that vector; Gradio renders
        it as text.

    Raises:
        ValueError: If a cell cannot be converted to a float (for example an
            empty table cell).
    """
    # NOTE(review): assumes the table holds exactly one embedding whose length
    # matches the stored embeddings -- confirm against the real input.
    vector = np.asarray(query_embedding, dtype=float).ravel()
    return search_library(vector)


demo = gr.Interface(
    fn=search_interface,
    # The legacy ``gr.inputs`` namespace is gone from current Gradio releases;
    # components are created from the top-level ``gr`` namespace instead.
    inputs=gr.Dataframe(),  # change this to match the actual input type
    outputs="text",
    title="Library Search",
    description="بحث في مكتبة الكتب والرسائل",
)

demo.launch()