Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -81,9 +81,8 @@ def highlight_matching_words(text: str, query: str) -> str:
|
|
| 81 |
highlighted.append(token)
|
| 82 |
return ''.join(highlighted)
|
| 83 |
|
| 84 |
-
def find_similar(query: str, books: List[str], limit: int = 50) -> List[Dict[str, Any]]:
|
| 85 |
try:
|
| 86 |
-
query_vector = model.encode([query])[0]
|
| 87 |
client = weaviate.connect_to_weaviate_cloud(
|
| 88 |
cluster_url=WEAVIATE_URL,
|
| 89 |
auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
|
|
@@ -94,15 +93,42 @@ def find_similar(query: str, books: List[str], limit: int = 50) -> List[Dict[str
|
|
| 94 |
if books:
|
| 95 |
selected_books = [VULGATE_BOOKS[book] for book in books]
|
| 96 |
filter_condition = Filter.by_property("book").contains_any(selected_books)
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
results = []
|
| 104 |
for obj in response.objects:
|
| 105 |
highlighted_text = highlight_matching_words(obj.properties["text"], query)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
results.append({
|
| 107 |
"Reference": f"{obj.properties['book']} {obj.properties['chapter']}:{obj.properties['verse']}",
|
| 108 |
"Book": obj.properties["book"],
|
|
@@ -110,7 +136,7 @@ def find_similar(query: str, books: List[str], limit: int = 50) -> List[Dict[str
|
|
| 110 |
"Verse": obj.properties["verse"],
|
| 111 |
"Text": highlighted_text,
|
| 112 |
"RawText": obj.properties["text"],
|
| 113 |
-
"Similarity": round(
|
| 114 |
})
|
| 115 |
return results
|
| 116 |
finally:
|
|
@@ -124,7 +150,18 @@ def format_results_html(results: List[Dict[str, Any]]) -> str:
|
|
| 124 |
if "Error" in results[0]:
|
| 125 |
return f'<div style="color:red">Error: {results[0]["Error"]}</div>'
|
| 126 |
html = [
|
| 127 |
-
'<style>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
'<table style="border-collapse:collapse;width:100%;font-size:1em;">',
|
| 129 |
'<thead><tr>'
|
| 130 |
'<th>Reference</th><th>Text</th><th>Similarity</th><th>Book</th><th>Chapter</th><th>Verse</th>'
|
|
@@ -142,10 +179,10 @@ def format_results_html(results: List[Dict[str, Any]]) -> str:
|
|
| 142 |
html.append('</tbody></table>')
|
| 143 |
return ''.join(html)
|
| 144 |
|
| 145 |
-
def search(query: str, books: List[str], limit: int) -> str:
|
| 146 |
if not query.strip():
|
| 147 |
return "<div>Please enter a search query.</div>"
|
| 148 |
-
results = find_similar(query, books, limit)
|
| 149 |
return format_results_html(results)
|
| 150 |
|
| 151 |
with gr.Blocks(title="Latin Vulgate Verse Similarity Search") as demo:
|
|
@@ -170,26 +207,32 @@ with gr.Blocks(title="Latin Vulgate Verse Similarity Search") as demo:
|
|
| 170 |
multiselect=True
|
| 171 |
)
|
| 172 |
with gr.Row():
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
with gr.Row():
|
| 181 |
search_btn = gr.Button("Search", variant="primary")
|
| 182 |
output = gr.HTML(label="Results")
|
| 183 |
|
| 184 |
-
|
| 185 |
search_btn.click(
|
| 186 |
fn=search,
|
| 187 |
-
inputs=[query, book_select, limit],
|
| 188 |
outputs=output
|
| 189 |
)
|
| 190 |
query.submit(
|
| 191 |
fn=search,
|
| 192 |
-
inputs=[query, book_select, limit],
|
| 193 |
outputs=output
|
| 194 |
)
|
| 195 |
if __name__ == "__main__":
|
|
|
|
| 81 |
highlighted.append(token)
|
| 82 |
return ''.join(highlighted)
|
| 83 |
|
| 84 |
+
def find_similar(query: str, books: List[str], limit: int = 50, search_method: str = "vector") -> List[Dict[str, Any]]:
|
| 85 |
try:
|
|
|
|
| 86 |
client = weaviate.connect_to_weaviate_cloud(
|
| 87 |
cluster_url=WEAVIATE_URL,
|
| 88 |
auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
|
|
|
|
| 93 |
if books:
|
| 94 |
selected_books = [VULGATE_BOOKS[book] for book in books]
|
| 95 |
filter_condition = Filter.by_property("book").contains_any(selected_books)
|
| 96 |
+
|
| 97 |
+
# Always encode the query vector since we need it for both vector and hybrid search
|
| 98 |
+
query_vector = model.encode([query])[0]
|
| 99 |
+
|
| 100 |
+
if search_method == "vector":
|
| 101 |
+
response = vulgate.query.near_vector(
|
| 102 |
+
near_vector=query_vector,
|
| 103 |
+
limit=limit,
|
| 104 |
+
return_metadata=MetadataQuery(distance=True),
|
| 105 |
+
filters=filter_condition
|
| 106 |
+
)
|
| 107 |
+
elif search_method == "bm25":
|
| 108 |
+
response = vulgate.query.bm25(
|
| 109 |
+
query=query,
|
| 110 |
+
limit=limit,
|
| 111 |
+
filters=filter_condition
|
| 112 |
+
)
|
| 113 |
+
else: # hybrid
|
| 114 |
+
response = vulgate.query.hybrid(
|
| 115 |
+
query=query,
|
| 116 |
+
vector=query_vector,
|
| 117 |
+
limit=limit,
|
| 118 |
+
filters=filter_condition
|
| 119 |
+
)
|
| 120 |
+
|
| 121 |
results = []
|
| 122 |
for obj in response.objects:
|
| 123 |
highlighted_text = highlight_matching_words(obj.properties["text"], query)
|
| 124 |
+
|
| 125 |
+
# Handle different types of scores
|
| 126 |
+
similarity = 1.0 # default value
|
| 127 |
+
if hasattr(obj.metadata, 'distance') and obj.metadata.distance is not None:
|
| 128 |
+
similarity = 1 - obj.metadata.distance
|
| 129 |
+
elif hasattr(obj.metadata, 'score') and obj.metadata.score is not None:
|
| 130 |
+
similarity = obj.metadata.score
|
| 131 |
+
|
| 132 |
results.append({
|
| 133 |
"Reference": f"{obj.properties['book']} {obj.properties['chapter']}:{obj.properties['verse']}",
|
| 134 |
"Book": obj.properties["book"],
|
|
|
|
| 136 |
"Verse": obj.properties["verse"],
|
| 137 |
"Text": highlighted_text,
|
| 138 |
"RawText": obj.properties["text"],
|
| 139 |
+
"Similarity": round(similarity, 3)
|
| 140 |
})
|
| 141 |
return results
|
| 142 |
finally:
|
|
|
|
| 150 |
if "Error" in results[0]:
|
| 151 |
return f'<div style="color:red">Error: {results[0]["Error"]}</div>'
|
| 152 |
html = [
|
| 153 |
+
'<style>',
|
| 154 |
+
'/* Light mode styles */',
|
| 155 |
+
'td,th{padding:8px;}th{background:#f4f1e9;}tr:nth-child(even){background:#f9f9f9;}tr:hover{background:#e6e2d3;}table{border-radius:8px;overflow:hidden;box-shadow:0 2px 8px #e6e2d3;}td{vertical-align:top;}',
|
| 156 |
+
'/* Dark mode styles */',
|
| 157 |
+
'@media (prefers-color-scheme: dark) {',
|
| 158 |
+
' th { background: #232323; color: #f4f1e9; }',
|
| 159 |
+
' tr:nth-child(even) { background: #232323; }',
|
| 160 |
+
' tr:hover { background: #333333; }',
|
| 161 |
+
' table { box-shadow: 0 2px 8px #111; }',
|
| 162 |
+
' td { color: #f4f1e9; }',
|
| 163 |
+
'}',
|
| 164 |
+
'</style>',
|
| 165 |
'<table style="border-collapse:collapse;width:100%;font-size:1em;">',
|
| 166 |
'<thead><tr>'
|
| 167 |
'<th>Reference</th><th>Text</th><th>Similarity</th><th>Book</th><th>Chapter</th><th>Verse</th>'
|
|
|
|
| 179 |
html.append('</tbody></table>')
|
| 180 |
return ''.join(html)
|
| 181 |
|
| 182 |
+
def search(query: str, books: List[str], limit: int, search_method: str = "vector") -> str:
    """Run a verse search and return the outcome rendered as HTML.

    Args:
        query: Free-text query taken from the search box.
        books: Book names used to restrict the search; forwarded unchanged
            to ``find_similar``.
        limit: Maximum number of results to request.
        search_method: Retrieval strategy forwarded to ``find_similar``.
            The UI offers ``"vector"``, ``"bm25"`` and ``"hybrid"``.
            Defaults to ``"vector"`` (the same default ``find_similar``
            uses) so callers that pass only three arguments keep working.

    Returns:
        An HTML string: a short prompt when the query is empty or only
        whitespace, otherwise whatever ``format_results_html`` produces
        for the results of ``find_similar``.
    """
    # Treat a missing query (None) the same as a blank one instead of
    # failing on ``None.strip()``.
    if not query or not query.strip():
        return "<div>Please enter a search query.</div>"
    results = find_similar(query, books, limit, search_method)
    return format_results_html(results)
|
| 187 |
|
| 188 |
with gr.Blocks(title="Latin Vulgate Verse Similarity Search") as demo:
|
|
|
|
| 207 |
multiselect=True
|
| 208 |
)
|
| 209 |
with gr.Row():
|
| 210 |
+
with gr.Column(scale=1):
|
| 211 |
+
search_method = gr.Radio(
|
| 212 |
+
choices=["vector", "bm25", "hybrid"],
|
| 213 |
+
label="Search Method",
|
| 214 |
+
value="vector"
|
| 215 |
+
)
|
| 216 |
+
with gr.Column(scale=1):
|
| 217 |
+
limit = gr.Slider(
|
| 218 |
+
minimum=1,
|
| 219 |
+
maximum=50,
|
| 220 |
+
value=20,
|
| 221 |
+
step=1,
|
| 222 |
+
label="Number of results"
|
| 223 |
+
)
|
| 224 |
with gr.Row():
|
| 225 |
search_btn = gr.Button("Search", variant="primary")
|
| 226 |
output = gr.HTML(label="Results")
|
| 227 |
|
|
|
|
| 228 |
search_btn.click(
|
| 229 |
fn=search,
|
| 230 |
+
inputs=[query, book_select, limit, search_method],
|
| 231 |
outputs=output
|
| 232 |
)
|
| 233 |
query.submit(
|
| 234 |
fn=search,
|
| 235 |
+
inputs=[query, book_select, limit, search_method],
|
| 236 |
outputs=output
|
| 237 |
)
|
| 238 |
if __name__ == "__main__":
|