wjbmattingly committed on
Commit
fd6bf44
·
verified ·
1 Parent(s): 55c8c77

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -22
app.py CHANGED
@@ -81,9 +81,8 @@ def highlight_matching_words(text: str, query: str) -> str:
81
  highlighted.append(token)
82
  return ''.join(highlighted)
83
 
84
- def find_similar(query: str, books: List[str], limit: int = 50) -> List[Dict[str, Any]]:
85
  try:
86
- query_vector = model.encode([query])[0]
87
  client = weaviate.connect_to_weaviate_cloud(
88
  cluster_url=WEAVIATE_URL,
89
  auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
@@ -94,15 +93,42 @@ def find_similar(query: str, books: List[str], limit: int = 50) -> List[Dict[str
94
  if books:
95
  selected_books = [VULGATE_BOOKS[book] for book in books]
96
  filter_condition = Filter.by_property("book").contains_any(selected_books)
97
- response = vulgate.query.near_vector(
98
- near_vector=query_vector,
99
- limit=limit,
100
- return_metadata=MetadataQuery(distance=True),
101
- filters=filter_condition
102
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  results = []
104
  for obj in response.objects:
105
  highlighted_text = highlight_matching_words(obj.properties["text"], query)
 
 
 
 
 
 
 
 
106
  results.append({
107
  "Reference": f"{obj.properties['book']} {obj.properties['chapter']}:{obj.properties['verse']}",
108
  "Book": obj.properties["book"],
@@ -110,7 +136,7 @@ def find_similar(query: str, books: List[str], limit: int = 50) -> List[Dict[str
110
  "Verse": obj.properties["verse"],
111
  "Text": highlighted_text,
112
  "RawText": obj.properties["text"],
113
- "Similarity": round(1 - obj.metadata.distance, 3)
114
  })
115
  return results
116
  finally:
@@ -124,7 +150,18 @@ def format_results_html(results: List[Dict[str, Any]]) -> str:
124
  if "Error" in results[0]:
125
  return f'<div style="color:red">Error: {results[0]["Error"]}</div>'
126
  html = [
127
- '<style>td,th{padding:8px;}th{background:#f4f1e9;}tr:nth-child(even){background:#f9f9f9;}tr:hover{background:#e6e2d3;}table{border-radius:8px;overflow:hidden;box-shadow:0 2px 8px #e6e2d3;}td{vertical-align:top;}</style>',
 
 
 
 
 
 
 
 
 
 
 
128
  '<table style="border-collapse:collapse;width:100%;font-size:1em;">',
129
  '<thead><tr>'
130
  '<th>Reference</th><th>Text</th><th>Similarity</th><th>Book</th><th>Chapter</th><th>Verse</th>'
@@ -142,10 +179,10 @@ def format_results_html(results: List[Dict[str, Any]]) -> str:
142
  html.append('</tbody></table>')
143
  return ''.join(html)
144
 
145
- def search(query: str, books: List[str], limit: int) -> str:
146
  if not query.strip():
147
  return "<div>Please enter a search query.</div>"
148
- results = find_similar(query, books, limit)
149
  return format_results_html(results)
150
 
151
  with gr.Blocks(title="Latin Vulgate Verse Similarity Search") as demo:
@@ -170,26 +207,32 @@ with gr.Blocks(title="Latin Vulgate Verse Similarity Search") as demo:
170
  multiselect=True
171
  )
172
  with gr.Row():
173
- limit = gr.Slider(
174
- minimum=1,
175
- maximum=50,
176
- value=20,
177
- step=1,
178
- label="Number of results"
179
- )
 
 
 
 
 
 
 
180
  with gr.Row():
181
  search_btn = gr.Button("Search", variant="primary")
182
  output = gr.HTML(label="Results")
183
 
184
-
185
  search_btn.click(
186
  fn=search,
187
- inputs=[query, book_select, limit],
188
  outputs=output
189
  )
190
  query.submit(
191
  fn=search,
192
- inputs=[query, book_select, limit],
193
  outputs=output
194
  )
195
  if __name__ == "__main__":
 
81
  highlighted.append(token)
82
  return ''.join(highlighted)
83
 
84
+ def find_similar(query: str, books: List[str], limit: int = 50, search_method: str = "vector") -> List[Dict[str, Any]]:
85
  try:
 
86
  client = weaviate.connect_to_weaviate_cloud(
87
  cluster_url=WEAVIATE_URL,
88
  auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
 
93
  if books:
94
  selected_books = [VULGATE_BOOKS[book] for book in books]
95
  filter_condition = Filter.by_property("book").contains_any(selected_books)
96
+
97
+ # Always encode the query vector since we need it for both vector and hybrid search
98
+ query_vector = model.encode([query])[0]
99
+
100
+ if search_method == "vector":
101
+ response = vulgate.query.near_vector(
102
+ near_vector=query_vector,
103
+ limit=limit,
104
+ return_metadata=MetadataQuery(distance=True),
105
+ filters=filter_condition
106
+ )
107
+ elif search_method == "bm25":
108
+ response = vulgate.query.bm25(
109
+ query=query,
110
+ limit=limit,
111
+ filters=filter_condition
112
+ )
113
+ else: # hybrid
114
+ response = vulgate.query.hybrid(
115
+ query=query,
116
+ vector=query_vector,
117
+ limit=limit,
118
+ filters=filter_condition
119
+ )
120
+
121
  results = []
122
  for obj in response.objects:
123
  highlighted_text = highlight_matching_words(obj.properties["text"], query)
124
+
125
+ # Handle different types of scores
126
+ similarity = 1.0 # default value
127
+ if hasattr(obj.metadata, 'distance') and obj.metadata.distance is not None:
128
+ similarity = 1 - obj.metadata.distance
129
+ elif hasattr(obj.metadata, 'score') and obj.metadata.score is not None:
130
+ similarity = obj.metadata.score
131
+
132
  results.append({
133
  "Reference": f"{obj.properties['book']} {obj.properties['chapter']}:{obj.properties['verse']}",
134
  "Book": obj.properties["book"],
 
136
  "Verse": obj.properties["verse"],
137
  "Text": highlighted_text,
138
  "RawText": obj.properties["text"],
139
+ "Similarity": round(similarity, 3)
140
  })
141
  return results
142
  finally:
 
150
  if "Error" in results[0]:
151
  return f'<div style="color:red">Error: {results[0]["Error"]}</div>'
152
  html = [
153
+ '<style>',
154
+ '/* Light mode styles */',
155
+ 'td,th{padding:8px;}th{background:#f4f1e9;}tr:nth-child(even){background:#f9f9f9;}tr:hover{background:#e6e2d3;}table{border-radius:8px;overflow:hidden;box-shadow:0 2px 8px #e6e2d3;}td{vertical-align:top;}',
156
+ '/* Dark mode styles */',
157
+ '@media (prefers-color-scheme: dark) {',
158
+ ' th { background: #232323; color: #f4f1e9; }',
159
+ ' tr:nth-child(even) { background: #232323; }',
160
+ ' tr:hover { background: #333333; }',
161
+ ' table { box-shadow: 0 2px 8px #111; }',
162
+ ' td { color: #f4f1e9; }',
163
+ '}',
164
+ '</style>',
165
  '<table style="border-collapse:collapse;width:100%;font-size:1em;">',
166
  '<thead><tr>'
167
  '<th>Reference</th><th>Text</th><th>Similarity</th><th>Book</th><th>Chapter</th><th>Verse</th>'
 
179
  html.append('</tbody></table>')
180
  return ''.join(html)
181
 
182
+ def search(query: str, books: List[str], limit: int, search_method: str) -> str:
183
  if not query.strip():
184
  return "<div>Please enter a search query.</div>"
185
+ results = find_similar(query, books, limit, search_method)
186
  return format_results_html(results)
187
 
188
  with gr.Blocks(title="Latin Vulgate Verse Similarity Search") as demo:
 
207
  multiselect=True
208
  )
209
  with gr.Row():
210
+ with gr.Column(scale=1):
211
+ search_method = gr.Radio(
212
+ choices=["vector", "bm25", "hybrid"],
213
+ label="Search Method",
214
+ value="vector"
215
+ )
216
+ with gr.Column(scale=1):
217
+ limit = gr.Slider(
218
+ minimum=1,
219
+ maximum=50,
220
+ value=20,
221
+ step=1,
222
+ label="Number of results"
223
+ )
224
  with gr.Row():
225
  search_btn = gr.Button("Search", variant="primary")
226
  output = gr.HTML(label="Results")
227
 
 
228
  search_btn.click(
229
  fn=search,
230
+ inputs=[query, book_select, limit, search_method],
231
  outputs=output
232
  )
233
  query.submit(
234
  fn=search,
235
+ inputs=[query, book_select, limit, search_method],
236
  outputs=output
237
  )
238
  if __name__ == "__main__":