MohamedFahim committed on
Commit
6aef3db
·
verified ·
1 Parent(s): e3ad748

Update interface.py

Browse files
Files changed (1) hide show
  1. interface.py +322 -509
interface.py CHANGED
@@ -3,264 +3,355 @@ import requests
3
  import time
4
  import os
5
  import json
 
6
 
7
 
8
- # Use localhost for HF Spaces since both services run in the same container
9
- API_BASE_URL = "http://localhost:8000"
10
 
 
 
 
 
11
 
12
- # ==================== EXISTING FUNCTIONS ====================
13
-
14
- def extract_links(url):
15
- """Extract links from the given URL"""
16
- endpoint = f"{API_BASE_URL}/extract_links"
17
- payload = {"url": url}
18
- try:
19
- response = requests.post(endpoint, json=payload, timeout=30)
20
- if response.status_code == 200:
21
- return response.json()["unique_links"]
22
- else:
23
- raise Exception(f"Failed to extract links: {response.text}")
24
- except requests.exceptions.RequestException as e:
25
- raise Exception(f"Connection error: {str(e)}")
26
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
- def extract_text(urls):
29
- """Extract text from URLs"""
30
- endpoint = f"{API_BASE_URL}/extract_text"
31
- try:
32
- response = requests.post(endpoint, json=urls, timeout=60)
33
- if response.status_code == 200:
34
- return response.json()["file_saved"]
35
- else:
36
- raise Exception(f"Failed to extract text: {response.text}")
37
- except requests.exceptions.RequestException as e:
38
- raise Exception(f"Connection error: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
 
40
 
41
- def perform_rag(file_path, prompt):
42
- """Perform RAG on the extracted text"""
43
- endpoint = f"{API_BASE_URL}/rag"
44
- payload = {"file_path": file_path, "prompt": prompt}
 
 
 
45
  try:
46
- response = requests.post(endpoint, json=payload, timeout=60)
47
- if response.status_code == 200:
48
- return response.json()
49
- else:
50
- raise Exception(f"Failed to perform RAG: {response.text}")
51
- except requests.exceptions.RequestException as e:
52
- raise Exception(f"Connection error: {str(e)}")
53
-
 
 
54
 
55
- def check_api_health():
56
- """Check if FastAPI is running with enhanced health check"""
 
 
57
  try:
58
- response = requests.get(f"{API_BASE_URL}/health_check", timeout=5)
59
- return response.status_code == 200, response.json() if response.status_code == 200 else {}
60
- except:
61
- return False, {}
 
 
 
 
 
 
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
- def process_multiple_links(url, prompt):
65
- """Process multiple links from a webpage"""
66
  if not url or not prompt:
67
- return "โŒ Error: Please provide both URL and prompt", "", ""
68
-
69
- is_healthy, _ = check_api_health()
70
- if not is_healthy:
71
- return "โŒ Error: FastAPI service is not available. Please wait a moment and try again.", "", ""
72
 
73
  try:
74
- links = extract_links(url)
 
 
 
75
  sample_links = links[:5]
76
- file_path = extract_text(sample_links)
77
- result = perform_rag(file_path, prompt)
 
 
 
 
78
 
79
  status_msg = f"โœ… Processed {len(sample_links)} pages from {len(links)} total links found"
80
  response_text = f"**Query:** {result['user_query']}\n\n**Response:** {result['assistant_response']}"
81
- sources_text = result['sources']
82
 
83
- return status_msg, response_text, sources_text
84
 
 
 
85
  except Exception as e:
86
- return f"โŒ Error: {str(e)}", "", ""
87
 
88
-
89
- def process_homepage_only(url, prompt):
90
- """Process homepage content only"""
91
  if not url or not prompt:
92
- return "โŒ Error: Please provide both URL and prompt", "", ""
93
-
94
- is_healthy, _ = check_api_health()
95
- if not is_healthy:
96
- return "โŒ Error: FastAPI service is not available. Please wait a moment and try again.", "", ""
97
 
98
  try:
99
- file_path = extract_text([url])
100
- result = perform_rag(file_path, prompt)
 
 
 
 
 
101
 
102
  status_msg = "โœ… Processed homepage content"
103
  response_text = f"**Query:** {result['user_query']}\n\n**Response:** {result['assistant_response']}"
104
- sources_text = result['sources']
105
 
106
- return status_msg, response_text, sources_text
107
 
 
 
108
  except Exception as e:
109
- return f"โŒ Error: {str(e)}", "", ""
110
 
 
111
 
112
- # ==================== NEW RAG DOCUMENT FUNCTIONS ====================
113
-
114
- def upload_single_document(file, collection_name):
115
- """Upload a single document (PDF/Markdown) to RAG system"""
116
  if not file:
117
- return "โŒ Error: Please select a file to upload", None
118
 
119
  if not collection_name:
120
  collection_name = "default"
121
 
122
  try:
123
- endpoint = f"{API_BASE_URL}/upload_document"
124
- with open(file.name, 'rb') as f:
125
- files = {'file': (os.path.basename(file.name), f, 'application/pdf')}
126
- params = {'collection_name': collection_name}
127
- response = requests.post(endpoint, files=files, params=params, timeout=120)
128
 
129
- if response.status_code == 200:
130
- result = response.json()
131
- status_msg = f"โœ… Successfully uploaded '{result['filename']}'\n๐Ÿ“Š Created {result['chunks_created']} chunks\n๐Ÿ—‚๏ธ Collection: {collection_name}\n๐Ÿ“ File Type: {result['file_type']}"
132
- return status_msg, result
133
- else:
134
- return f"โŒ Error: {response.text}", None
135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  except Exception as e:
137
- return f"โŒ Error: {str(e)}", None
138
-
139
 
140
- def upload_multiple_documents(files, collection_name):
141
- """Upload multiple documents at once"""
142
  if not files or len(files) == 0:
143
- return "โŒ Error: Please select files to upload", None
144
 
145
  if not collection_name:
146
  collection_name = "default"
147
 
148
  try:
149
- endpoint = f"{API_BASE_URL}/upload_multiple_documents"
150
- files_to_upload = []
151
 
152
- for file in files:
153
- with open(file.name, 'rb') as f:
 
 
154
  file_content = f.read()
155
  files_to_upload.append(
156
- ('files', (os.path.basename(file.name), file_content, 'application/pdf'))
157
  )
 
158
 
 
159
  params = {'collection_name': collection_name}
160
- response = requests.post(endpoint, files=files_to_upload, params=params, timeout=180)
 
 
 
 
 
 
161
 
162
- if response.status_code == 200:
163
- result = response.json()
164
- status_msg = f"โœ… Successfully uploaded {result['successful_uploads']} files\nโŒ Failed: {result['failed_uploads']}\n๐Ÿ—‚๏ธ Collection: {collection_name}"
165
- return status_msg, result
166
- else:
167
- return f"โŒ Error: {response.text}", None
168
-
 
 
 
 
169
  except Exception as e:
170
- return f"โŒ Error: {str(e)}", None
171
 
172
-
173
- def query_rag_documents(query, collection_name, top_k):
174
- """Query documents in a collection"""
175
  if not query:
176
- return "โŒ Error: Please enter a query", "", None
177
 
178
  if not collection_name:
179
- return "โŒ Error: Please select a collection", "", None
180
 
181
  try:
182
- endpoint = f"{API_BASE_URL}/query_documents"
183
- payload = {
184
- "query": query,
185
- "collection_name": collection_name,
186
- "top_k": top_k
187
- }
188
- response = requests.post(endpoint, json=payload, timeout=60)
189
-
190
- if response.status_code == 200:
191
- result = response.json()
192
-
193
- if "I couldn't find this information" in result['answer']:
194
- status_msg = "โš ๏ธ No relevant information found in documents"
195
- else:
196
- status_msg = f"โœ… Found relevant information from {len(result['sources'])} sources"
197
-
198
- answer_text = f"**Query:** {result['query']}\n\n**Answer:** {result['answer']}"
199
-
200
- return status_msg, answer_text, result['sources']
 
201
  else:
202
- return f"โŒ Error: {response.text}", "", None
203
-
 
 
 
 
 
 
204
  except Exception as e:
205
- return f"โŒ Error: {str(e)}", "", None
206
 
207
-
208
- def list_all_collections():
209
- """List all available collections"""
210
  try:
211
- endpoint = f"{API_BASE_URL}/list_collections"
212
- response = requests.get(endpoint, timeout=10)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
214
- if response.status_code == 200:
215
- result = response.json()
216
- collections = result['collections']
217
-
218
- if not collections:
219
- return "๐Ÿ“‚ No collections found. Upload documents to create a collection.", None, []
220
-
221
- summary = f"๐Ÿ“Š Total Collections: {len(collections)}\n\n"
222
- for col in collections:
223
- summary += f"๐Ÿ—‚๏ธ **{col['collection_name']}**\n"
224
- summary += f" - Chunks: {col['total_chunks']}\n"
225
- summary += f" - Dimension: {col['dimension']}\n\n"
226
-
227
- collection_names = [col['collection_name'] for col in collections]
228
- return summary, result, collection_names
229
- else:
230
- return f"โŒ Error: {response.text}", None, []
231
-
232
  except Exception as e:
233
- return f"โŒ Error: {str(e)}", None, []
234
 
235
-
236
- def delete_collection(collection_name):
237
- """Delete a collection"""
238
  if not collection_name:
239
- return "โŒ Error: Please select a collection to delete"
240
 
241
  try:
242
- endpoint = f"{API_BASE_URL}/delete_collection/{collection_name}"
243
- response = requests.delete(endpoint, timeout=10)
 
 
 
 
 
 
 
 
 
 
244
 
245
- if response.status_code == 200:
246
- return f"โœ… Successfully deleted collection '{collection_name}'"
247
- else:
248
- return f"โŒ Error: {response.text}"
249
-
250
  except Exception as e:
251
- return f"โŒ Error: {str(e)}"
252
 
253
-
254
- def get_system_health():
255
- """Get comprehensive system health information"""
256
  try:
257
  is_healthy, health_data = check_api_health()
258
 
259
  if not is_healthy:
260
- return "โŒ System Offline", None
261
 
262
- health_summary = f"""
263
- ๐ŸŸข **System Status: Healthy**
264
 
265
  ๐Ÿ“Š **Configuration:**
266
  - Supabase: {'โœ… Configured' if health_data.get('supabase_configured') else 'โŒ Not Configured'}
@@ -270,21 +361,26 @@ def get_system_health():
270
  ๐Ÿ“ **Vector Stores:**
271
  - Total Collections: {health_data.get('vector_stores', 0)}
272
  - Total Chunks: {health_data.get('total_chunks', 0)}
 
 
 
 
273
  """
274
  return health_summary, health_data
275
 
 
 
276
  except Exception as e:
277
- return f"โŒ Error: {str(e)}", None
278
-
279
 
280
- # ==================== DARK THEME CSS ====================
281
 
 
282
  custom_css = """
283
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&display=swap');
284
  * {
285
  font-family: 'Inter', sans-serif !important;
286
  }
287
- /* Dark animated gradient background */
288
  .gradio-container {
289
  background: linear-gradient(-45deg, #0f0c29, #302b63, #24243e, #1a1a2e);
290
  background-size: 400% 400%;
@@ -295,7 +391,6 @@ custom_css = """
295
  50% { background-position: 100% 50%; }
296
  100% { background-position: 0% 50%; }
297
  }
298
- /* Main container with dark glassmorphism */
299
  .main-container {
300
  backdrop-filter: blur(20px);
301
  background: rgba(20, 20, 30, 0.85);
@@ -306,16 +401,9 @@ custom_css = """
306
  animation: fadeInUp 0.8s ease;
307
  }
308
  @keyframes fadeInUp {
309
- from {
310
- opacity: 0;
311
- transform: translateY(30px);
312
- }
313
- to {
314
- opacity: 1;
315
- transform: translateY(0);
316
- }
317
  }
318
- /* Animated title with neon glow */
319
  .animated-title {
320
  background: linear-gradient(135deg, #00f2fe 0%, #4facfe 50%, #00c6ff 100%);
321
  background-size: 200% 200%;
@@ -334,7 +422,6 @@ custom_css = """
334
  50% { background-position: 100% 50%; }
335
  100% { background-position: 0% 50%; }
336
  }
337
- /* Floating animation for icons */
338
  .floating-icon {
339
  animation: float 3s ease-in-out infinite;
340
  display: inline-block;
@@ -343,7 +430,6 @@ custom_css = """
343
  0%, 100% { transform: translateY(0px); }
344
  50% { transform: translateY(-10px); }
345
  }
346
- /* Dark input fields with neon borders */
347
  textarea, input[type="text"] {
348
  font-size: 1.1rem !important;
349
  border-radius: 12px !important;
@@ -358,14 +444,11 @@ textarea:focus, input[type="text"]:focus {
358
  transform: translateY(-2px);
359
  background: rgba(35, 35, 50, 0.95) !important;
360
  }
361
- /* Dark labels */
362
  label {
363
  font-weight: 600 !important;
364
  color: #b0b0b0 !important;
365
  font-size: 1.1rem !important;
366
- transition: all 0.3s ease;
367
  }
368
- /* Neon buttons with hover effects */
369
  .gr-button {
370
  background: linear-gradient(135deg, #00f2fe 0%, #4facfe 100%) !important;
371
  color: #0a0a0f !important;
@@ -380,236 +463,43 @@ label {
380
  .gr-button:hover {
381
  transform: translateY(-3px) !important;
382
  box-shadow: 0 6px 25px rgba(0, 242, 254, 0.6) !important;
383
- filter: brightness(1.1);
384
  }
385
- .gr-button:active {
386
- transform: translateY(-1px) !important;
387
- }
388
- /* Dark output boxes with glassmorphism */
389
  .output-box {
390
  background: rgba(30, 30, 45, 0.95) !important;
391
  border-radius: 16px !important;
392
  border: 1px solid rgba(0, 242, 254, 0.2) !important;
393
  backdrop-filter: blur(10px);
394
- animation: slideIn 0.5s ease;
395
  box-shadow: 0 4px 15px rgba(0, 0, 0, 0.3) !important;
396
  color: #e0e0e0 !important;
397
  padding: 1.5rem !important;
398
- min-height: 200px !important;
399
- }
400
- /* Markdown container styling */
401
- .output-box .prose, .output-box .markdown {
402
- font-size: 1.2rem !important;
403
- line-height: 1.8 !important;
404
- }
405
- /* Markdown styling */
406
- .output-box h1 {
407
- color: #00f2fe !important;
408
- margin-top: 1.5rem !important;
409
- margin-bottom: 1rem !important;
410
- font-size: 2rem !important;
411
- }
412
- .output-box h2 {
413
- color: #00f2fe !important;
414
- margin-top: 1.5rem !important;
415
- margin-bottom: 0.75rem !important;
416
- font-size: 1.6rem !important;
417
- }
418
- .output-box h3 {
419
- color: #4facfe !important;
420
- margin-top: 1rem !important;
421
- margin-bottom: 0.5rem !important;
422
- font-size: 1.3rem !important;
423
- }
424
- .output-box p {
425
- color: #e0e0e0 !important;
426
- line-height: 1.9 !important;
427
- margin-bottom: 1.2rem !important;
428
- font-size: 1.15rem !important;
429
  }
430
  .output-box strong {
431
  color: #4facfe !important;
432
  font-weight: 600 !important;
433
- font-size: 1.2rem !important;
434
- }
435
- .output-box code {
436
- background: rgba(0, 242, 254, 0.15) !important;
437
- padding: 3px 8px !important;
438
- border-radius: 6px !important;
439
- color: #00f2fe !important;
440
- font-size: 1.1rem !important;
441
- border: 1px solid rgba(0, 242, 254, 0.3) !important;
442
- }
443
- .output-box ul, .output-box ol {
444
- font-size: 1.15rem !important;
445
- line-height: 1.8 !important;
446
- margin-left: 1.5rem !important;
447
- color: #e0e0e0 !important;
448
- }
449
- .output-box li {
450
- margin-bottom: 0.5rem !important;
451
- }
452
- /* Enhanced response area glow effect */
453
- .output-box:hover {
454
- border-color: rgba(0, 242, 254, 0.4) !important;
455
- box-shadow: 0 6px 25px rgba(0, 242, 254, 0.3) !important;
456
- transition: all 0.3s ease !important;
457
- }
458
- /* JSON viewer styling */
459
- .output-box pre {
460
- background: rgba(20, 20, 30, 0.9) !important;
461
- border-radius: 10px !important;
462
- padding: 1.5rem !important;
463
- overflow-x: auto !important;
464
- border: 1px solid rgba(0, 242, 254, 0.3) !important;
465
- font-size: 1.05rem !important;
466
- line-height: 1.6 !important;
467
- }
468
- .output-box .json-holder {
469
- background: rgba(20, 20, 30, 0.9) !important;
470
- border-radius: 10px !important;
471
- padding: 1.5rem !important;
472
- font-size: 1.05rem !important;
473
- }
474
- /* Special styling for AI response area */
475
- div[data-testid="markdown"] {
476
- background: rgba(35, 35, 50, 0.6) !important;
477
- padding: 1.5rem !important;
478
- border-radius: 12px !important;
479
- border: 1px solid rgba(0, 242, 254, 0.15) !important;
480
- }
481
- @keyframes slideIn {
482
- from {
483
- opacity: 0;
484
- transform: translateX(-20px);
485
- }
486
- to {
487
- opacity: 1;
488
- transform: translateX(0);
489
- }
490
- }
491
- /* Dark tab styling */
492
- .tab-nav button {
493
- border-radius: 12px !important;
494
- font-weight: 600 !important;
495
- transition: all 0.3s ease !important;
496
- font-size: 1.05rem !important;
497
- background: rgba(30, 30, 45, 0.6) !important;
498
- color: #b0b0b0 !important;
499
- border: 1px solid rgba(0, 242, 254, 0.2) !important;
500
- }
501
- .tab-nav button:hover {
502
- background: rgba(40, 40, 55, 0.8) !important;
503
- border-color: rgba(0, 242, 254, 0.4) !important;
504
- }
505
- .tab-nav button[aria-selected="true"] {
506
- background: linear-gradient(135deg, #00f2fe 0%, #4facfe 100%) !important;
507
- color: #0a0a0f !important;
508
- box-shadow: 0 4px 15px rgba(0, 242, 254, 0.4) !important;
509
- border: none !important;
510
- }
511
- /* Example box styling */
512
- .example-box {
513
- background: linear-gradient(135deg, rgba(0, 242, 254, 0.1) 0%, rgba(79, 172, 254, 0.1) 100%);
514
- border-radius: 16px;
515
- padding: 1.5rem;
516
- border-left: 4px solid #00f2fe;
517
- margin-top: 2rem;
518
- transition: all 0.3s ease;
519
- animation: fadeIn 1s ease;
520
- }
521
- .example-box:hover {
522
- transform: translateX(5px);
523
- box-shadow: 0 5px 20px rgba(0, 242, 254, 0.2);
524
- }
525
- .example-box h3, .example-box p {
526
- color: #e0e0e0 !important;
527
- }
528
- @keyframes fadeIn {
529
- from { opacity: 0; }
530
- to { opacity: 1; }
531
- }
532
- /* Dark note box with shimmer effect */
533
- .note-box {
534
- background: linear-gradient(135deg, rgba(0, 242, 254, 0.08) 0%, rgba(79, 172, 254, 0.08) 100%);
535
- border-radius: 12px;
536
- padding: 1rem;
537
- border-left: 4px solid #00f2fe;
538
- margin-top: 1rem;
539
- position: relative;
540
- overflow: hidden;
541
- }
542
- .note-box::before {
543
- content: '';
544
- position: absolute;
545
- top: 0;
546
- left: -100%;
547
- width: 100%;
548
- height: 100%;
549
- background: linear-gradient(90deg, transparent, rgba(0, 242, 254, 0.2), transparent);
550
- animation: shimmer 3s infinite;
551
- }
552
- .note-box p {
553
- color: #00c6ff !important;
554
- position: relative;
555
- z-index: 1;
556
- }
557
- @keyframes shimmer {
558
- 0% { left: -100%; }
559
- 100% { left: 100%; }
560
- }
561
- /* Dark subtitle animation */
562
- .subtitle {
563
- color: #b0b0b0;
564
- font-size: 1.3rem;
565
- text-align: center;
566
- margin-bottom: 2rem;
567
- animation: fadeInDown 1s ease;
568
- }
569
- @keyframes fadeInDown {
570
- from {
571
- opacity: 0;
572
- transform: translateY(-20px);
573
- }
574
- to {
575
- opacity: 1;
576
- transform: translateY(0);
577
- }
578
  }
579
  """
580
 
581
-
582
- # ==================== MAIN INTERFACE ====================
583
-
584
- with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as interface:
585
  gr.HTML("""
586
  <div class="main-container">
587
  <h1 class="animated-title">
588
- <span class="floating-icon">๐ŸŒ</span> Advanced Web & Document RAG System
589
  </h1>
590
- <p class="subtitle">
591
- Extract from web pages, upload documents (PDF/Markdown), and ask questions using AI-powered retrieval
592
  </p>
593
  </div>
594
  """)
595
 
596
  with gr.Tabs() as tabs:
597
- # ==================== WEB URL TABS (EXISTING) ====================
598
-
599
  with gr.Tab("๐Ÿ”— Multiple Links Analysis"):
600
  with gr.Row():
601
  with gr.Column():
602
- url_input_multi = gr.Textbox(
603
- label="๐ŸŒ Website URL",
604
- placeholder="https://example.com",
605
- elem_classes="output-box"
606
- )
607
- prompt_input_multi = gr.Textbox(
608
- label="๐Ÿ’ญ Your Question",
609
- placeholder="What is this website about?",
610
- lines=3,
611
- elem_classes="output-box"
612
- )
613
  submit_btn_multi = gr.Button("โœจ Analyze Multiple Links", variant="primary")
614
 
615
  with gr.Row():
@@ -627,17 +517,8 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as interface:
627
  with gr.Tab("๐Ÿ  Homepage Only Analysis"):
628
  with gr.Row():
629
  with gr.Column():
630
- url_input_home = gr.Textbox(
631
- label="๐ŸŒ Website URL",
632
- placeholder="https://example.com",
633
- elem_classes="output-box"
634
- )
635
- prompt_input_home = gr.Textbox(
636
- label="๐Ÿ’ญ Your Question",
637
- placeholder="What is this website about?",
638
- lines=3,
639
- elem_classes="output-box"
640
- )
641
  submit_btn_home = gr.Button("โœจ Analyze Homepage", variant="primary")
642
 
643
  with gr.Row():
@@ -652,31 +533,28 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as interface:
652
  outputs=[status_output_home, response_output_home, sources_output_home]
653
  )
654
 
655
- # ==================== DOCUMENT UPLOAD TAB (NEW) ====================
656
-
657
  with gr.Tab("๐Ÿ“„ Document Upload & Query"):
658
  gr.Markdown("""
659
  ### Upload PDF or Markdown documents and query them using RAG
660
  - Supports **PDF** and **Markdown** files
661
- - Documents are chunked and stored in vector database (FAISS)
662
- - Organize documents into collections
663
  """)
664
 
665
  with gr.Row():
666
  with gr.Column():
667
  gr.Markdown("#### ๐Ÿ“ค Upload Documents")
668
  collection_name_upload = gr.Textbox(
669
- label="๐Ÿ—‚๏ธ Collection Name",
670
  placeholder="default",
671
- value="default",
672
- elem_classes="output-box"
673
  )
674
 
675
  with gr.Tab("Single File"):
676
  file_upload_single = gr.File(
677
  label="๐Ÿ“ Select Document (PDF/Markdown)",
678
- file_types=[".pdf", ".md", ".txt"],
679
- elem_classes="output-box"
680
  )
681
  upload_btn_single = gr.Button("๐Ÿ“ค Upload Single Document", variant="primary")
682
  upload_status_single = gr.Textbox(label="๐Ÿ“Š Upload Status", elem_classes="output-box")
@@ -686,31 +564,27 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as interface:
686
  file_upload_multi = gr.File(
687
  label="๐Ÿ“ Select Documents (PDF/Markdown)",
688
  file_count="multiple",
689
- file_types=[".pdf", ".md", ".txt"],
690
- elem_classes="output-box"
691
  )
692
- upload_btn_multi = gr.Button("๐Ÿ“ค Upload Multiple Documents", variant="primary")
693
  upload_status_multi = gr.Textbox(label="๐Ÿ“Š Upload Status", elem_classes="output-box")
694
  upload_result_multi = gr.JSON(label="๐Ÿ“‹ Upload Details", elem_classes="output-box")
695
 
696
  with gr.Column():
697
  gr.Markdown("#### ๐Ÿ” Query Documents")
698
 
699
- # Refresh button to update collection list
700
  refresh_btn = gr.Button("๐Ÿ”„ Refresh Collections", variant="secondary")
701
 
702
  collection_dropdown = gr.Dropdown(
703
  label="๐Ÿ—‚๏ธ Select Collection",
704
  choices=["default"],
705
- value="default",
706
- elem_classes="output-box"
707
  )
708
 
709
  query_input = gr.Textbox(
710
  label="๐Ÿ’ญ Your Question",
711
  placeholder="Ask a question about your documents...",
712
- lines=3,
713
- elem_classes="output-box"
714
  )
715
 
716
  top_k_slider = gr.Slider(
@@ -718,8 +592,7 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as interface:
718
  maximum=10,
719
  value=3,
720
  step=1,
721
- label="๐Ÿ“Š Number of Sources (top-k)",
722
- elem_classes="output-box"
723
  )
724
 
725
  query_btn = gr.Button("๐Ÿ” Search Documents", variant="primary")
@@ -728,57 +601,37 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as interface:
728
  query_response = gr.Markdown(label="๐Ÿค– AI Answer", elem_classes="output-box")
729
  query_sources = gr.JSON(label="๐Ÿ“š Source Citations", elem_classes="output-box")
730
 
731
- # Upload button actions
732
  upload_btn_single.click(
733
  fn=upload_single_document,
734
  inputs=[file_upload_single, collection_name_upload],
735
  outputs=[upload_status_single, upload_result_single]
736
  )
737
 
738
- upload_btn_multi.click(
739
  fn=upload_multiple_documents,
740
  inputs=[file_upload_multi, collection_name_upload],
741
  outputs=[upload_status_multi, upload_result_multi]
742
  )
743
 
744
- # Query button action
745
  query_btn.click(
746
  fn=query_rag_documents,
747
  inputs=[query_input, collection_dropdown, top_k_slider],
748
  outputs=[query_status, query_response, query_sources]
749
  )
750
 
751
- # Refresh collections function
752
  def refresh_collections():
753
- _, _, collection_names = list_all_collections()
754
- if not collection_names:
755
- collection_names = ["default"]
756
- return gr.Dropdown.update(choices=collection_names)
757
 
758
  refresh_btn.click(
759
  fn=refresh_collections,
760
- inputs=[],
761
  outputs=[collection_dropdown]
762
  )
763
-
764
- gr.HTML("""
765
- <div class="example-box">
766
- <h3 style="margin-top: 0; font-size: 1.4rem;">
767
- <span class="floating-icon">๐Ÿ’ก</span> Example Usage
768
- </h3>
769
- <p style="font-size: 1.1rem;"><strong>1.</strong> Upload your PDF/Markdown documents to a collection</p>
770
- <p style="font-size: 1.1rem;"><strong>2.</strong> Ask questions like: "What are the main findings?" or "Summarize the methodology"</p>
771
- <p style="font-size: 1.1rem;"><strong>3.</strong> System returns answers with source citations</p>
772
- </div>
773
- """)
774
-
775
- # ==================== COLLECTION MANAGEMENT TAB (NEW) ====================
776
 
 
777
  with gr.Tab("๐Ÿ—‚๏ธ Collection Management"):
778
- gr.Markdown("""
779
- ### Manage Your Document Collections
780
- View, analyze, and delete document collections stored in the vector database.
781
- """)
782
 
783
  with gr.Row():
784
  with gr.Column():
@@ -788,56 +641,24 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as interface:
788
 
789
  with gr.Column():
790
  gr.Markdown("#### ๐Ÿ—‘๏ธ Delete Collection")
791
- collection_to_delete = gr.Dropdown(
792
- label="๐Ÿ—‚๏ธ Select Collection to Delete",
793
- choices=["default"],
794
- elem_classes="output-box"
795
- )
796
  delete_btn = gr.Button("๐Ÿ—‘๏ธ Delete Collection", variant="stop")
797
  delete_status = gr.Textbox(label="๐Ÿ“Š Status", elem_classes="output-box")
798
 
799
- # List collections action
800
- def list_and_update_dropdown():
801
- summary, result, collection_names = list_all_collections()
802
- if not collection_names:
803
- collection_names = ["default"]
804
- return summary, result, gr.Dropdown.update(choices=collection_names)
805
-
806
  list_btn.click(
807
- fn=list_and_update_dropdown,
808
- inputs=[],
809
  outputs=[collections_output, collections_json, collection_to_delete]
810
  )
811
 
812
- # Delete collection action
813
- def delete_and_refresh(collection_name):
814
- status = delete_collection(collection_name)
815
- summary, result, collection_names = list_all_collections()
816
- if not collection_names:
817
- collection_names = ["default"]
818
- return status, summary, result, gr.Dropdown.update(choices=collection_names)
819
-
820
  delete_btn.click(
821
- fn=delete_and_refresh,
822
  inputs=[collection_to_delete],
823
  outputs=[delete_status, collections_output, collections_json, collection_to_delete]
824
  )
825
-
826
- gr.HTML("""
827
- <div class="note-box">
828
- <p style="margin: 0; font-size: 1.05rem;">
829
- โš ๏ธ <strong>Warning:</strong> Deleting a collection is permanent and cannot be undone. All documents in the collection will be removed.
830
- </p>
831
- </div>
832
- """)
833
-
834
- # ==================== SYSTEM HEALTH TAB (NEW) ====================
835
 
 
836
  with gr.Tab("โš™๏ธ System Health"):
837
- gr.Markdown("""
838
- ### System Status & Configuration
839
- Monitor system health, API connections, and vector database statistics.
840
- """)
841
 
842
  health_check_btn = gr.Button("๐Ÿ” Check System Health", variant="primary")
843
  health_output = gr.Markdown(label="๐ŸŸข System Status", elem_classes="output-box")
@@ -845,38 +666,30 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as interface:
845
 
846
  health_check_btn.click(
847
  fn=get_system_health,
848
- inputs=[],
849
  outputs=[health_output, health_json]
850
  )
851
-
852
- gr.HTML("""
853
- <div class="example-box">
854
- <h3 style="margin-top: 0; font-size: 1.4rem;">
855
- <span class="floating-icon">๐Ÿ“Š</span> Health Check Information
856
- </h3>
857
- <p style="font-size: 1.1rem;"><strong>Supabase:</strong> Cloud storage for documents and extracted text</p>
858
- <p style="font-size: 1.1rem;"><strong>Groq API:</strong> LLM for generating answers</p>
859
- <p style="font-size: 1.1rem;"><strong>Vector Stores:</strong> FAISS collections for document embeddings</p>
860
- </div>
861
- """)
862
 
863
- # Footer
864
  gr.HTML("""
865
- <div class="note-box" style="margin-top: 2rem;">
866
- <p style="margin: 0; font-size: 1.05rem;">
867
- โ„น๏ธ <strong>Note:</strong> If you encounter connection errors, please wait a moment for the system to initialize and try again.
868
- The system supports both web scraping and document upload for comprehensive question-answering capabilities.
869
  </p>
870
  </div>
871
  """)
872
 
873
-
874
- # Launch the interface
875
  if __name__ == "__main__":
 
 
 
 
 
 
876
  interface.launch(
877
  server_name="0.0.0.0",
878
  server_port=7860,
879
  share=False,
880
  show_error=True,
881
- quiet=False
882
  )
 
3
  import time
4
  import os
5
  import json
6
+ from typing import Optional, Tuple, List
7
 
8
 
9
+ # ==================== API CONFIGURATION ====================
 
10
 
11
+ # For Hugging Face Spaces, both Gradio and FastAPI run in same container
12
+ API_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:8000")
13
+ MAX_RETRIES = 3
14
+ RETRY_DELAY = 5
15
 
16
+ # ==================== UTILITY FUNCTIONS ====================
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
def check_api_health(max_attempts=3) -> Tuple[bool, dict]:
    """Poll the backend ``/health_check`` endpoint until it answers with HTTP 200.

    Args:
        max_attempts: Maximum number of GET requests to issue.

    Returns:
        ``(True, payload)`` with the decoded JSON body as soon as one attempt
        returns status 200, otherwise ``(False, {})`` once all attempts are used.
    """
    for attempt in range(max_attempts):
        try:
            response = requests.get(f"{API_BASE_URL}/health_check", timeout=10)
            if response.status_code == 200:
                return True, response.json()
        except (requests.exceptions.RequestException, ValueError):
            # Network failure or undecodable body: count it as a failed attempt.
            pass

        # Back off before the next attempt for every kind of failure, including
        # a non-200 answer, so a starting backend is not polled in a tight loop.
        if attempt < max_attempts - 1:
            time.sleep(2)

    return False, {}
29
 
30
def make_request_with_retry(method: str, endpoint: str, **kwargs) -> requests.Response:
    """Send a request to the backend API, retrying transient failures.

    Connection errors, timeouts and HTTP 500 answers are retried up to
    ``MAX_RETRIES`` times with ``RETRY_DELAY`` seconds between attempts.
    Any other HTTP error status is reported immediately, because re-sending
    the same request cannot change the outcome.

    Args:
        method: HTTP verb; ``GET``, ``POST`` or ``DELETE`` (case-insensitive).
        endpoint: Path appended to ``API_BASE_URL``.
        **kwargs: Forwarded unchanged to the ``requests`` call on every attempt.
            Pass payloads as bytes rather than open file objects, since a
            retry re-sends the very same objects.

    Returns:
        The first response whose status is not an HTTP error.

    Raises:
        gr.Error: For an unsupported method, a non-retryable HTTP error, or
            when every attempt failed.
    """
    url = f"{API_BASE_URL}{endpoint}"
    senders = {
        "GET": requests.get,
        "POST": requests.post,
        "DELETE": requests.delete,
    }

    send = senders.get(method.upper())
    if send is None:
        # A bad verb is a programming error; retrying it would never help.
        raise gr.Error(f"โŒ Error: Unsupported method: {method}", duration=10)

    for attempt in range(MAX_RETRIES):
        is_last_attempt = attempt == MAX_RETRIES - 1

        try:
            response = send(url, **kwargs)
        except requests.exceptions.ConnectionError as e:
            if is_last_attempt:
                raise gr.Error("โŒ Cannot connect to API. Please refresh and try again.", duration=10) from e
        except requests.exceptions.Timeout as e:
            if is_last_attempt:
                raise gr.Error("โฑ๏ธ Request timeout. Please try again.", duration=10) from e
        except Exception as e:
            if is_last_attempt:
                raise gr.Error(f"โŒ Error: {str(e)}", duration=10) from e
            # Unclassified failures are retried right away, without a delay.
            continue
        else:
            # Accept every non-error status (e.g. 204 for DELETE), not only
            # 200/201; otherwise a successful request would be sent again.
            if response.ok:
                return response

            try:
                response.raise_for_status()
            except requests.exceptions.HTTPError as e:
                # Only a 500 is treated as transient; everything else fails now.
                if response.status_code != 500 or is_last_attempt:
                    raise gr.Error(f"โŒ Error: {str(e)}", duration=10) from e

        time.sleep(RETRY_DELAY)

    raise gr.Error("โŒ Maximum retries exceeded. Please try again later.", duration=10)
68
 
69
+ # ==================== WEB SCRAPING FUNCTIONS ====================
70
 
71
def extract_links(url: str, progress=gr.Progress()) -> List[str]:
    """Ask the backend for the links found on a web page.

    Args:
        url: Address of the page to scan.
        progress: Gradio progress tracker, updated before and after the call.

    Returns:
        The ``unique_links`` field of the ``/extract_links`` JSON response.

    Raises:
        gr.Error: If ``url`` is empty, or the request or response handling fails.
    """
    if not url:
        raise gr.Error("โŒ Please provide a URL", duration=5)

    progress(0, desc="Connecting to website...")

    try:
        response = make_request_with_retry(
            "POST",
            "/extract_links",
            json={"url": url},
            timeout=30
        )
        progress(1, desc="Links extracted!")
        return response.json()["unique_links"]
    except gr.Error:
        # Already a user-facing message from the request helper; wrapping it
        # again would produce a doubled, nested error text.
        raise
    except Exception as e:
        raise gr.Error(f"โŒ Failed to extract links: {str(e)}", duration=10) from e
89
 
90
def extract_text(urls: List[str], progress=gr.Progress()) -> str:
    """Ask the backend to extract the text of the given pages.

    Args:
        urls: Page addresses, sent as the JSON body of ``/extract_text``.
        progress: Gradio progress tracker, updated before and after the call.

    Returns:
        The ``file_saved`` field of the JSON response.

    Raises:
        gr.Error: If the request or response handling fails.
    """
    progress(0, desc="Starting text extraction...")

    try:
        response = make_request_with_retry(
            "POST",
            "/extract_text",
            json=urls,
            timeout=120
        )
        progress(1, desc="Text extraction complete!")
        return response.json()["file_saved"]
    except gr.Error:
        # Already a user-facing message from the request helper; wrapping it
        # again would produce a doubled, nested error text.
        raise
    except Exception as e:
        raise gr.Error(f"โŒ Failed to extract text: {str(e)}", duration=10) from e
105
 
106
def perform_rag(file_path: str, prompt: str, progress=gr.Progress()) -> dict:
    """Run the backend ``/rag`` endpoint for a prompt over an extracted text file.

    Args:
        file_path: Value sent as ``file_path`` in the request body.
        prompt: Value sent as ``prompt`` in the request body.
        progress: Gradio progress tracker, updated before and after the call.

    Returns:
        The decoded JSON body of the response.

    Raises:
        gr.Error: If the request or response handling fails.
    """
    progress(0.3, desc="Analyzing content...")

    try:
        response = make_request_with_retry(
            "POST",
            "/rag",
            json={"file_path": file_path, "prompt": prompt},
            timeout=60
        )
        progress(1, desc="Analysis complete!")
        return response.json()
    except gr.Error:
        # Already a user-facing message from the request helper; wrapping it
        # again would produce a doubled, nested error text.
        raise
    except Exception as e:
        raise gr.Error(f"โŒ Failed to perform RAG: {str(e)}", duration=10) from e
121
 
122
def process_multiple_links(url: str, prompt: str, progress=gr.Progress()) -> Tuple[str, str, dict]:
    """Answer a prompt from the first five pages linked on a website.

    The steps are: collect the page's links, extract text from the first five,
    then run RAG over the extracted text.

    Returns:
        A ``(status message, formatted response, sources)`` tuple.

    Raises:
        gr.Error: If an input is missing or any step fails.
    """
    if not (url and prompt):
        raise gr.Error("โŒ Please provide both URL and prompt", duration=5)

    try:
        progress(0, desc="๐Ÿ” Extracting links from webpage...")
        all_links = extract_links(url, progress)

        progress(0.3, desc=f"๐Ÿ“„ Found {len(all_links)} links. Processing top 5...")
        selected = all_links[:5]
        saved_file = extract_text(selected, progress)

        progress(0.7, desc="๐Ÿค– Generating AI response...")
        rag_output = perform_rag(saved_file, prompt, progress)

        progress(1, desc="โœ… Complete!")

        summary = f"โœ… Processed {len(selected)} pages from {len(all_links)} total links found"
        body = f"**Query:** {rag_output['user_query']}\n\n**Response:** {rag_output['assistant_response']}"

        return summary, body, rag_output['sources']

    except Exception as e:
        # Errors that are already user-facing go through untouched.
        if isinstance(e, gr.Error):
            raise
        raise gr.Error(f"โŒ Processing error: {str(e)}", duration=10)
149
 
150
def process_homepage_only(url: str, prompt: str, progress=gr.Progress()) -> Tuple[str, str, dict]:
    """Answer a prompt using only the text of the given page.

    Returns:
        A ``(status message, formatted response, sources)`` tuple.

    Raises:
        gr.Error: If an input is missing or any step fails.
    """
    if not (url and prompt):
        raise gr.Error("โŒ Please provide both URL and prompt", duration=5)

    try:
        progress(0.2, desc="๐Ÿ“„ Extracting homepage content...")
        saved_file = extract_text([url], progress)

        progress(0.6, desc="๐Ÿค– Generating AI response...")
        rag_output = perform_rag(saved_file, prompt, progress)

        progress(1, desc="โœ… Complete!")

        summary = "โœ… Processed homepage content"
        body = f"**Query:** {rag_output['user_query']}\n\n**Response:** {rag_output['assistant_response']}"

        return summary, body, rag_output['sources']

    except Exception as e:
        # Errors that are already user-facing go through untouched.
        if isinstance(e, gr.Error):
            raise
        raise gr.Error(f"โŒ Processing error: {str(e)}", duration=10)
173
 
174
+ # ==================== DOCUMENT UPLOAD FUNCTIONS ====================
175
 
176
def upload_single_document(file, collection_name: str, progress=gr.Progress()) -> Tuple[str, dict]:
    """Upload one document to the backend ``/upload_document`` endpoint.

    Args:
        file: Either an object exposing the local path as ``.name`` or the
            path itself.
        collection_name: Target collection; an empty value means ``"default"``.
        progress: Gradio progress tracker.

    Returns:
        A ``(status message, response JSON)`` tuple.

    Raises:
        gr.Error: If no file was given or the upload fails.
    """
    if not file:
        raise gr.Error("โŒ Please select a file to upload", duration=5)

    if not collection_name:
        collection_name = "default"

    try:
        progress(0.1, desc="๐Ÿ“ค Uploading document...")

        file_path = file.name if hasattr(file, 'name') else file

        # Send bytes instead of the open handle: the request helper may send
        # the payload more than once, and a handle would already be exhausted
        # (or closed) on a second attempt.
        with open(file_path, 'rb') as f:
            file_content = f.read()

        files = {'file': (os.path.basename(file_path), file_content)}
        params = {'collection_name': collection_name}

        progress(0.4, desc="๐Ÿ”„ Processing document...")
        response = make_request_with_retry(
            "POST",
            "/upload_document",
            files=files,
            params=params,
            timeout=180
        )

        progress(1, desc="โœ… Upload complete!")

        result = response.json()
        status_msg = f"""โœ… Successfully uploaded '{result['filename']}'
๐Ÿ“Š Created {result['chunks_created']} chunks
๐Ÿ—‚๏ธ Collection: {collection_name}
๐Ÿ“ File Type: {result['file_type']}"""

        return status_msg, result

    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(f"โŒ Upload failed: {str(e)}", duration=10) from e
 
214
 
215
def upload_multiple_documents(files, collection_name: str, progress=gr.Progress()) -> Tuple[str, dict]:
    """Upload several documents in one call to ``/upload_multiple_documents``.

    Every file is read into memory first, then all of them are sent together.

    Args:
        files: Sequence of objects exposing ``.name``, or of plain paths.
        collection_name: Target collection; an empty value means ``"default"``.
        progress: Gradio progress tracker.

    Returns:
        A ``(status message, response JSON)`` tuple.

    Raises:
        gr.Error: If no files were given or the upload fails.
    """
    if not files:
        raise gr.Error("โŒ Please select files to upload", duration=5)

    collection_name = collection_name or "default"

    try:
        total = len(files)
        progress(0.1, desc=f"๐Ÿ“ค Uploading {total} documents...")

        payload = []
        for position, item in enumerate(files, start=1):
            path = item.name if hasattr(item, 'name') else item
            with open(path, 'rb') as handle:
                data = handle.read()
            payload.append(('files', (os.path.basename(path), data)))
            # Reading the files accounts for the first half of the progress bar.
            progress(position / total * 0.5, desc=f"Reading file {position}/{total}...")

        progress(0.5, desc="๐Ÿ”„ Processing all documents...")
        api_response = make_request_with_retry(
            "POST",
            "/upload_multiple_documents",
            files=payload,
            params={'collection_name': collection_name},
            timeout=300
        )

        progress(1, desc="โœ… All uploads complete!")

        outcome = api_response.json()
        summary = f"""โœ… Successfully uploaded {outcome['successful_uploads']} files
โŒ Failed: {outcome['failed_uploads']}
๐Ÿ—‚๏ธ Collection: {collection_name}"""

        return summary, outcome

    except Exception as e:
        # Errors that are already user-facing go through untouched.
        if isinstance(e, gr.Error):
            raise
        raise gr.Error(f"โŒ Upload failed: {str(e)}", duration=10)
259
 
260
def query_rag_documents(query: str, collection_name: str, top_k: int, progress=gr.Progress()) -> Tuple[str, str, dict]:
    """Send a question to ``/query_documents`` and format the answer for display.

    Args:
        query: The user's question.
        collection_name: Collection to search.
        top_k: Forwarded to the backend as ``top_k``.
        progress: Gradio progress tracker.

    Returns:
        A ``(status message, formatted answer, sources)`` tuple.

    Raises:
        gr.Error: If an input is missing or the query fails.
    """
    if not query:
        raise gr.Error("โŒ Please enter a query", duration=5)
    if not collection_name:
        raise gr.Error("โŒ Please select a collection", duration=5)

    try:
        progress(0.3, desc="๐Ÿ” Searching documents...")

        request_body = {
            "query": query,
            "collection_name": collection_name,
            "top_k": top_k
        }
        api_response = make_request_with_retry(
            "POST",
            "/query_documents",
            json=request_body,
            timeout=60
        )

        progress(0.8, desc="๐Ÿค– Generating answer...")
        data = api_response.json()

        progress(1, desc="โœ… Complete!")

        # The backend signals "nothing found" through this phrase in the answer.
        nothing_found = "I couldn't find this information" in data['answer']
        status = (
            "โš ๏ธ No relevant information found in documents"
            if nothing_found
            else f"โœ… Found relevant information from {len(data['sources'])} sources"
        )

        formatted = f"**Query:** {data['query']}\n\n**Answer:** {data['answer']}"

        return status, formatted, data['sources']

    except Exception as e:
        # Errors that are already user-facing go through untouched.
        if isinstance(e, gr.Error):
            raise
        raise gr.Error(f"โŒ Query failed: {str(e)}", duration=10)
300
 
301
def list_all_collections() -> Tuple[str, dict, gr.Dropdown]:
    """Fetch the collections from ``/list_collections`` and describe them.

    Returns:
        A ``(markdown summary, response JSON, dropdown)`` tuple. When the
        backend reports no collections, the JSON element is ``None`` and the
        dropdown offers only ``"default"``.

    Raises:
        gr.Error: If the request or response handling fails.
    """
    try:
        response = make_request_with_retry("GET", "/list_collections", timeout=10)
        result = response.json()
        collections = result['collections']

        if not collections:
            return "๐Ÿ“‚ No collections found. Upload documents to create a collection.", None, gr.Dropdown(choices=["default"], value="default")

        summary = f"๐Ÿ“Š **Total Collections:** {len(collections)}\n\n"
        for col in collections:
            summary += f"๐Ÿ—‚๏ธ **{col['collection_name']}**\n"
            summary += f" - Chunks: {col['total_chunks']}\n"
            summary += f" - Dimension: {col['dimension']}\n\n"

        collection_names = [col['collection_name'] for col in collections]

        # `collections` is non-empty here, so there is always a first name.
        return summary, result, gr.Dropdown(choices=collection_names, value=collection_names[0])

    except gr.Error:
        # Already a user-facing message from the request helper; wrapping it
        # again would produce a doubled, nested error text.
        raise
    except Exception as e:
        raise gr.Error(f"โŒ Failed to list collections: {str(e)}", duration=10) from e
323
 
324
def delete_collection(collection_name: str) -> Tuple[str, str, dict, gr.Dropdown]:
    """Delete a collection through the backend, then refresh the collection list.

    Args:
        collection_name: Name of the collection to remove.

    Returns:
        A ``(status message, markdown summary, response JSON, dropdown)``
        tuple; the last three come from ``list_all_collections``.

    Raises:
        gr.Error: If no collection was selected, the deletion fails, or the
            follow-up listing fails.
    """
    from urllib.parse import quote

    if not collection_name:
        raise gr.Error("โŒ Please select a collection to delete", duration=5)

    try:
        # Percent-encode the name so characters such as "?", "#" or spaces
        # stay inside the path segment instead of altering the URL.
        make_request_with_retry(
            "DELETE",
            f"/delete_collection/{quote(collection_name, safe='')}",
            timeout=10
        )

        status = f"โœ… Successfully deleted collection '{collection_name}'"

        # Refresh collections list
        summary, result, dropdown = list_all_collections()

        return status, summary, result, dropdown

    except gr.Error:
        # Keep the original message: a failed refresh after a successful
        # delete must not be reported as a failed deletion.
        raise
    except Exception as e:
        raise gr.Error(f"โŒ Failed to delete collection: {str(e)}", duration=10) from e
345
 
346
+ def get_system_health() -> Tuple[str, dict]:
347
+ """Get system health information"""
 
348
  try:
349
  is_healthy, health_data = check_api_health()
350
 
351
  if not is_healthy:
352
+ raise gr.Error("โŒ System is offline. Please refresh the page.", duration=None)
353
 
354
+ health_summary = f"""๐ŸŸข **System Status: Healthy**
 
355
 
356
  ๐Ÿ“Š **Configuration:**
357
  - Supabase: {'โœ… Configured' if health_data.get('supabase_configured') else 'โŒ Not Configured'}
 
361
  ๐Ÿ“ **Vector Stores:**
362
  - Total Collections: {health_data.get('vector_stores', 0)}
363
  - Total Chunks: {health_data.get('total_chunks', 0)}
364
+ - Storage Path: {health_data.get('persistent_storage', 'N/A')}
365
+
366
+ ๐Ÿ“š **Available Collections:**
367
+ {', '.join(health_data.get('collections', [])) if health_data.get('collections') else 'None'}
368
  """
369
  return health_summary, health_data
370
 
371
+ except gr.Error:
372
+ raise
373
  except Exception as e:
374
+ raise gr.Error(f"โŒ Health check failed: {str(e)}", duration=10)
 
375
 
376
+ # ==================== GRADIO UI ====================
377
 
378
+ # Custom CSS (same as before)
379
  custom_css = """
380
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&display=swap');
381
  * {
382
  font-family: 'Inter', sans-serif !important;
383
  }
 
384
  .gradio-container {
385
  background: linear-gradient(-45deg, #0f0c29, #302b63, #24243e, #1a1a2e);
386
  background-size: 400% 400%;
 
391
  50% { background-position: 100% 50%; }
392
  100% { background-position: 0% 50%; }
393
  }
 
394
  .main-container {
395
  backdrop-filter: blur(20px);
396
  background: rgba(20, 20, 30, 0.85);
 
401
  animation: fadeInUp 0.8s ease;
402
  }
403
  @keyframes fadeInUp {
404
+ from { opacity: 0; transform: translateY(30px); }
405
+ to { opacity: 1; transform: translateY(0); }
 
 
 
 
 
 
406
  }
 
407
  .animated-title {
408
  background: linear-gradient(135deg, #00f2fe 0%, #4facfe 50%, #00c6ff 100%);
409
  background-size: 200% 200%;
 
422
  50% { background-position: 100% 50%; }
423
  100% { background-position: 0% 50%; }
424
  }
 
425
  .floating-icon {
426
  animation: float 3s ease-in-out infinite;
427
  display: inline-block;
 
430
  0%, 100% { transform: translateY(0px); }
431
  50% { transform: translateY(-10px); }
432
  }
 
433
  textarea, input[type="text"] {
434
  font-size: 1.1rem !important;
435
  border-radius: 12px !important;
 
444
  transform: translateY(-2px);
445
  background: rgba(35, 35, 50, 0.95) !important;
446
  }
 
447
  label {
448
  font-weight: 600 !important;
449
  color: #b0b0b0 !important;
450
  font-size: 1.1rem !important;
 
451
  }
 
452
  .gr-button {
453
  background: linear-gradient(135deg, #00f2fe 0%, #4facfe 100%) !important;
454
  color: #0a0a0f !important;
 
463
  .gr-button:hover {
464
  transform: translateY(-3px) !important;
465
  box-shadow: 0 6px 25px rgba(0, 242, 254, 0.6) !important;
 
466
  }
 
 
 
 
467
  .output-box {
468
  background: rgba(30, 30, 45, 0.95) !important;
469
  border-radius: 16px !important;
470
  border: 1px solid rgba(0, 242, 254, 0.2) !important;
471
  backdrop-filter: blur(10px);
 
472
  box-shadow: 0 4px 15px rgba(0, 0, 0, 0.3) !important;
473
  color: #e0e0e0 !important;
474
  padding: 1.5rem !important;
475
+ min-height: 150px !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
476
  }
477
  .output-box strong {
478
  color: #4facfe !important;
479
  font-weight: 600 !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
480
  }
481
  """
482
 
483
+ # Build interface
484
+ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="RAG Assistant") as interface:
 
 
485
  gr.HTML("""
486
  <div class="main-container">
487
  <h1 class="animated-title">
488
+ <span class="floating-icon">๐ŸŒ</span> Advanced RAG Assistant
489
  </h1>
490
+ <p class="subtitle" style="color: #b0b0b0; font-size: 1.3rem; text-align: center; margin-bottom: 2rem;">
491
+ Upload documents (PDF/Markdown) or extract from web pages - Ask questions using AI-powered retrieval
492
  </p>
493
  </div>
494
  """)
495
 
496
  with gr.Tabs() as tabs:
497
+ # Web Scraping Tabs
 
498
  with gr.Tab("๐Ÿ”— Multiple Links Analysis"):
499
  with gr.Row():
500
  with gr.Column():
501
+ url_input_multi = gr.Textbox(label="๐ŸŒ Website URL", placeholder="https://example.com")
502
+ prompt_input_multi = gr.Textbox(label="๐Ÿ’ญ Your Question", placeholder="What is this website about?", lines=3)
 
 
 
 
 
 
 
 
 
503
  submit_btn_multi = gr.Button("โœจ Analyze Multiple Links", variant="primary")
504
 
505
  with gr.Row():
 
517
  with gr.Tab("๐Ÿ  Homepage Only Analysis"):
518
  with gr.Row():
519
  with gr.Column():
520
+ url_input_home = gr.Textbox(label="๐ŸŒ Website URL", placeholder="https://example.com")
521
+ prompt_input_home = gr.Textbox(label="๐Ÿ’ญ Your Question", placeholder="What is this website about?", lines=3)
 
 
 
 
 
 
 
 
 
522
  submit_btn_home = gr.Button("โœจ Analyze Homepage", variant="primary")
523
 
524
  with gr.Row():
 
533
  outputs=[status_output_home, response_output_home, sources_output_home]
534
  )
535
 
536
+ # Document Upload Tab
 
537
  with gr.Tab("๐Ÿ“„ Document Upload & Query"):
538
  gr.Markdown("""
539
  ### Upload PDF or Markdown documents and query them using RAG
540
  - Supports **PDF** and **Markdown** files
541
+ - Documents are chunked and stored in FAISS vector database
542
+ - Organize documents into collections for better management
543
  """)
544
 
545
  with gr.Row():
546
  with gr.Column():
547
  gr.Markdown("#### ๐Ÿ“ค Upload Documents")
548
  collection_name_upload = gr.Textbox(
549
+ label="๐Ÿ—‚๏ธ Collection Name",
550
  placeholder="default",
551
+ value="default"
 
552
  )
553
 
554
  with gr.Tab("Single File"):
555
  file_upload_single = gr.File(
556
  label="๐Ÿ“ Select Document (PDF/Markdown)",
557
+ file_types=[".pdf", ".md", ".txt"]
 
558
  )
559
  upload_btn_single = gr.Button("๐Ÿ“ค Upload Single Document", variant="primary")
560
  upload_status_single = gr.Textbox(label="๐Ÿ“Š Upload Status", elem_classes="output-box")
 
564
  file_upload_multi = gr.File(
565
  label="๐Ÿ“ Select Documents (PDF/Markdown)",
566
  file_count="multiple",
567
+ file_types=[".pdf", ".md", ".txt"]
 
568
  )
569
+ upload_btn_multi_doc = gr.Button("๐Ÿ“ค Upload Multiple Documents", variant="primary")
570
  upload_status_multi = gr.Textbox(label="๐Ÿ“Š Upload Status", elem_classes="output-box")
571
  upload_result_multi = gr.JSON(label="๐Ÿ“‹ Upload Details", elem_classes="output-box")
572
 
573
  with gr.Column():
574
  gr.Markdown("#### ๐Ÿ” Query Documents")
575
 
 
576
  refresh_btn = gr.Button("๐Ÿ”„ Refresh Collections", variant="secondary")
577
 
578
  collection_dropdown = gr.Dropdown(
579
  label="๐Ÿ—‚๏ธ Select Collection",
580
  choices=["default"],
581
+ value="default"
 
582
  )
583
 
584
  query_input = gr.Textbox(
585
  label="๐Ÿ’ญ Your Question",
586
  placeholder="Ask a question about your documents...",
587
+ lines=3
 
588
  )
589
 
590
  top_k_slider = gr.Slider(
 
592
  maximum=10,
593
  value=3,
594
  step=1,
595
+ label="๐Ÿ“Š Number of Sources (top-k)"
 
596
  )
597
 
598
  query_btn = gr.Button("๐Ÿ” Search Documents", variant="primary")
 
601
  query_response = gr.Markdown(label="๐Ÿค– AI Answer", elem_classes="output-box")
602
  query_sources = gr.JSON(label="๐Ÿ“š Source Citations", elem_classes="output-box")
603
 
604
+ # Connect buttons
605
  upload_btn_single.click(
606
  fn=upload_single_document,
607
  inputs=[file_upload_single, collection_name_upload],
608
  outputs=[upload_status_single, upload_result_single]
609
  )
610
 
611
+ upload_btn_multi_doc.click(
612
  fn=upload_multiple_documents,
613
  inputs=[file_upload_multi, collection_name_upload],
614
  outputs=[upload_status_multi, upload_result_multi]
615
  )
616
 
 
617
  query_btn.click(
618
  fn=query_rag_documents,
619
  inputs=[query_input, collection_dropdown, top_k_slider],
620
  outputs=[query_status, query_response, query_sources]
621
  )
622
 
 
623
  def refresh_collections():
624
+ _, _, dropdown = list_all_collections()
625
+ return dropdown
 
 
626
 
627
  refresh_btn.click(
628
  fn=refresh_collections,
 
629
  outputs=[collection_dropdown]
630
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
631
 
632
+ # Collection Management Tab
633
  with gr.Tab("๐Ÿ—‚๏ธ Collection Management"):
634
+ gr.Markdown("### Manage Your Document Collections")
 
 
 
635
 
636
  with gr.Row():
637
  with gr.Column():
 
641
 
642
  with gr.Column():
643
  gr.Markdown("#### ๐Ÿ—‘๏ธ Delete Collection")
644
+ collection_to_delete = gr.Dropdown(label="๐Ÿ—‚๏ธ Select Collection to Delete", choices=["default"])
 
 
 
 
645
  delete_btn = gr.Button("๐Ÿ—‘๏ธ Delete Collection", variant="stop")
646
  delete_status = gr.Textbox(label="๐Ÿ“Š Status", elem_classes="output-box")
647
 
 
 
 
 
 
 
 
648
  list_btn.click(
649
+ fn=list_all_collections,
 
650
  outputs=[collections_output, collections_json, collection_to_delete]
651
  )
652
 
 
 
 
 
 
 
 
 
653
  delete_btn.click(
654
+ fn=delete_collection,
655
  inputs=[collection_to_delete],
656
  outputs=[delete_status, collections_output, collections_json, collection_to_delete]
657
  )
 
 
 
 
 
 
 
 
 
 
658
 
659
+ # System Health Tab
660
  with gr.Tab("โš™๏ธ System Health"):
661
+ gr.Markdown("### System Status & Configuration")
 
 
 
662
 
663
  health_check_btn = gr.Button("๐Ÿ” Check System Health", variant="primary")
664
  health_output = gr.Markdown(label="๐ŸŸข System Status", elem_classes="output-box")
 
666
 
667
  health_check_btn.click(
668
  fn=get_system_health,
 
669
  outputs=[health_output, health_json]
670
  )
 
 
 
 
 
 
 
 
 
 
 
671
 
 
672
  gr.HTML("""
673
+ <div class="note-box" style="margin-top: 2rem; background: linear-gradient(135deg, rgba(0, 242, 254, 0.08) 0%, rgba(79, 172, 254, 0.08) 100%); border-radius: 12px; padding: 1rem; border-left: 4px solid #00f2fe;">
674
+ <p style="margin: 0; font-size: 1.05rem; color: #00c6ff;">
675
+ โ„น๏ธ <strong>Note:</strong> This app features automatic retry logic and progress tracking.
676
+ If you encounter errors, the system will automatically retry. Large files may take longer to process.
677
  </p>
678
  </div>
679
  """)
680
 
681
+ # Launch configuration for Hugging Face Spaces
 
682
if __name__ == "__main__":
    # Probe the backend before serving the UI; a failed probe only warns.
    api_ready, _ = check_api_health(max_attempts=5)
    if not api_ready:
        print("โš ๏ธ Warning: API is not responding. The app will launch but may not work correctly.")

    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
        "show_api": False,
    }
    interface.launch(**launch_options)