Princess3 committed on
Commit
b2fbd32
·
verified ·
1 Parent(s): a95256f

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +814 -0
app.py ADDED
@@ -0,0 +1,814 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ NZ Legislation Loophole Analysis - Hugging Face Spaces App
4
+
5
+ Root-level app.py for Hugging Face Spaces deployment.
6
+ Adapted for Spaces memory constraints and session-based caching.
7
+ """
8
+
9
+ import streamlit as st
10
+ import sys
11
+ import os
12
+ import warnings
13
+ from pathlib import Path
14
+
15
+ # Add current directory to Python path for imports
16
+ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
17
+
18
+ # Suppress warnings for cleaner output
19
+ warnings.filterwarnings('ignore')
20
+
21
+ # Import core modules with error handling for Spaces
22
+ try:
23
+ from streamlit_app.core.cache_manager import CacheManager, get_cache_manager
24
+ from streamlit_app.core.text_processor import TextProcessor
25
+ from streamlit_app.core.llm_analyzer import LLMAnalyzer
26
+ from streamlit_app.core.dataset_builder import DatasetBuilder
27
+ from streamlit_app.utils.config import ConfigManager
28
+ from streamlit_app.utils.performance import PerformanceMonitor
29
+ from streamlit_app.utils.ui_helpers import UIHelpers
30
+ except ImportError as e:
31
+ st.error(f"❌ Import Error: {e}")
32
+ st.error("Please ensure all required packages are installed.")
33
+ st.stop()
34
+
35
# Page-level Streamlit setup (title, icon, layout and the "..." menu entries).
_ABOUT_MARKDOWN = '''
        ## NZ Legislation Loophole Analyzer
        AI-powered analysis of New Zealand legislation to identify
        potential loopholes, ambiguities, and unintended consequences.

        **Version:** 1.0.0 (Spaces Edition)
        **Platform:** Hugging Face Spaces
        **Built with:** Streamlit & Llama.cpp
        '''

_MENU_ITEMS = {
    'Get Help': 'https://huggingface.co/spaces/your-space',
    'Report a bug': 'https://github.com/your-repo/issues',
    'About': _ABOUT_MARKDOWN,
}

st.set_page_config(
    page_title="NZ Legislation Loophole Analyzer",
    page_icon="βš–οΈ",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items=_MENU_ITEMS,
)

# Limits and defaults shared by every page of the Spaces build.
SPACES_CONFIG = dict(
    max_memory_mb=512,        # conservative memory budget used for cache sizing and warnings
    cache_enabled=True,
    persistent_cache=False,   # cache lives in the session only
    model_path='qwen3.gguf',  # default model file
    context_length=4096,      # reduced context to save memory
    max_tokens=2048,          # reduced response length
)
65
+
66
def initialize_spaces_session():
    """Populate ``st.session_state`` with the objects the pages rely on.

    Each entry is created only when its key is missing, so script reruns
    keep whatever the session already holds.
    """
    state = st.session_state

    if 'cache_manager' not in state:
        # Non-persistent cache with a one-hour TTL, sized from SPACES_CONFIG.
        state.cache_manager = CacheManager(
            max_memory_mb=SPACES_CONFIG['max_memory_mb'],
            persistent=False,
            ttl_hours=1,
        )

    if 'config_manager' not in state:
        manager = ConfigManager()
        state.config_manager = manager

        # Spaces overrides are applied once, right after the manager is
        # created, so later user changes are not overwritten on rerun.
        model_defaults = {
            'path': SPACES_CONFIG['model_path'],
            'context_length': SPACES_CONFIG['context_length'],
            'max_tokens': SPACES_CONFIG['max_tokens'],
            'temperature': 0.3,
            'top_p': 0.85,
        }
        cache_defaults = {
            'enabled': SPACES_CONFIG['cache_enabled'],
            'max_size_mb': SPACES_CONFIG['max_memory_mb'],
            'persistent': False,
            'ttl_hours': 1,
        }
        processing_defaults = {
            'chunk_size': 2048,
            'chunk_overlap': 128,
            'batch_size': 4,
            'clean_text': True,
        }
        manager.update_config({
            'model': model_defaults,
            'cache': cache_defaults,
            'processing': processing_defaults,
        })

    # Remaining keys: (name, zero-argument factory), built lazily so nothing
    # is constructed when the key already exists.
    lazy_entries = (
        ('performance_monitor', lambda: PerformanceMonitor(max_history=100)),
        ('text_processor', lambda: TextProcessor()),
        ('current_analysis', lambda: None),
        ('analysis_results', lambda: []),
        ('processing_status', lambda: {
            'is_running': False,
            'progress': 0,
            'current_task': '',
            'total_chunks': 0,
            'processed_chunks': 0,
        }),
        ('model_loaded', lambda: False),
        ('llm_analyzer', lambda: None),
    )
    for key, make_value in lazy_entries:
        if key not in state:
            state[key] = make_value()
131
+
132
def show_spaces_optimized_home_page():
    """Render the landing page: intro text, quick start and a config summary."""
    st.title("🏠 NZ Legislation Loophole Analyzer")
    st.markdown("### AI-Powered Legal Analysis (Spaces Edition)")

    # Collapsed note about the hosting environment.
    with st.expander("⚠️ Spaces Environment Notes", expanded=False):
        st.info("""
        **Running on Hugging Face Spaces**
        - Memory optimized for cloud deployment
        - Session-based caching (resets between visits)
        - Use smaller models for best performance
        - Analysis results persist during your session
        """)

    intro_col, summary_col = st.columns([2, 1])

    with intro_col:
        st.markdown("""
        This AI-powered tool analyzes New Zealand legislation to identify:

        πŸ” **Potential Loopholes** - Legal ambiguities that could be exploited
        πŸ“‹ **Unintended Consequences** - Hidden implications in legislative language
        βš–οΈ **Ambiguities** - Vague or unclear legal provisions
        🎯 **Circumvention Strategies** - Ways legislation might be bypassed

        **Key Features:**
        - **Smart Caching**: Avoid re-processing identical content during your session
        - **Memory Optimized**: Designed for Spaces memory constraints
        - **Real-time Progress**: Live processing status and performance metrics
        - **Export Options**: Download results in multiple formats
        """)

        st.markdown("### Quick Start")
        st.markdown("""
        1. **Upload** your NZ legislation files (JSON lines or raw text)
        2. **Configure** analysis parameters (use smaller models for Spaces)
        3. **Process** the legislation with AI-powered analysis
        4. **Review** results with interactive visualizations
        5. **Export** findings before your session ends
        """)

    with summary_col:
        st.markdown("### Current Configuration")

        settings = st.session_state.config_manager.get_config()

        # Model section
        st.subheader("πŸ€– Model Settings")
        for label, key in (("Model", 'path'),
                           ("Context Length", 'context_length'),
                           ("Max Tokens", 'max_tokens')):
            st.info(f"**{label}:** {settings['model'][key]}")

        # Processing section
        st.subheader("βš™οΈ Processing")
        for label, key in (("Chunk Size", 'chunk_size'),
                           ("Overlap", 'chunk_overlap'),
                           ("Batch Size", 'batch_size')):
            st.info(f"**{label}:** {settings['processing'][key]}")

        # Cache section
        st.subheader("🧠 Cache")
        stats = st.session_state.cache_manager.get_stats()
        cache_state = 'Active' if stats['enabled'] else 'Disabled'
        st.info(f"**Status:** {cache_state}")
        st.info(f"**Max Memory:** {SPACES_CONFIG['max_memory_mb']}MB")
        st.info(f"**Hit Rate:** {stats['hit_rate']:.1f}%")

        # Warn once usage passes 80% of the configured budget.
        used_mb = st.session_state.performance_monitor.get_stats()['memory_usage_mb']
        within_budget = not (used_mb > SPACES_CONFIG['max_memory_mb'] * 0.8)
        if within_budget:
            st.success(f"βœ… Memory Usage: {used_mb:.1f}MB")
        else:
            st.warning(f"⚠️ High Memory Usage: {used_mb:.1f}MB")

    # NOTE(review): st.switch_page expects a registered multipage script;
    # confirm pages/1_upload.py exists, since main() navigates via a selectbox.
    start_clicked = st.button("πŸš€ Start Analysis", type="primary", use_container_width=True)
    if start_clicked:
        st.switch_page("pages/1_upload.py")
208
+
209
def show_spaces_optimized_upload_page():
    """Render the upload page: file picker, processing options and controls.

    The left column holds the uploader and a per-file preview; the right
    column holds model, chunking and analysis options. The three buttons at
    the bottom start processing with the chosen options, stop it, or jump to
    the results page.
    """
    st.title("πŸ“€ Upload & Process Legislation")

    # Collapsed usage tips for the Spaces environment.
    with st.expander("πŸ’‘ Spaces Optimization Tips", expanded=False):
        st.info("""
        **For Best Performance on Spaces:**
        - Use smaller models (0.8B-1.5B parameters)
        - Process files individually for large documents
        - Keep chunk sizes under 2048 characters
        - Monitor memory usage in the sidebar
        """)

    st.subheader("πŸ“ Upload Legislation Files")

    upload_col, options_col = st.columns([1, 1])

    with upload_col:
        uploaded_files = st.file_uploader(
            "Select NZ legislation files",
            accept_multiple_files=True,
            type=['json', 'txt', 'jsonl'],
            help="Upload JSON lines format (.jsonl), JSON arrays (.json), or raw text (.txt) files",
            key="spaces_file_uploader"
        )

        if uploaded_files:
            st.success(f"πŸ“„ {len(uploaded_files)} file(s) selected")

            for file in uploaded_files:
                with st.expander(f"πŸ“‹ {file.name}"):
                    # Report the actual size; files above 10 MB get a warning.
                    size_mb = file.size / (1024 * 1024)
                    if size_mb > 10:
                        st.warning(f"Large file: {size_mb:.1f} MB - processing may be slow on Spaces")
                    else:
                        st.info(f"File size: {size_mb:.1f} MB")

                    st.write(f"**Type:** {file.type}")

                    if file.type in ['text/plain', 'application/json']:
                        # errors='replace' keeps the preview from crashing the
                        # page on files that are not valid UTF-8.
                        content = file.read().decode('utf-8', errors='replace')
                        preview_length = min(300, len(content))
                        if len(content) > preview_length:
                            preview = content[:preview_length] + "..."
                        else:
                            preview = content
                        st.text_area("Preview", preview, height=100, disabled=True)
                        file.seek(0)  # rewind so later readers see the whole file

    with options_col:
        st.subheader("βš™οΈ Processing Configuration")

        config = st.session_state.config_manager.get_config()

        with st.expander("πŸ€– Model Configuration", expanded=True):
            st.info("πŸ’‘ Use smaller models (0.8B-1.5B) for best Spaces performance")

            model_path = st.text_input(
                "Model Path",
                value=config['model']['path'],
                help="Path to your GGUF model file (use small models for Spaces)"
            )

            context_length = st.slider(
                "Context Length",
                min_value=1024,
                max_value=8192,
                value=min(config['model']['context_length'], 4096),
                step=512,
                help="Maximum context length for the model"
            )

            max_tokens = st.slider(
                "Max Response Tokens",
                min_value=256,
                max_value=4096,
                value=min(config['model']['max_tokens'], 2048),
                step=128,
                help="Maximum tokens in model response"
            )

        with st.expander("πŸ“ Text Processing", expanded=True):
            chunk_size = st.slider(
                "Chunk Size",
                min_value=512,
                max_value=4096,
                value=min(config['processing']['chunk_size'], 2048),
                step=256,
                help="Size of text chunks for processing"
            )

            chunk_overlap = st.slider(
                "Chunk Overlap",
                min_value=32,
                max_value=512,
                value=config['processing']['chunk_overlap'],
                step=32,
                help="Overlap between chunks for context preservation"
            )

        with st.expander("πŸ” Analysis Settings", expanded=True):
            # select_slider rejects a value that is not one of its options,
            # so a stored depth outside this list falls back to "Standard".
            depth_options = ["Basic", "Standard", "Detailed"]
            stored_depth = config['analysis']['depth']
            if stored_depth not in depth_options:
                stored_depth = "Standard"

            analysis_depth = st.select_slider(
                "Analysis Depth",
                options=depth_options,
                value=stored_depth,
                help="Level of detail in legal analysis (use Standard for Spaces)"
            )

            include_recommendations = st.checkbox(
                "Include Recommendations",
                value=config['analysis']['include_recommendations'],
                help="Generate specific recommendations for addressing identified issues"
            )

    # Action buttons
    start_col, stop_col, results_col = st.columns([1, 1, 1])

    with start_col:
        if st.button("πŸ”„ Start Processing", type="primary", use_container_width=True):
            if not uploaded_files:
                st.error("Please upload at least one legislation file")
            else:
                start_spaces_processing(uploaded_files, {
                    'model': {
                        'path': model_path,
                        'context_length': context_length,
                        'max_tokens': max_tokens
                    },
                    'processing': {
                        'chunk_size': chunk_size,
                        'chunk_overlap': chunk_overlap
                    },
                    'analysis': {
                        'depth': analysis_depth,
                        'include_recommendations': include_recommendations
                    }
                })

    with stop_col:
        if st.button("⏹️ Stop Processing", use_container_width=True):
            stop_processing()

    with results_col:
        if st.button("πŸ“Š View Results", use_container_width=True):
            # NOTE(review): confirm pages/2_analysis.py is a registered page.
            st.switch_page("pages/2_analysis.py")
359
+
360
def start_spaces_processing(files, config):
    """Mark a processing run as started and store its configuration.

    Args:
        files: The uploaded files chosen for processing. They are not read
            here; this function only updates session state.
        config: Nested settings dict handed to the config manager's
            ``update_config``.

    When memory usage exceeds 90% of ``SPACES_CONFIG['max_memory_mb']`` the
    run is not started; a warning and a cache-clearing button are shown
    instead, and the user has to start processing again afterwards.
    """
    perf_stats = st.session_state.performance_monitor.get_stats()
    if perf_stats['memory_usage_mb'] > SPACES_CONFIG['max_memory_mb'] * 0.9:
        st.warning("⚠️ High memory usage detected. Consider clearing cache first.")
        # This button is drawn only during the rerun triggered by the caller's
        # own button, so an `if st.button(...)` body would never run: the
        # click starts a rerun in which this branch is not reached. An
        # on_click callback fires at the start of that rerun instead.
        st.button(
            "Clear Cache and Continue",
            on_click=st.session_state.cache_manager.clear_cache,
        )
        return

    st.session_state.processing_status = {
        'is_running': True,
        'progress': 0,
        'current_task': 'Initializing...',
        'total_chunks': 0,
        'processed_chunks': 0
    }

    # Persist the options chosen on the upload page.
    st.session_state.config_manager.update_config(config)

    st.info("πŸ’‘ Processing on Spaces - this may take longer than local execution")

    # Rerun so the sidebar picks up the new processing status.
    st.rerun()
386
+
387
def stop_processing():
    """Flag the current run as stopped in the session's processing status."""
    status = st.session_state.processing_status
    status.update({
        'is_running': False,
        'current_task': 'Stopped by user',
    })
391
+
392
def show_spaces_optimized_results_page():
    """Render the results page: overview metrics, per-analysis details, export.

    Reads ``st.session_state.analysis_results`` (a list of dicts) and shows
    an informational message instead when the list is empty.
    """
    st.title("πŸ“Š Analysis Results")

    # Reminder that nothing outlives the session.
    with st.expander("πŸ’Ύ Session-Based Storage", expanded=False):
        st.warning("""
        **Important:** Results are stored in your session only.
        - Download results before closing your browser
        - Cache resets between visits
        - Consider using smaller models for faster processing
        """)

    results = st.session_state.analysis_results
    if not results:
        st.info("No analysis results available. Please upload and process legislation files first.")
        return

    st.subheader("πŸ“ˆ Results Overview")

    total_results = len(results)
    total_loopholes = sum(len(result.get('loopholes', [])) for result in results)
    avg_confidence = sum(result.get('confidence', 0) for result in results) / max(total_results, 1)
    cache_stats = st.session_state.cache_manager.get_stats()

    col1, col2, col3, col4 = st.columns(4)

    with col1:
        st.metric("Total Analyses", total_results)

    with col2:
        st.metric("Loopholes Found", total_loopholes)

    with col3:
        # Show the computed average, not the bare format spec.
        st.metric("Avg Confidence", f"{avg_confidence:.2f}")

    with col4:
        st.metric("Cache Hit Rate", f"{cache_stats['hit_rate']:.1f}%")

    st.subheader("πŸ” Detailed Results")

    for number, result in enumerate(results, start=1):
        title = result.get('title', 'Unknown Title')
        # Only the first analysis starts expanded.
        with st.expander(f"πŸ“‹ Analysis {number}: {title}", expanded=number == 1):
            detail_col, metric_col = st.columns([2, 1])

            with detail_col:
                st.markdown("**Summary:**")
                st.write(result.get('summary', 'No summary available'))

                st.markdown("**Key Findings:**")
                for finding in result.get('loopholes', []):
                    st.markdown(f"- {finding}")

            with metric_col:
                st.metric("Confidence", f"{result.get('confidence', 0):.2f}")
                # NOTE(review): 'processing_time' (seconds) is an assumed key;
                # confirm against whatever builds the result dicts.
                st.metric("Processing Time", f"{result.get('processing_time', 0):.2f}s")
                st.metric("Chunks Processed", result.get('chunks_processed', 0))

    st.subheader("πŸ’Ύ Export Results")

    json_col, csv_col, excel_col = st.columns(3)

    with json_col:
        if st.button("πŸ“„ Export as JSON", use_container_width=True):
            export_results('json')

    with csv_col:
        if st.button("πŸ“Š Export as CSV", use_container_width=True):
            export_results('csv')

    with excel_col:
        if st.button("πŸ“‹ Export as Excel", use_container_width=True):
            export_results('excel')
467
+
468
def export_results(format_type):
    """Placeholder exporter: only shows a message naming the chosen format.

    Args:
        format_type: Format name; it is upper-cased for the message.
    """
    # TODO: Implement export functionality
    format_label = format_type.upper()
    st.success(f"Results exported as {format_label} - download will be available in the next version")
472
+
473
def _clamp_to_range(value, lower, upper):
    """Return ``value`` limited to the inclusive range [lower, upper]."""
    return max(lower, min(upper, value))


def show_spaces_optimized_settings_page():
    """Render the settings page with model, processing, cache and performance tabs.

    Widget values are read on every run; "Save Settings" writes them to the
    config manager and "Reset to Defaults" restores the manager's defaults.
    Stored values are clamped to each slider's range before display, because
    a slider raises when its value lies outside ``min_value``/``max_value``
    (possible after a reset to defaults the sliders were not sized for).
    """
    st.title("βš™οΈ Settings & Configuration")

    with st.expander("🌐 Spaces Environment", expanded=False):
        st.info("""
        **Spaces-Specific Settings:**
        - Memory limit: 512MB cache (conservative)
        - Session-based storage only
        - No persistent data between visits
        - Optimized for cloud performance
        """)

    # Read once up front; every tab below uses it.
    config = st.session_state.config_manager.get_config()

    tabs = st.tabs(["πŸ€– Model Settings", "πŸ“ Processing", "🧠 Cache", "πŸ“Š Performance"])

    with tabs[0]:
        st.subheader("πŸ€– Model Configuration")

        st.info("πŸ’‘ For Spaces: Use smaller models (0.8B-1.5B parameters) for best performance")

        model_path = st.text_input(
            "Model Path",
            value=config['model']['path'],
            help="Path to your GGUF model file (smaller models recommended)"
        )

        context_length = st.slider(
            "Context Length",
            min_value=1024,
            max_value=8192,
            value=_clamp_to_range(config['model']['context_length'], 1024, 8192),
            step=512,
            help="Maximum context length (smaller = faster processing)"
        )

        max_tokens = st.slider(
            "Max Response Tokens",
            min_value=256,
            max_value=4096,
            value=_clamp_to_range(config['model']['max_tokens'], 256, 4096),
            step=128,
            help="Maximum tokens in response (smaller = faster)"
        )

        temperature = st.slider(
            "Temperature",
            min_value=0.0,
            max_value=1.0,
            # float() keeps value and bounds the same type, as sliders require.
            value=_clamp_to_range(float(config['model']['temperature']), 0.0, 1.0),
            step=0.1,
            help="Controls randomness (lower = more consistent)"
        )

    with tabs[1]:
        st.subheader("πŸ“ Text Processing")

        chunk_size = st.slider(
            "Chunk Size",
            min_value=512,
            max_value=4096,
            value=_clamp_to_range(config['processing']['chunk_size'], 512, 4096),
            step=256,
            help="Text chunk size (smaller = more memory efficient)"
        )

        chunk_overlap = st.slider(
            "Chunk Overlap",
            min_value=32,
            max_value=512,
            value=_clamp_to_range(config['processing']['chunk_overlap'], 32, 512),
            step=32,
            help="Overlap between chunks for context"
        )

        batch_size = st.slider(
            "Batch Size",
            min_value=1,
            max_value=8,
            value=_clamp_to_range(config['processing']['batch_size'], 1, 8),
            step=1,
            help="Number of chunks to process at once (lower = less memory)"
        )

    with tabs[2]:
        st.subheader("🧠 Cache Configuration")

        enable_cache = st.checkbox(
            "Enable Caching",
            value=config['cache']['enabled'],
            help="Use cache to avoid re-processing (recommended)"
        )

        st.info(f"πŸ’‘ Max cache size: {SPACES_CONFIG['max_memory_mb']}MB (fixed for Spaces)")

        cache_ttl = st.slider(
            "Cache TTL (hours)",
            min_value=0.5,
            max_value=2.0,
            # The Spaces default stores ttl_hours as the int 1; the bounds are
            # floats, and mixing the two makes the slider raise.
            value=_clamp_to_range(float(config['cache']['ttl_hours']), 0.5, 2.0),
            step=0.5,
            help="How long to keep cached results (shorter = less memory)"
        )

    with tabs[3]:
        st.subheader("πŸ“Š Performance Monitoring")

        perf_stats = st.session_state.performance_monitor.get_stats()
        cache_stats = st.session_state.cache_manager.get_stats()

        col1, col2, col3 = st.columns(3)

        # Units go into the value string: st.metric's third positional
        # argument is a delta indicator, not a unit label.
        with col1:
            st.metric("Memory Usage", f"{perf_stats['memory_usage_mb']:.1f} MB")

        with col2:
            st.metric("Cache Hit Rate", f"{cache_stats['hit_rate']:.1f}%")

        with col3:
            st.metric("Active Threads", perf_stats.get('active_threads', 0))

        recommendations = st.session_state.performance_monitor.get_recommendations()
        if recommendations:
            st.subheader("πŸ’‘ Recommendations")
            for rec in recommendations:
                if "High" in rec or "Low" in rec:
                    st.warning(rec)
                else:
                    st.info(rec)

    save_col, reset_col = st.columns([1, 1])

    with save_col:
        if st.button("πŸ’Ύ Save Settings", type="primary", use_container_width=True):
            new_config = {
                'model': {
                    'path': model_path,
                    'context_length': context_length,
                    'max_tokens': max_tokens,
                    'temperature': temperature
                },
                'processing': {
                    'chunk_size': chunk_size,
                    'chunk_overlap': chunk_overlap,
                    'batch_size': batch_size
                },
                'cache': {
                    'enabled': enable_cache,
                    'ttl_hours': cache_ttl
                }
            }

            st.session_state.config_manager.update_config(new_config)
            st.success("Settings saved successfully!")

    with reset_col:
        if st.button("πŸ”„ Reset to Defaults", use_container_width=True):
            st.session_state.config_manager.reset_to_defaults()
            st.success("Settings reset to defaults!")
            st.rerun()
636
+
637
def show_spaces_optimized_performance_page():
    """Render the performance dashboard.

    Shows memory/CPU/cache metrics from the session's performance monitor
    and cache manager, a memory status banner, the monitor's
    recommendations, and buttons that clear the cache or reset statistics.
    """
    st.title("πŸ“ˆ Performance Dashboard")

    with st.expander("🌐 Spaces Performance Notes", expanded=False):
        st.info("""
        **Spaces Environment:**
        - Memory limit: ~2-8GB shared
        - Cache: Session-based only
        - Performance: Optimized for cloud
        - Monitoring: Real-time metrics
        """)

    # Fetch both stat snapshots once and reuse them below.
    perf_stats = st.session_state.performance_monitor.get_stats()
    cache_stats = st.session_state.cache_manager.get_stats()
    memory_percent = perf_stats.get('memory_percent', 0)
    hit_rate_text = f"{cache_stats['hit_rate']:.1f}%"

    st.subheader("πŸ“Š Real-time Metrics")

    col1, col2, col3, col4 = st.columns(4)

    # Units are part of the value string: st.metric's third positional
    # argument is a delta indicator, not a unit label.
    with col1:
        st.metric("Memory Usage", f"{perf_stats['memory_usage_mb']:.1f} MB")

    with col2:
        st.metric("Memory %", f"{memory_percent:.1f}%")

    with col3:
        # NOTE(review): 'cpu_percent' is an assumed key; confirm against
        # PerformanceMonitor.get_stats().
        st.metric("CPU Usage", f"{perf_stats.get('cpu_percent', 0):.1f}%")

    with col4:
        st.metric("Cache Hit Rate", hit_rate_text)

    # Memory status banner: above 80% is an error, above 60% a warning.
    if memory_percent > 80:
        st.error("⚠️ High memory usage - consider clearing cache")
    elif memory_percent > 60:
        st.warning("⚠️ Moderate memory usage")
    else:
        st.success("βœ… Memory usage within limits")

    st.subheader("🧠 Cache Performance")

    col1, col2, col3, col4 = st.columns(4)

    with col1:
        st.metric("Total Requests", cache_stats['hits'] + cache_stats['misses'])

    with col2:
        st.metric("Cache Hits", cache_stats['hits'])

    with col3:
        st.metric("Cache Misses", cache_stats['misses'])

    with col4:
        st.metric("Hit Rate", hit_rate_text)

    st.subheader("πŸ’‘ Performance Recommendations")

    recommendations = st.session_state.performance_monitor.get_recommendations()

    if recommendations:
        # Severity is inferred from keywords in each recommendation's text.
        for rec in recommendations:
            if "High" in rec or "Low" in rec:
                st.error(rec)
            elif "Moderate" in rec or "Consider" in rec:
                st.warning(rec)
            else:
                st.info(rec)
    else:
        st.success("βœ… Performance is optimal!")

    st.subheader("🧠 Cache Management")

    clear_col, reset_col = st.columns(2)

    with clear_col:
        if st.button("πŸ”„ Clear Cache", type="secondary", use_container_width=True):
            st.session_state.cache_manager.clear_cache()
            st.success("Cache cleared successfully!")
            st.rerun()

    with reset_col:
        if st.button("πŸ“Š Reset Statistics", use_container_width=True):
            st.session_state.performance_monitor.reset_stats()
            st.success("Statistics reset!")
            st.rerun()
731
+
732
def main():
    """Entry point: set up session state, draw the sidebar, render one page.

    The sidebar holds the page selector, cache and performance metrics and,
    while a run is flagged as active, the processing status. The selected
    page is rendered in the main area, followed by a footer.
    """
    initialize_spaces_session()

    # Sidebar label -> function that renders that page.
    pages = {
        "🏠 Home": show_spaces_optimized_home_page,
        "πŸ“€ Upload & Process": show_spaces_optimized_upload_page,
        "πŸ“Š Analysis Results": show_spaces_optimized_results_page,
        "βš™οΈ Settings": show_spaces_optimized_settings_page,
        "πŸ“ˆ Performance": show_spaces_optimized_performance_page,
    }

    with st.sidebar:
        st.title("βš–οΈ NZ Legislation Analyzer")
        st.markdown("---")
        st.markdown("**Spaces Edition**")
        st.markdown("---")

        selected_page = st.selectbox(
            "Navigate to:",
            list(pages.keys()),
            key="nav_select"
        )

        st.markdown("---")

        with st.expander("🧠 Cache Status", expanded=True):
            cache_stats = st.session_state.cache_manager.get_stats()
            st.metric("Cache Hits", cache_stats['hits'])
            st.metric("Cache Misses", cache_stats['misses'])
            # Show the actual rate instead of the bare format spec.
            st.metric("Hit Rate", f"{cache_stats['hit_rate']:.1f}%")
            st.metric("Cached Chunks", cache_stats['entries'])

            if st.button("Clear Cache", type="secondary"):
                st.session_state.cache_manager.clear_cache()
                st.rerun()

        with st.expander("πŸ“Š Performance", expanded=True):
            perf_stats = st.session_state.performance_monitor.get_stats()
            st.metric("Memory Usage", f"{perf_stats['memory_usage_mb']:.1f} MB")
            # NOTE(review): 'cpu_percent' is an assumed key; confirm against
            # PerformanceMonitor.get_stats().
            st.metric("CPU Usage", f"{perf_stats.get('cpu_percent', 0):.1f}%")

        # Shown only while a run is flagged as active.
        status = st.session_state.processing_status
        if status['is_running']:
            with st.expander("πŸ”„ Processing Status", expanded=True):
                st.progress(status['progress'])
                st.text(status['current_task'])
                st.text(f"Chunk {status['processed_chunks']}/"
                        f"{status['total_chunks']}")

    # Main content area
    pages[selected_page]()

    # Footer
    st.markdown("---")
    st.markdown(
        """
        <div style='text-align: center; color: #666; font-size: 12px;'>
        NZ Legislation Loophole Analyzer v1.0.0 (Spaces Edition) |
        Built with Streamlit & Llama.cpp | Hosted on πŸ€— Hugging Face Spaces
        </div>
        """,
        unsafe_allow_html=True
    )


if __name__ == "__main__":
    main()