"""Simple fast version - single LLM call instead of multi-agent pipeline."""
import gradio as gr
from openai import OpenAI
import time
import pickle
from pathlib import Path
import logging
from src.config import get_config
from src.document_processing.loader import MarkdownDocumentLoader
from src.document_processing.chunker import SemanticChunker
from src.indexing.memory_indexer import MemoryDocumentIndexer
from src.retrieval.memory_retriever import MemoryRetriever
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class SimpleFastAssistant:
"""Simple fast assistant - one LLM call instead of 3."""
def __init__(self):
self.config = get_config()
self.client = OpenAI(api_key=self.config.llm.api_key)
# Load or create document store
self.indexer = MemoryDocumentIndexer(llm_config=self.config.llm)
self._load_or_create_documents()
# Initialize retriever
self.retriever = MemoryRetriever(
document_store=self.indexer.document_store,
llm_config=self.config.llm,
retrieval_config=self.config.retrieval,
)
def _load_or_create_documents(self):
"""Load documents from pickle or create fresh."""
doc_store_path = Path("data/document_store.pkl")
if doc_store_path.exists():
logger.info(f"Loading document store from {doc_store_path}...")
try:
with open(doc_store_path, "rb") as f:
self.indexer.document_store = pickle.load(f)
logger.info(f"Loaded {self.indexer.get_document_count()} documents")
return
except Exception as e:
logger.warning(f"Failed to load document store: {e}")
# Create documents if not found
logger.info("Creating fresh document index...")
loader = MarkdownDocumentLoader(self.config.document_processing.documents_path)
documents = loader.load_documents()
chunker = SemanticChunker(
chunk_size=self.config.document_processing.chunk_size,
chunk_overlap=self.config.document_processing.chunk_overlap,
min_chunk_size=self.config.document_processing.min_chunk_size,
)
chunked_docs = chunker.chunk_documents(documents)
self.indexer.index_documents(chunked_docs)
# Save for next time
doc_store_path.parent.mkdir(parents=True, exist_ok=True)
with open(doc_store_path, "wb") as f:
pickle.dump(self.indexer.document_store, f)
logger.info(f"Saved document store to {doc_store_path}")
def process_query(self, query: str):
"""Process query with single LLM call."""
import time
start = time.time()
# Retrieve documents
docs = self.retriever.retrieve(query)
# Build context
context = "\n\n".join([
f"Document {i+1} (from {doc.meta.get('source_file', 'unknown')}):\n{doc.content}"
for i, doc in enumerate(docs[:3])
])
# Single LLM call
system_prompt = """You are an email assistant for BFH (Bern University of Applied Sciences) administrative staff.
Compose professional email responses to student queries based on the provided context documents.
Guidelines:
- Write in the same language as the query (German/English)
- Use professional tone with formal German (Sie)
- Include subject line
- Reference specific forms/procedures from context
- Be clear and concise
Format:
Subject: [subject line]
[email body]"""
user_prompt = f"""Student Query: {query}
Context from knowledge base:
{context if context else "No relevant documents found."}
Compose a professional email response."""
        # GPT-5 uses max_completion_tokens instead of max_tokens
        completion_params = {
            "model": self.config.llm.model_name,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            "temperature": self.config.llm.temperature,
        }
        # Use the correct parameter based on the model
        if "gpt-5" in self.config.llm.model_name:
            completion_params["max_completion_tokens"] = self.config.llm.max_tokens
            # GPT-5-nano only supports temperature=1 (default), so don't set it
            if "nano" in self.config.llm.model_name:
                completion_params.pop("temperature", None)
        else:
            completion_params["max_tokens"] = self.config.llm.max_tokens
response = self.client.chat.completions.create(**completion_params)
email = response.choices[0].message.content
elapsed = time.time() - start
        # Parse "Subject: ..." from the first line; fall back if it is missing
        lines = email.split('\n')
        if lines[0].startswith('Subject:'):
            subject = lines[0].removeprefix('Subject:').strip()
            body = '\n'.join(lines[1:]).strip()
        else:
            subject = "Response"
            body = email.strip()
stats = f"**Time:** {elapsed:.1f}s | **Docs:** {len(docs)} | **Model:** {self.config.llm.model_name}"
sources = [
{
"Source": doc.meta.get('source_file', 'Unknown'),
"Score": f"{doc.score:.3f}",
"Preview": doc.content[:150] + "..."
}
for doc in docs
]
return subject, body, stats, sources
# Create assistant
assistant = SimpleFastAssistant()
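# The assistant can also be exercised without the UI, e.g. as a quick smoke test:
#   subject, body, stats, sources = assistant.process_query(
#       "Wie kann ich mich exmatrikulieren?"
#   )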
def process(query):
"""Gradio handler."""
return assistant.process_query(query)
# Create UI
with gr.Blocks(title="BFH Email Assistant (Fast Mode)", theme=gr.themes.Soft()) as demo:
gr.Markdown("""
# 📧 BFH Email Assistant - Fast Mode
**Single LLM call** instead of the multi-agent pipeline, for roughly **10x faster** responses.
""")
with gr.Row():
with gr.Column():
query_input = gr.Textbox(
label="Student Query",
placeholder="e.g., Wie kann ich mich exmatrikulieren?",
lines=3
)
submit_btn = gr.Button("Generate Email", variant="primary")
with gr.Column():
stats_output = gr.Markdown(label="Stats")
subject_output = gr.Textbox(label="Subject", lines=1)
body_output = gr.Textbox(label="Body", lines=12)
sources_output = gr.Dataframe(
headers=["Source", "Score", "Preview"],
label="Retrieved Documents"
)
submit_btn.click(
fn=process,
inputs=[query_input],
outputs=[subject_output, body_output, stats_output, sources_output]
)
gr.Examples(
examples=[
["Wie kann ich mich exmatrikulieren?"],
["Was kostet eine Namensänderung?"],
["How do I apply for a leave of absence?"],
],
inputs=[query_input]
)
if __name__ == "__main__":
demo.launch()