"""Simple fast version - single LLM call instead of multi-agent pipeline."""
import gradio as gr
from openai import OpenAI
import time
import pickle
from pathlib import Path
import logging
from src.config import get_config
from src.document_processing.loader import MarkdownDocumentLoader
from src.document_processing.chunker import SemanticChunker
from src.indexing.memory_indexer import MemoryDocumentIndexer
from src.retrieval.memory_retriever import MemoryRetriever
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class SimpleFastAssistant:
"""Simple fast assistant - one LLM call instead of 3."""
def __init__(self):
self.config = get_config()
self.client = OpenAI(api_key=self.config.llm.api_key)
# Load or create document store
self.indexer = MemoryDocumentIndexer(llm_config=self.config.llm)
self._load_or_create_documents()
# Initialize retriever
self.retriever = MemoryRetriever(
document_store=self.indexer.document_store,
llm_config=self.config.llm,
retrieval_config=self.config.retrieval,
)
def _load_or_create_documents(self):
"""Load documents from pickle or create fresh."""
doc_store_path = Path("data/document_store.pkl")
if doc_store_path.exists():
logger.info(f"Loading document store from {doc_store_path}...")
try:
with open(doc_store_path, "rb") as f:
self.indexer.document_store = pickle.load(f)
logger.info(f"Loaded {self.indexer.get_document_count()} documents")
return
except Exception as e:
logger.warning(f"Failed to load document store: {e}")
# Create documents if not found
logger.info("Creating fresh document index...")
loader = MarkdownDocumentLoader(self.config.document_processing.documents_path)
documents = loader.load_documents()
chunker = SemanticChunker(
chunk_size=self.config.document_processing.chunk_size,
chunk_overlap=self.config.document_processing.chunk_overlap,
min_chunk_size=self.config.document_processing.min_chunk_size,
)
chunked_docs = chunker.chunk_documents(documents)
self.indexer.index_documents(chunked_docs)
# Save for next time
doc_store_path.parent.mkdir(parents=True, exist_ok=True)
with open(doc_store_path, "wb") as f:
pickle.dump(self.indexer.document_store, f)
logger.info(f"Saved document store to {doc_store_path}")
def process_query(self, query: str):
"""Process query with single LLM call."""
import time
start = time.time()
# Retrieve documents
docs = self.retriever.retrieve(query)
# Build context
context = "\n\n".join([
f"Document {i+1} (from {doc.meta.get('source_file', 'unknown')}):\n{doc.content}"
for i, doc in enumerate(docs[:3])
])
# Single LLM call
system_prompt = """You are an email assistant for BFH (Bern University of Applied Sciences) administrative staff.
Compose professional email responses to student queries based on the provided context documents.
Guidelines:
- Write in the same language as the query (German/English)
- Use professional tone with formal German (Sie)
- Include subject line
- Reference specific forms/procedures from context
- Be clear and concise
Format:
Subject: [subject line]
[email body]"""
user_prompt = f"""Student Query: {query}
Context from knowledge base:
{context if context else "No relevant documents found."}
Compose a professional email response."""
        # GPT-5 uses max_completion_tokens instead of max_tokens
        completion_params = {
            "model": self.config.llm.model_name,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            "temperature": self.config.llm.temperature,
        }
        # Use the correct parameter based on the model
        if "gpt-5" in self.config.llm.model_name:
            completion_params["max_completion_tokens"] = self.config.llm.max_tokens
            # GPT-5-nano only supports temperature=1 (default), so don't set it
            if "nano" in self.config.llm.model_name:
                completion_params.pop("temperature", None)
        else:
            completion_params["max_tokens"] = self.config.llm.max_tokens
response = self.client.chat.completions.create(**completion_params)
email = response.choices[0].message.content
elapsed = time.time() - start
        # Parse "Subject: ..." from the first line; fall back if it is missing
        lines = email.split('\n')
        if lines[0].startswith('Subject:'):
            subject = lines[0].removeprefix('Subject:').strip()
            body = '\n'.join(lines[1:]).strip()
        else:
            subject = "Response"
            body = email.strip()
stats = f"**Time:** {elapsed:.1f}s | **Docs:** {len(docs)} | **Model:** {self.config.llm.model_name}"
sources = [
{
"Source": doc.meta.get('source_file', 'Unknown'),
"Score": f"{doc.score:.3f}",
"Preview": doc.content[:150] + "..."
}
for doc in docs
]
return subject, body, stats, sources
# Create assistant
assistant = SimpleFastAssistant()
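# The assistant can also be exercised without the UI, e.g. as a quick smoke test:
#   subject, body, stats, sources = assistant.process_query(
#       "Wie kann ich mich exmatrikulieren?"
#   )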
def process(query):
"""Gradio handler."""
return assistant.process_query(query)
# Create UI
with gr.Blocks(title="BFH Email Assistant (Fast Mode)", theme=gr.themes.Soft()) as demo:
gr.Markdown("""
# 📧 BFH Email Assistant - Fast Mode
**Single LLM call** instead of the multi-agent pipeline, for roughly **10x faster** responses.
""")
with gr.Row():
with gr.Column():
query_input = gr.Textbox(
label="Student Query",
placeholder="e.g., Wie kann ich mich exmatrikulieren?",
lines=3
)
submit_btn = gr.Button("Generate Email", variant="primary")
with gr.Column():
stats_output = gr.Markdown(label="Stats")
subject_output = gr.Textbox(label="Subject", lines=1)
body_output = gr.Textbox(label="Body", lines=12)
sources_output = gr.Dataframe(
headers=["Source", "Score", "Preview"],
label="Retrieved Documents"
)
submit_btn.click(
fn=process,
inputs=[query_input],
outputs=[subject_output, body_output, stats_output, sources_output]
)
gr.Examples(
examples=[
["Wie kann ich mich exmatrikulieren?"],
["Was kostet eine Namensänderung?"],
["How do I apply for a leave of absence?"],
],
inputs=[query_input]
)
if __name__ == "__main__":
demo.launch()