awellis committed
Commit 9926cde · Parent: fd727a1

Create rag_email_assistant_haystack_2_pydantic_ai_gradio_modular_2025_baseline.py

# Project layout (place files as shown)
# ├── app/
# │   ├── __init__.py
# │   ├── config.py
# │   ├── logging_setup.py
# │   ├── models.py
# │   ├── utils/
# │   │   ├── __init__.py
# │   │   └── markdown_loader.py
# │   ├── retriever/
# │   │   ├── __init__.py
# │   │   ├── indexer.py
# │   │   └── pipeline.py
# │   ├── agents/
# │   │   ├── __init__.py
# │   │   ├── llm_client.py
# │   │   ├── intent_extractor.py
# │   │   ├── composer.py
# │   │   └── fact_checker.py
# │   ├── gradio_app.py
# │   └── main.py
# ├── requirements.txt
# └── README.md

# ===========================
# requirements.txt
# ===========================
# Pin reasonably recent, stable versions (2025 best practices: uv/pip-tools recommended for locking)
haystack-ai==2.0.1
opensearch-haystack  # OpenSearch integration for Haystack 2 (OpenSearchDocumentStore + retrievers); pin to the release matching haystack-ai
opensearch-py==2.6.0
sentence-transformers==3.1.1
pydantic==2.8.2
pydantic-settings==2.4.0  # needed by app/config.py (BaseSettings lives here in Pydantic v2)
pydantic-ai==0.0.10
fastapi==0.115.0
uvicorn==0.30.6
httpx==0.27.2
structlog==24.1.0
gradio==4.44.0
markdown-it-py==3.0.0
mdurl==0.1.2
python-dotenv==1.0.1
# optional (CPU fallback for reranker)
transformers==4.44.2
accelerate==0.34.2

# ===========================
# app/__init__.py
# ===========================
from __future__ import annotations

__all__ = [
    "config", "logging_setup", "models",
]

# ===========================
# app/logging_setup.py
# ===========================
from __future__ import annotations
import logging
import structlog

_DEF_LEVEL = logging.INFO

def setup_logging(level: int = _DEF_LEVEL) -> None:
    """Structured logging; call early in main."""
    logging.basicConfig(level=level, format="%(message)s")
    structlog.configure(
        processors=[
            structlog.processors.TimeStamper(fmt="iso"),
            structlog.processors.add_log_level,
            structlog.processors.StackInfoRenderer(),
            structlog.processors.format_exc_info,
            structlog.processors.JSONRenderer(),
        ],
        logger_factory=structlog.stdlib.LoggerFactory(),
        wrapper_class=structlog.stdlib.BoundLogger,
        cache_logger_on_first_use=True,
    )

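# Usage sketch: after setup_logging() runs, structlog emits one JSON object per event.
# setup_logging()
# log = structlog.get_logger()
# log.info("indexing_done", chunks=128)  # -> {"event": "indexing_done", "chunks": 128, ...}
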
# ===========================
# app/config.py
# ===========================
from __future__ import annotations
from pydantic import BaseModel, Field
from pydantic_settings import BaseSettings, SettingsConfigDict
from typing import Optional

class OpenSearchSettings(BaseModel):
    host: str = Field(default="localhost")
    port: int = Field(default=9200)
    scheme: str = Field(default="http")
    index_name: str = Field(default="policies-v1")
    embedding_dim: int = Field(default=1024)

class ModelSettings(BaseModel):
    embedding_model: str = Field(default="intfloat/multilingual-e5-large-instruct")
    reranker_model: str = Field(default="BAAI/bge-reranker-v2-m3")
    # LLM endpoint: use OpenAI-compatible endpoint or local server
    llm_base_url: str = Field(default="http://localhost:8001/v1")
    llm_api_key: Optional[str] = Field(default=None)
    llm_model: str = Field(default="openai/gpt-oss-20b")

class AppSettings(BaseSettings):
    model_config = SettingsConfigDict(
        env_nested_delimiter="__",
        env_prefix="RAG_",  # e.g., RAG_MODELS__LLM_BASE_URL
    )

    env: str = Field(default="dev")  # dev|prod|space
    os: OpenSearchSettings = Field(default_factory=OpenSearchSettings)
    models: ModelSettings = Field(default_factory=ModelSettings)
    # retrieval knobs
    bm25_k: int = Field(default=16)
    dense_k: int = Field(default=16)
    rerank_k: int = Field(default=5)
    # chunking
    prose_split_length: int = Field(default=12)  # ~350 tokens (sentence units)
    prose_overlap: int = Field(default=2)

settings = AppSettings()  # read from env automatically

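# Example: override nested settings via environment variables (Twelve-Factor).
# The names follow the RAG_ prefix and "__" nested delimiter configured above;
# the values below are illustrative only.
#   export RAG_ENV=prod
#   export RAG_MODELS__LLM_BASE_URL=https://llm.example.internal/v1
#   export RAG_OS__INDEX_NAME=policies-v2
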
# ===========================
# app/models.py
# ===========================
from __future__ import annotations
from typing import List, Dict, Literal
from pydantic import BaseModel, Field

class StudentQuery(BaseModel):
    intent: str
    questions: List[str]
    language: Literal["de", "en", "fr", "it"] = "de"
    entities: Dict[str, str] = Field(default_factory=dict)  # {"semester": "HS"}

class Evidence(BaseModel):
    passage: str
    section_path: str
    doc_title: str
    score: float | None = None
    doc_id: str | None = None

class EmailDraft(BaseModel):
    body: str
    citations: List[Evidence] = Field(default_factory=list)
    warnings: List[str] = Field(default_factory=list)

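# Illustrative instance (hypothetical values) of the triage output consumed downstream:
# q = StudentQuery(
#     intent="enrollment_deadline",
#     questions=["Bis wann kann ich mich für das HS anmelden?"],
#     language="de",
#     entities={"semester": "HS"},
# )
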
# ===========================
# app/utils/__init__.py
# ===========================

# ===========================
# app/utils/markdown_loader.py
# ===========================
from __future__ import annotations
from typing import Iterable, List
from haystack import Document
from markdown_it import MarkdownIt

# the commonmark preset ships without tables; enable the rule so table_open tokens appear
md = MarkdownIt().enable("table")

_DEF_LANG = "de"

def _serialize_table(tokens: list) -> str:
    # Very simple table serializer: one output line per table row (improve as needed)
    rows: List[List[str]] = []
    curr: List[str] = []
    for t in tokens:
        if t.type == "tr_open":
            curr = []
        elif t.type == "tr_close":
            if curr:
                rows.append(curr)
        elif t.type == "inline":
            curr.append(t.content.strip())
    lines = []
    for r in rows:
        if len(r) >= 2:
            lines.append(f"{r[0]}: {' | '.join(r[1:])}")
        elif r:
            lines.append(r[0])
    return "\n".join(lines)

def load_markdown_to_documents(text: str, title: str, section_root: str | None = None, lang: str = _DEF_LANG) -> Iterable[Document]:
    tokens = md.parse(text)
    section = section_root or title
    buff: List[str] = []
    path_stack: List[str] = [section]

    def flush_paragraph():
        nonlocal buff
        if buff:
            yield Document(content="\n".join(buff), meta={"title": title, "section_path": ">".join(path_stack), "lang": lang, "block_type": "prose"})
            buff = []

    i = 0
    while i < len(tokens):
        t = tokens[i]
        if t.type == "heading_open":
            # flush current paragraph
            yield from flush_paragraph()
            # the next inline token carries the heading text
            h_text = tokens[i + 1].content.strip()
            # adjust stack (naive: always attach directly under the root)
            path_stack = [section, h_text]
            i += 3  # skip heading_open, inline, heading_close
            continue
        if t.type == "paragraph_open":
            # collect inline content until paragraph_close
            i += 1
            while tokens[i].type != "paragraph_close":
                if tokens[i].type == "inline":
                    buff.append(tokens[i].content)
                i += 1
            # close handled by flush at the next heading or at end of document
        elif t.type == "table_open":
            # parse the whole table block; tables stay intact as single Documents
            j = i + 1
            table_tokens = []
            depth = 1
            while j < len(tokens) and depth > 0:
                if tokens[j].type == "table_open":
                    depth += 1
                elif tokens[j].type == "table_close":
                    depth -= 1
                table_tokens.append(tokens[j])
                j += 1
            table_text = _serialize_table(table_tokens)
            yield Document(content=table_text, meta={"title": title, "section_path": ">".join(path_stack), "lang": lang, "block_type": "table"})
            i = j
            continue
        i += 1
    # flush remaining prose
    yield from flush_paragraph()

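# Usage sketch (hypothetical policy snippet): headings set the section path,
# paragraphs become "prose" Documents, tables become single "table" Documents.
# docs = list(load_markdown_to_documents("# Fristen\n\nAnmeldung bis 30. April.", title="Einschreibung"))
# docs[0].meta["section_path"]  # -> "Einschreibung>Fristen"
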
# ===========================
# app/retriever/__init__.py
# ===========================

# ===========================
# app/retriever/indexer.py
# ===========================
from __future__ import annotations
from typing import Iterable
from haystack import Document
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
from haystack.components.writers import DocumentWriter
from haystack_integrations.document_stores.opensearch import OpenSearchDocumentStore
from app.config import settings

# split_by="sentence" already cuts on sentence boundaries; no extra flag needed
_splitter = DocumentSplitter(
    split_by="sentence", split_length=settings.prose_split_length,
    split_overlap=settings.prose_overlap,
)

_embedder = SentenceTransformersDocumentEmbedder(
    model=settings.models.embedding_model,
    normalize_embeddings=True,
    prefix="passage: ",  # E5 passage prefix
)

def build_docstore() -> OpenSearchDocumentStore:
    return OpenSearchDocumentStore(
        index=settings.os.index_name,
        hosts=[{"host": settings.os.host, "port": settings.os.port, "scheme": settings.os.scheme}],
        embedding_dim=settings.os.embedding_dim,
    )

def index_documents(docs: Iterable[Document]) -> int:
    store = build_docstore()
    writer = DocumentWriter(document_store=store)
    # Split prose chunks only; keep tables as-is (block_type metadata guides behavior)
    out_docs = []
    for d in docs:
        if d.meta.get("block_type") == "prose":
            out_docs.extend(_splitter.run(documents=[d])["documents"])
        else:
            out_docs.append(d)
    # Embed (warm_up loads the sentence-transformers model when running outside a Pipeline)
    _embedder.warm_up()
    embedded = _embedder.run(documents=out_docs)["documents"]
    # Persist
    writer.run(documents=embedded)
    return len(embedded)

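# Ingestion sketch (assumes a local "policies/" folder of Markdown files; adjust paths):
# from pathlib import Path
# from app.utils.markdown_loader import load_markdown_to_documents
# docs: list[Document] = []
# for p in Path("policies").glob("*.md"):
#     docs.extend(load_markdown_to_documents(p.read_text(encoding="utf-8"), title=p.stem))
# print(index_documents(docs), "chunks indexed")
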
# ===========================
# app/retriever/pipeline.py
# ===========================
from __future__ import annotations
from typing import Dict, List
from haystack import Pipeline, Document
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.joiners import DocumentJoiner
from haystack.components.rankers import TransformersSimilarityRanker
from haystack_integrations.components.retrievers.opensearch import (
    OpenSearchBM25Retriever,
    OpenSearchEmbeddingRetriever,
)
from haystack_integrations.document_stores.opensearch import OpenSearchDocumentStore
from app.config import settings

_store: OpenSearchDocumentStore | None = None


def _store_or_new() -> OpenSearchDocumentStore:
    global _store
    if _store is None:
        _store = OpenSearchDocumentStore(
            index=settings.os.index_name,
            hosts=[{"host": settings.os.host, "port": settings.os.port, "scheme": settings.os.scheme}],
            embedding_dim=settings.os.embedding_dim,
        )
    return _store

_bm25 = OpenSearchBM25Retriever(document_store=_store_or_new(), top_k=settings.bm25_k)
# Dense retrieval needs a query embedding; the text embedder adds the E5 "query: " prefix.
_query_embedder = SentenceTransformersTextEmbedder(model=settings.models.embedding_model, prefix="query: ")
_dense = OpenSearchEmbeddingRetriever(document_store=_store_or_new(), top_k=settings.dense_k)
# Reciprocal rank fusion of BM25 and dense result lists.
_fuser = DocumentJoiner(join_mode="reciprocal_rank_fusion")
_reranker = TransformersSimilarityRanker(model=settings.models.reranker_model, top_k=settings.rerank_k)

_pipe = Pipeline()
_pipe.add_component("bm25", _bm25)
_pipe.add_component("query_embedder", _query_embedder)
_pipe.add_component("dense", _dense)
_pipe.add_component("fuse", _fuser)
_pipe.add_component("rerank", _reranker)
_pipe.connect("query_embedder.embedding", "dense.query_embedding")
_pipe.connect("bm25", "fuse")
_pipe.connect("dense", "fuse")
_pipe.connect("fuse.documents", "rerank.documents")


def retrieve(query_text: str, filters: Dict | None = None) -> List[Document]:
    out = _pipe.run({
        "bm25": {"query": query_text, "filters": filters},
        "query_embedder": {"text": query_text},  # E5 query prefix added by the embedder
        "dense": {"filters": filters},
        "rerank": {"query": query_text},  # cross-encoder needs the query at run time
    })
    return out["rerank"]["documents"]

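# Filter sketch (Haystack 2 filter syntax; metadata fields are addressed as "meta.<key>"):
# hits = retrieve(
#     "Anmeldefrist Herbstsemester",
#     filters={"field": "meta.lang", "operator": "==", "value": "de"},
# )
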
# ===========================
# app/agents/__init__.py
# ===========================

# ===========================
# app/agents/llm_client.py
# ===========================
from __future__ import annotations
from typing import Any, Dict
import httpx
from pydantic_ai.models.openai import OpenAIModel
from app.config import settings

class LLMClient:
    """Minimal OpenAI-compatible client with timeouts & connection retries."""
    def __init__(self, base_url: str | None = None, api_key: str | None = None, model: str | None = None) -> None:
        self.base_url = base_url or settings.models.llm_base_url
        self.api_key = api_key or settings.models.llm_api_key or "sk-void"
        self.model = model or settings.models.llm_model
        self._client = httpx.Client(
            base_url=self.base_url,
            timeout=30.0,
            transport=httpx.HTTPTransport(retries=2),  # retry transient connection errors
        )

    def chat(self, messages: list[dict], response_format: Dict[str, Any] | None = None) -> dict:
        payload: Dict[str, Any] = {"model": self.model, "messages": messages}
        if response_format:
            payload["response_format"] = response_format
        r = self._client.post("/chat/completions", headers={"Authorization": f"Bearer {self.api_key}"}, json=payload)
        r.raise_for_status()
        return r.json()

def pydantic_ai_model() -> OpenAIModel:
    """Shared model handle for the pydantic-ai agents below, pointing at the same endpoint.
    (pydantic-ai 0.0.x accepts base_url/api_key directly; newer releases moved these onto a provider object.)"""
    return OpenAIModel(
        settings.models.llm_model,
        base_url=settings.models.llm_base_url,
        api_key=settings.models.llm_api_key or "sk-void",
    )

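# Usage sketch (standard OpenAI chat-completions response shape):
# _llm = LLMClient()
# resp = _llm.chat([{"role": "user", "content": "Sag kurz hallo."}])
# print(resp["choices"][0]["message"]["content"])
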
# ===========================
# app/agents/intent_extractor.py
# ===========================
from __future__ import annotations
from pydantic_ai import Agent
from pydantic import BaseModel, Field
from app.models import StudentQuery
from app.agents.llm_client import pydantic_ai_model

class _StudentQuerySchema(BaseModel):
    intent: str
    questions: list[str]
    language: str = Field(default="de", pattern="^(de|en|fr|it)$")
    entities: dict = Field(default_factory=dict)

intent_agent = Agent(
    pydantic_ai_model(),  # OpenAI-compatible endpoint configured in app/config.py
    result_type=_StudentQuerySchema,
    system_prompt=(
        "You are a university admin triage assistant. Extract intent, a list of explicit questions,"
        " language code (de/en/fr/it), and simple entities (e.g., semester=HS/FS, program)."
        " Return only fields in the schema."
    ),
)


def extract(email_text: str) -> StudentQuery:
    res = intent_agent.run_sync(email_text)  # res.data is the validated schema instance
    return StudentQuery(**res.data.model_dump())

# ===========================
# app/agents/composer.py
# ===========================
from __future__ import annotations
from typing import List
from pydantic_ai import Agent
from app.models import StudentQuery, EmailDraft, Evidence
from app.agents.llm_client import pydantic_ai_model

composer_agent = Agent(
    pydantic_ai_model(),
    result_type=EmailDraft,
    system_prompt=(
        "You draft clear, courteous, and policy-grounded emails for university admin staff.\n"
        "Use the provided evidence only; do not invent rules. Add short citations (title + section_path).\n"
        "Return a single text body suitable to copy-paste, plus citations and warnings if evidence is weak."
    ),
)


def compose(query: StudentQuery, evidences: List[Evidence]) -> EmailDraft:
    # Convert evidences to a readable context block
    ctx = "\n\n".join(
        f"[{i+1}] {e.doc_title} > {e.section_path}\n{e.passage}" for i, e in enumerate(evidences)
    )
    user = (
        f"LANG={query.language}\n"
        f"INTENT={query.intent}\n"
        f"QUESTIONS={query.questions}\n"
        f"ENTITIES={query.entities}\n\n"
        f"EVIDENCE:\n{ctx}"
    )
    res = composer_agent.run_sync(user)
    return EmailDraft(**res.data.model_dump())

# ===========================
# app/agents/fact_checker.py
# ===========================
from __future__ import annotations
from typing import List
from pydantic_ai import Agent
from app.models import EmailDraft, Evidence
from app.agents.llm_client import pydantic_ai_model

checker_agent = Agent(
    pydantic_ai_model(),
    result_type=EmailDraft,
    system_prompt=(
        "You verify the draft email is fully supported by the evidence.\n"
        "Add warnings for any claims lacking backing text; suggest placeholders instead of guessing."
    ),
)


def fact_check(draft: EmailDraft, evidences: List[Evidence]) -> EmailDraft:
    ctx = "\n\n".join(
        f"[{i+1}] {e.doc_title} > {e.section_path}\n{e.passage}" for i, e in enumerate(evidences)
    )
    user = f"DRAFT:\n{draft.body}\n\nEVIDENCE:\n{ctx}"
    res = checker_agent.run_sync(user)
    return EmailDraft(**res.data.model_dump())

# ===========================
# app/gradio_app.py
# ===========================
from __future__ import annotations
import gradio as gr
from typing import List
from app.agents import intent_extractor, composer, fact_checker
from app.retriever.pipeline import retrieve
from app.models import StudentQuery, Evidence, EmailDraft

_DEF_PLACEHOLDER = "Fügen Sie hier die Studenten-E-Mail ein / Paste the student email here..."


def _to_evidence(documents) -> List[Evidence]:
    evs: List[Evidence] = []
    for d in documents:
        evs.append(Evidence(
            passage=d.content,
            section_path=d.meta.get("section_path", ""),
            doc_title=d.meta.get("title", ""),
            score=d.score,
            doc_id=d.id,
        ))
    return evs


def answer(email_text: str) -> tuple[str, str]:
    if not email_text.strip():
        return "", ""
    q: StudentQuery = intent_extractor.extract(email_text)
    docs = []
    for question in q.questions or [email_text]:
        docs.extend(retrieve(question, filters={"field": "meta.lang", "operator": "==", "value": q.language}))
    # deduplicate while keeping top scores (scores may be None)
    seen = {}
    for d in docs:
        if d.id not in seen or (d.score or 0.0) > (seen[d.id].score or 0.0):
            seen[d.id] = d
    top_docs = sorted(seen.values(), key=lambda x: x.score or 0.0, reverse=True)[:8]
    evs = _to_evidence(top_docs)
    draft: EmailDraft = composer.compose(q, evs)
    checked: EmailDraft = fact_checker.fact_check(draft, evs)

    # Advanced panel content
    adv = []
    for i, e in enumerate(evs, start=1):
        score_txt = f"{e.score:.3f}" if e.score is not None else "n/a"
        adv.append(f"### {i}. {e.doc_title} › {e.section_path}\nScore: {score_txt}\n\n{e.passage}")
    advanced_md = "\n\n".join(adv)
    return checked.body, advanced_md


def build_interface() -> gr.Blocks:
    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 📬 Staff Assist – RAG Email Drafts (Haystack 2 + PydanticAI)")
        with gr.Row():
            email_in = gr.Textbox(lines=12, placeholder=_DEF_PLACEHOLDER, label="Student Email")
        with gr.Row():
            draft_out = gr.Textbox(lines=14, label="Draft Reply (Editable)")
        with gr.Accordion("Advanced: Retrieved Evidence (chunks & sections)", open=False):
            advanced = gr.Markdown()
        submit = gr.Button("Generate Draft", variant="primary")
        submit.click(answer, inputs=[email_in], outputs=[draft_out, advanced])
    return demo

# ===========================
# app/main.py
# ===========================
from __future__ import annotations
from app.logging_setup import setup_logging
from app.gradio_app import build_interface

if __name__ == "__main__":
    setup_logging()
    ui = build_interface()
    ui.launch(server_name="0.0.0.0", server_port=7860)

# ===========================
# README.md (excerpt)
# ===========================
# RAG Email Assistant – Haystack 2 + PydanticAI + Gradio

## Quick start (dev)
1. Run OpenSearch locally (or point to your cluster). Create an index with k-NN enabled (dimension 1024).
2. Set env vars (see the `RAG_*` prefix in `app/config.py`).
3. Index your Markdown: use `load_markdown_to_documents` + `index_documents` from a small script (see the ingestion sketch after `index_documents` above).
4. `python -m app.main`

## Hugging Face Spaces notes
- Spaces (CPU) can host the **Gradio UI**, but OpenSearch must be reachable over the network.
- For a self-contained demo, swap in Haystack's local `InMemoryDocumentStore` in `pipeline.py` (feature flag) and switch to CPU-friendly models; a sketch follows below.

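A minimal in-memory fallback sketch (assumes a `RAG_ENV=space` flag; the in-memory store and retrievers ship with `haystack-ai` itself):

```python
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
from app.config import settings

if settings.env == "space":
    store = InMemoryDocumentStore()  # no external OpenSearch needed
    bm25 = InMemoryBM25Retriever(document_store=store, top_k=settings.bm25_k)
    dense = InMemoryEmbeddingRetriever(document_store=store, top_k=settings.dense_k)
```
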
## Best practices baked in (2025)
- Strict typing and Pydantic v2 models for agent IO.
- OpenAI-compatible LLM client abstraction (swap endpoints/models without code changes).
- Heading-aware chunking, table preservation, hybrid retrieval + cross-encoder rerank.
- Structured logging (JSON) via `structlog`.
- Env-driven settings with nested prefixes (Twelve-Factor).
- Safe defaults (normalized E5 embeddings, E5 query prefix, dedup of docs).
- Clear separation: ingestion/indexing vs. serving.

## Where to extend
- Add caching for embeddings & retrieval; add a RAG evaluation notebook (Recall@k, groundedness).
- Add multilingual tone/style templates in `composer.py` based on `query.language`.
- Add policy/version metadata and link anchors per chunk for clickable citations in the UI.
- Add guardrails (regex) to block sharing internal links when emailing students.
- Add DSPy for prompt/pipeline optimization once you have labeled email pairs.