Spaces:

MCP-1st-Birthday
/

MedLLM-Agent

Running on Zero

App Files Files Community

Y Phung Nguyen committed 29 days ago

Commit

c8562d7

1 Parent(s): 3b38a6c

Optimise QA round followup extensive

Browse files

Files changed (3) hide show

.gitignore +2 -1
pipeline.py +49 -3
supervisor.py +5 -0

.gitignore CHANGED Viewed

@@ -1,3 +1,4 @@
 .env
 .setup.txt
-__pycache__/

 .env
 .setup.txt
+__pycache__/
+sample.txt

pipeline.py CHANGED Viewed

@@ -5,6 +5,7 @@ import time
 import logging
 import threading
 import concurrent.futures
 import gradio as gr
 import spaces
 from llama_index.core import StorageContext, VectorStoreIndex, load_index_from_storage
@@ -118,6 +119,46 @@ def _build_refined_query(base_query: str, insights: dict, insights_block: str) -
     return "\n\n".join([section for section in sections if section])
 def _start_clinical_intake_session(session_id: str, plan: dict, base_query: str, original_language: str):
     questions = plan.get("questions", []) or []
     if not questions:
@@ -135,7 +176,8 @@ def _start_clinical_intake_session(session_id: str, plan: dict, base_query: str,
         "answers": [],
         "decision_reason": plan.get("decision_reason", ""),
         "initial_hypotheses": plan.get("initial_hypotheses", []),
-        "started_at": time.time()
     }
     _set_clinical_intake_state(session_id, state)
     first_prompt = _format_intake_question(
@@ -144,6 +186,8 @@ def _start_clinical_intake_session(session_id: str, plan: dict, base_query: str,
         max_rounds=max_rounds,
         target_lang=state["original_language"]
     )
     return first_prompt
@@ -193,6 +237,8 @@ def _handle_clinical_answer(session_id: str, answer_text: str):
         max_rounds=state["max_rounds"],
         target_lang=state["original_language"]
     )
     return {"type": "question", "prompt": prompt}
@@ -235,7 +281,7 @@ def stream_chat(
     def elapsed():
         return time.time() - session_start
-    user_id = request.session_hash
     index_dir = f"./{user_id}_index"
     has_rag_index = os.path.exists(index_dir)
@@ -285,7 +331,7 @@ def stream_chat(
     if not enable_clinical_intake:
         _clear_clinical_intake_state(user_id)
     else:
-        intake_state = _get_clinical_intake_state(user_id)
         if intake_state and intake_state.get("awaiting_answer"):
             logger.info("[INTAKE] Awaiting patient response - processing answer")
             intake_result = _handle_clinical_answer(user_id, message)

 import logging
 import threading
 import concurrent.futures
+import hashlib
 import gradio as gr
 import spaces
 from llama_index.core import StorageContext, VectorStoreIndex, load_index_from_storage
     return "\n\n".join([section for section in sections if section])
def _hash_prompt_text(text: str) -> str:
    """Return a SHA-1 hex fingerprint of *text*, ignoring outer whitespace.

    The digest is used as an equality key for comparing prompt texts; it is
    not a security primitive.

    Args:
        text: Prompt text to fingerprint. May be empty or ``None``.

    Returns:
        The 40-character hexadecimal SHA-1 digest of the stripped, UTF-8
        encoded text, or ``""`` when there is nothing to hash (empty,
        ``None`` or whitespace-only input).
    """
    normalized = text.strip() if text else ""
    if not normalized:
        # Whitespace-only text carries no content: report it exactly like
        # empty input rather than returning the digest of an empty string,
        # so callers' "no hash" checks stay reliable.
        return ""
    return hashlib.sha1(normalized.encode("utf-8")).hexdigest()
def _extract_pending_intake_prompt(history: list) -> str:
    """Return the most recent assistant message that is an intake question.

    The history is scanned from newest to oldest. An intake question is an
    assistant turn whose text content starts with the
    ``"🩺 Question for clarity"`` marker.

    Args:
        history: Chat turns, each expected to expose ``get("role")`` and
            ``get("content")`` (e.g. ``{"role": ..., "content": ...}``
            dicts). May be empty or ``None``.

    Returns:
        The full content of the newest matching assistant turn, or ``""``
        when the history is empty or holds no such turn.
    """
    if not history:
        return ""
    for turn in reversed(history):
        # Entries that are not mapping-like (for example tuple-style
        # message pairs) cannot be inspected by role; skip them instead of
        # raising AttributeError.
        lookup = getattr(turn, "get", None)
        if not callable(lookup):
            continue
        if lookup("role") != "assistant":
            continue
        content = lookup("content", "")
        # Content may be None or a non-text payload; only plain strings can
        # carry the intake marker.
        if isinstance(content, str) and content.startswith("🩺 Question for clarity"):
            return content
    return ""
def _rehydrate_intake_state(session_id: str, history: list):
    """Look up the intake state for *session_id*, recovering it if needed.

    When the session has no (truthy) stored state but the chat history still
    shows an intake question, the stored sessions are searched for one that
    is awaiting an answer to a prompt with the same hash. A match found
    under a different session id is moved to *session_id*.

    Args:
        session_id: Identifier of the current session.
        history: Chat history used to locate the pending intake prompt.

    Returns:
        The stored state when one exists (or whatever the state lookup
        returned when *history* is empty), the recovered state when a
        matching session is found, otherwise ``None``.
    """
    current = _get_clinical_intake_state(session_id)
    if current or not history:
        return current

    pending = _extract_pending_intake_prompt(history)
    fingerprint = _hash_prompt_text(pending) if pending else ""
    if not fingerprint:
        return None

    # NOTE(review): candidates are matched on prompt text alone, so two
    # sessions awaiting an identically worded question are indistinguishable
    # here - confirm that adopting another session's state is acceptable.
    with _clinical_intake_lock:
        # Iterate over a snapshot because the mapping is mutated on a match.
        for owner_id, candidate in list(_clinical_intake_sessions.items()):
            if not candidate.get("awaiting_answer"):
                continue
            if candidate.get("last_prompt_hash") != fingerprint:
                continue
            if owner_id != session_id:
                # Re-key the state under the current session id.
                _clinical_intake_sessions.pop(owner_id, None)
                _clinical_intake_sessions[session_id] = candidate
            return candidate
    return None
 def _start_clinical_intake_session(session_id: str, plan: dict, base_query: str, original_language: str):
     questions = plan.get("questions", []) or []
     if not questions:
         "answers": [],
         "decision_reason": plan.get("decision_reason", ""),
         "initial_hypotheses": plan.get("initial_hypotheses", []),
+        "started_at": time.time(),
+        "last_prompt_hash": ""
     }
     _set_clinical_intake_state(session_id, state)
     first_prompt = _format_intake_question(
         max_rounds=max_rounds,
         target_lang=state["original_language"]
     )
+    state["last_prompt_hash"] = _hash_prompt_text(first_prompt)
+    _set_clinical_intake_state(session_id, state)
     return first_prompt
         max_rounds=state["max_rounds"],
         target_lang=state["original_language"]
     )
+    state["last_prompt_hash"] = _hash_prompt_text(prompt)
+    _set_clinical_intake_state(session_id, state)
     return {"type": "question", "prompt": prompt}
     def elapsed():
         return time.time() - session_start
+    user_id = request.session_hash or "anonymous"
     index_dir = f"./{user_id}_index"
     has_rag_index = os.path.exists(index_dir)
     if not enable_clinical_intake:
         _clear_clinical_intake_state(user_id)
     else:
+        intake_state = _rehydrate_intake_state(user_id, history)
         if intake_state and intake_state.get("awaiting_answer"):
             logger.info("[INTAKE] Awaiting patient response - processing answer")
             intake_result = _handle_clinical_answer(user_id, message)

supervisor.py CHANGED Viewed

@@ -168,12 +168,17 @@ def _prepare_clinical_question_plan(plan: dict, safe_rounds: int) -> dict:
     if not isinstance(questions, list):
         questions = []
     cleaned = []
     for idx, raw in enumerate(questions):
         if not isinstance(raw, dict):
             continue
         question_text = (raw.get("question") or "").strip()
         if not question_text:
             continue
         entry = dict(raw)
         entry["question"] = question_text
         entry["order"] = entry.get("order") or raw.get("id") or (idx + 1)

     if not isinstance(questions, list):
         questions = []
     cleaned = []
+    seen = set()
     for idx, raw in enumerate(questions):
         if not isinstance(raw, dict):
             continue
         question_text = (raw.get("question") or "").strip()
         if not question_text:
             continue
+        normalized = question_text.lower()
+        if normalized in seen:
+            continue
+        seen.add(normalized)
         entry = dict(raw)
         entry["question"] = question_text
         entry["order"] = entry.get("order") or raw.get("id") or (idx + 1)