Spaces:
Running
Running
| """ | |
| Utility functions for query classification and response generation | |
| """ | |
| import re | |
| from typing import Tuple | |
class QueryClassifier:
    """Classify queries to determine if retrieval is needed.

    Both public methods are classmethods, so they can be called directly on
    the class (``QueryClassifier.needs_retrieval(text)``) or on an instance.
    """

    # Simple greetings/acknowledgments (no retrieval needed).
    # NOTE: the order matters -- _SIMPLE_RESPONSES below is aligned with this
    # list position by position.
    SIMPLE_PATTERNS = [
        r"\b(hi|hello|hey|greetings|good morning|good evening|good afternoon)\b",
        r"\b(thank you|thanks|thx|appreciate it)\b",
        r"\b(bye|goodbye|see you|take care)\b",
        r"\b(ok|okay|got it|understood|alright|sure)\b",
        r"\b(yes|yeah|yep|no|nope)\b",
    ]

    # Canned replies, one per entry of SIMPLE_PATTERNS (same order):
    # greetings, thanks, goodbye, acknowledgments, yes/no.
    _SIMPLE_RESPONSES = (
        "Hello! I'm your medical assistant. I can help answer questions about "
        "symptoms, treatments, medications, and general health information. "
        "How can I assist you today?",
        "You're very welcome! If you have any other health-related questions, "
        "feel free to ask. I'm here to help!",
        "Goodbye! Take care of your health. Feel free to return anytime you "
        "have questions. Stay well!",
        "Is there anything else you'd like to know about your health or medical concerns?",
        "Could you please provide more details about your question? "
        "I'm here to help with any health-related information you need.",
    )

    # Reply used when none of SIMPLE_PATTERNS matches.
    _DEFAULT_RESPONSE = (
        "I'm here to help with medical and health-related questions. "
        "Could you please elaborate on what you'd like to know?"
    )

    # Medical keywords (definitely needs retrieval).
    # Matching is a plain substring test on the lower-cased query, so plurals
    # and compounds match ("symptoms", "chemotherapy"), but so do unrelated
    # words that merely contain a keyword ("latest" contains "test").
    MEDICAL_KEYWORDS = [
        "symptom",
        "treatment",
        "disease",
        "diagnosis",
        "medicine",
        "medication",
        "cure",
        "pain",
        "fever",
        "infection",
        "doctor",
        "hospital",
        "prescription",
        "side effect",
        "dosage",
        "therapy",
        "vaccine",
        "surgery",
        "condition",
        "blood",
        "pressure",
        "diabetes",
        "cancer",
        "heart",
        "lung",
        "kidney",
        "test",
        "scan",
        "mri",
        "x-ray",
        "injury",
        "allergy",
        "chronic",
        "acute",
        "disorder",
        "illness",
        "sick",
        "health",
    ]

    # Words that mark a query as a question when one of them appears among
    # the first three whitespace-separated tokens.
    _QUESTION_WORDS = frozenset(
        {
            "what",
            "how",
            "why",
            "when",
            "where",
            "which",
            "who",
            "can",
            "should",
            "is",
            "are",
            "does",
            "do",
            "could",
            "would",
            "will",
        }
    )

    @classmethod
    def needs_retrieval(cls, query: str) -> Tuple[bool, str]:
        """
        Determine if query needs document retrieval.

        The rules are evaluated in order and the first one that applies wins.

        Args:
            query: User's input message

        Returns:
            Tuple[bool, str]: (needs_retrieval, reason), where reason is one of
            "simple_greeting", "medical_keyword_detected", "question_detected",
            "single_word", "substantial_query" or "default_no_retrieval".
        """
        query_lower = query.lower().strip()
        # Tokenize once; the tokens are reused for the count and for Rule 3.
        tokens = query_lower.split()
        word_count = len(tokens)

        # Rule 1: Very short queries with simple patterns (no retrieval).
        # This runs before the keyword check, so e.g. "no pain" is treated as
        # a simple acknowledgment.
        if word_count <= 3 and any(
            re.search(pattern, query_lower) for pattern in cls.SIMPLE_PATTERNS
        ):
            return False, "simple_greeting"

        # Rule 2: Contains medical keywords (needs retrieval)
        if any(keyword in query_lower for keyword in cls.MEDICAL_KEYWORDS):
            return True, "medical_keyword_detected"

        # Rule 3: Question words in longer queries (likely needs retrieval).
        # Tokens keep their punctuation, so "what's" or "how," do not count.
        if word_count >= 3 and not cls._QUESTION_WORDS.isdisjoint(tokens[:3]):
            return True, "question_detected"

        # Rule 4: Single word queries (context-dependent, default to no retrieval)
        if word_count == 1:
            return False, "single_word"

        # Default: If uncertain and query is substantial, use retrieval
        if word_count >= 4:
            return True, "substantial_query"

        # Empty queries and unmatched 2-3 word queries end up here.
        return False, "default_no_retrieval"

    @classmethod
    def get_simple_response(cls, query: str) -> str:
        """
        Generate appropriate response for non-retrieval queries.

        SIMPLE_PATTERNS are tried in order (greetings, thanks, goodbye,
        acknowledgments, yes/no) and the reply paired with the first matching
        pattern is returned.

        Args:
            query: User's input message

        Returns:
            str: Appropriate response without retrieval; a generic prompt to
            elaborate when no pattern matches.
        """
        query_lower = query.lower().strip()

        for pattern, reply in zip(cls.SIMPLE_PATTERNS, cls._SIMPLE_RESPONSES):
            if re.search(pattern, query_lower):
                return reply

        return cls._DEFAULT_RESPONSE
class StreamingHandler:
    """Handle streaming responses from LangChain.

    Both methods are async generators that yield text frames of the form
    ``data: <json>\\n\\n``. They are static methods, so they work whether
    called on the class or on an instance.
    """

    @staticmethod
    async def stream_rag_response(rag_chain, input_data: dict):
        """
        Stream tokens from RAG chain.

        Args:
            rag_chain: The retrieval chain to stream from; it must provide an
                ``astream(input_data)`` async iterator of dict-like chunks.
            input_data: Dict with 'input' and 'chat_history' keys

        Yields:
            str: JSON formatted chunks with token data. One frame per chunk
            that contains an "answer" key, then a final frame with
            ``done: True`` and the concatenated ``full_answer``. If anything
            raises while streaming, a single frame with an ``error`` message
            and ``done: True`` is yielded instead of the completion frame.
        """
        import json

        try:
            # Collect the pieces and join once at the end.
            answer_parts = []
            async for chunk in rag_chain.astream(input_data):
                # Chunks without an "answer" key are skipped.
                if "answer" in chunk:
                    token = chunk["answer"]
                    answer_parts.append(token)
                    # Send token as JSON
                    payload = json.dumps({"token": token, "done": False})
                    yield f"data: {payload}\n\n"

            # Send completion signal
            full_answer = "".join(answer_parts)
            payload = json.dumps({"token": "", "done": True, "full_answer": full_answer})
            yield f"data: {payload}\n\n"
        except Exception as e:
            # Stream boundary: report the failure to the consumer as a final
            # frame rather than letting the generator raise mid-stream.
            error_msg = f"Streaming error: {str(e)}"
            payload = json.dumps({"error": error_msg, "done": True})
            yield f"data: {payload}\n\n"

    @staticmethod
    async def stream_simple_response(response: str, delay: float = 0.01):
        """
        Stream a simple non-retrieval response character by character.

        Args:
            response: The complete response text
            delay: Seconds to wait after each character; the default of 0.01
                gives a smooth typing effect. Pass 0 to stream without pauses.

        Yields:
            str: JSON formatted chunks with token data. One frame per
            character, then a final frame with ``done: True`` and the whole
            text as ``full_answer``.
        """
        import asyncio
        import json

        # Stream character by character with slight delay for smooth effect
        for char in response:
            payload = json.dumps({"token": char, "done": False})
            yield f"data: {payload}\n\n"
            await asyncio.sleep(delay)

        # Send completion signal
        payload = json.dumps({"token": "", "done": True, "full_answer": response})
        yield f"data: {payload}\n\n"