Portfolio User committed · Commit 0dcae91
Initial commit: Medical AI Agent with Gemini and OpenMed datasets
Files changed:
- .env.example      +2  -0
- .gitignore        +20 -0
- README.md         +53 -0
- agent/core.py     +32 -0
- app.py            +62 -0
- data/loader.py    +35 -0
- requirements.txt  +6  -0
.env.example ADDED
@@ -0,0 +1,2 @@
+GEMINI_API_KEY=your_key_here
+HF_TOKEN=your_token_here
.gitignore ADDED
@@ -0,0 +1,20 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+venv/
+.env
+
+# Data
+data/cache/
+*.csv
+*.json
+*.parquet
+
+# IDEs
+.vscode/
+.idea/
+
+# OS
+.DS_Store
+Thumbs.db
README.md ADDED
@@ -0,0 +1,53 @@
+# Medical AI Agent
+
+A powerful, conversational medical agent built using **Google Gemini 1.5 Flash** and medical datasets from **Hugging Face (OpenMed)**.
+
+## 🚀 Features
+- **Conversational Intelligence:** Powered by Gemini 1.5 Flash for high-quality medical reasoning.
+- **Data-Driven:** Grounded in real medical dialogues and datasets from the [OpenMed Collection](https://huggingface.co/collections/OpenMed/medical-datasets).
+- **Dynamic Updates:** Built-in tools to fetch and integrate new datasets regularly.
+- **Web Interface:** Easy-to-use UI built with Gradio.
+
+## 📋 Prerequisites
+- Python 3.10+
+- Google Gemini API Key (get it from [Google AI Studio](https://aistudio.google.com/))
+- Hugging Face Token (if accessing private/restricted datasets)
+
+## 🛠️ Installation
+
+1. **Clone the repository:**
+   ```bash
+   git clone https://github.com/shahnewazkabirrafi017-hub/medical-ai-agent.git
+   cd medical-ai-agent
+   ```
+
+2. **Install dependencies:**
+   ```bash
+   pip install -r requirements.txt
+   ```
+
+3. **Set up environment variables:**
+   Create a `.env` file in the root directory:
+   ```env
+   GEMINI_API_KEY=your_gemini_api_key_here
+   HF_TOKEN=your_huggingface_token_here
+   ```
+
+## 🖥️ Usage
+Run the application locally:
+```bash
+python app.py
+```
+
+## 📂 Project Structure
+- `app.py`: The Gradio web interface.
+- `agent/`: Core AI agent logic.
+- `data/`: Scripts for dataset management and caching.
+- `requirements.txt`: Python package requirements.
+- `.env.example`: Template for environment variables.
+
+## 🤝 Contributing
+Updates are welcome! Since we plan to regularly add more datasets, feel free to submit pull requests with new data processing scripts.
+
+## ⚠️ Disclaimer
+This AI agent is for **educational and research purposes only**. It is not a substitute for professional medical advice, diagnosis, or treatment. Always consult with a qualified healthcare provider for medical concerns.
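The setup described in the README can be smoke-tested before launching the UI. A minimal check, not part of the commit, assuming the `.env` file from installation step 3 exists in the repo root:

```python
# check_env.py (hypothetical helper): confirm the .env file is picked up before running app.py.
import os
from dotenv import load_dotenv

load_dotenv()
if not os.getenv("GEMINI_API_KEY"):
    raise SystemExit("GEMINI_API_KEY missing; see README Installation step 3.")
print("Environment looks good; run `python app.py` next.")
```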
agent/core.py ADDED
@@ -0,0 +1,32 @@
+import google.generativeai as genai
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+class MedicalAgent:
+    def __init__(self, api_key=None):
+        self.api_key = api_key or os.getenv("GEMINI_API_KEY")
+        if not self.api_key:
+            raise ValueError("GEMINI_API_KEY not found. Please set it in your environment or .env file.")
+
+        genai.configure(api_key=self.api_key)
+
+        # Using Gemini 1.5 Flash for speed and generous free tier
+        self.model = genai.GenerativeModel(
+            model_name="gemini-1.5-flash",
+            system_instruction=(
+                "You are an advanced Medical AI Assistant. Your goal is to provide accurate, "
+                "helpful, and empathetic medical information based on available datasets. "
+                "Always maintain a professional tone. "
+                "IMPORTANT: You are an AI, not a doctor. Always include a disclaimer that "
+                "this information is for educational purposes and the user should consult "
+                "a real healthcare professional."
+            )
+        )
+        self.chat = self.model.start_chat(history=[])
+
+    def get_response(self, user_input, context=""):
+        prompt = f"User Question: {user_input}\n\nRelevant Medical Context/Data: {context}"
+        response = self.chat.send_message(prompt)
+        return response.text
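For reference, `MedicalAgent` can also be exercised outside the Gradio UI. A minimal sketch, not part of the commit, assuming `GEMINI_API_KEY` is set in the environment or `.env`:

```python
# Ad-hoc check of agent/core.py from the repo root (assumes GEMINI_API_KEY is configured).
from agent.core import MedicalAgent

agent = MedicalAgent()
# context is optional; data/loader.py can supply dialogue samples instead of an empty string.
print(agent.get_response("What are common symptoms of dehydration?", context=""))
```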
app.py ADDED
@@ -0,0 +1,62 @@
+import gradio as gr
+from agent.core import MedicalAgent
+from data.loader import fetch_med_dialog_sample, format_dialogue_context
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# Initialize the agent
+# Note: User needs to provide GEMINI_API_KEY in .env
+try:
+    agent = MedicalAgent()
+    # Fetch some initial context to 'prime' the agent
+    initial_data = fetch_med_dialog_sample(3)
+    medical_context = format_dialogue_context(initial_data)
+except Exception as e:
+    print(f"Warning: Agent initialization failed (likely missing API key): {e}")
+    agent = None
+    medical_context = ""
+
+def medical_chat_interface(message, history):
+    if not agent:
+        return "Error: Agent not initialized. Please ensure GEMINI_API_KEY is set in your .env file."
+
+    try:
+        response = agent.get_response(message, context=medical_context)
+        return response
+    except Exception as e:
+        return f"An error occurred: {str(e)}"
+
+# Custom Theme for a Professional Medical Look
+theme = gr.themes.Soft(
+    primary_hue="blue",
+    secondary_hue="slate",
+    neutral_hue="slate",
+    font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
+).set(
+    body_background_fill="*neutral_50",
+    block_background_fill="white",
+    block_border_width="1px",
+)
+
+with gr.Blocks(theme=theme, title="Medical AI Agent") as demo:
+    gr.Markdown(
+        """
+        # 🏥 Medical AI Assistant
+        Developed by **[shahnewazkabirrafi017-hub](https://github.com/shahnewazkabirrafi017-hub)**
+
+        This agent is grounded in the **OpenMed** dataset collection and powered by **Gemini 1.5 Flash**.
+
+        *⚠️ Disclaimer: This is an AI tool for educational purposes. Consult a doctor for medical advice.*
+        """
+    )
+
+    chatbot = gr.ChatInterface(
+        fn=medical_chat_interface,
+        examples=["What are common symptoms of iron deficiency?", "How can I improve my sleep hygiene?", "Explain what hypertension is in simple terms."],
+        type="messages"
+    )
+
+if __name__ == "__main__":
+    demo.launch()
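Note that `medical_chat_interface` ignores Gradio's `history` argument because `MedicalAgent` already keeps its own session via `start_chat`. If the agent were made stateless, the history (a list of `{"role": ..., "content": ...}` dicts when `type="messages"`) could be folded into the prompt instead; a rough sketch of that alternative, not what this commit does:

```python
def stateless_chat_interface(message, history):
    # history arrives as [{"role": "user"/"assistant", "content": "..."}] with type="messages".
    transcript = "\n".join(f"{turn['role']}: {turn['content']}" for turn in history)
    prompt = f"Conversation so far:\n{transcript}\n\nUser Question: {message}"
    return agent.get_response(prompt, context=medical_context)
```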
data/loader.py ADDED
@@ -0,0 +1,35 @@
+from datasets import load_dataset
+import pandas as pd
+import os
+
+def fetch_med_dialog_sample(limit=5):
+    """
+    Fetches a small sample from the MedDialog dataset to use as context.
+    """
+    print(f"Fetching {limit} dialogues from MedDialog...")
+    try:
+        # Loading a small portion of the English MedDialog dataset
+        dataset = load_dataset("OpenMed/MedDialog", split="train", streaming=True)
+        samples = []
+        for i, entry in enumerate(dataset):
+            if i >= limit:
+                break
+            samples.append(entry)
+
+        return samples
+    except Exception as e:
+        print(f"Error fetching dataset: {e}")
+        return []
+
+def format_dialogue_context(samples):
+    context = "Here are some examples of medical dialogues for reference:\n\n"
+    for sample in samples:
+        # Adjust based on actual dataset structure (checking common fields)
+        dialogue = sample.get('utterances', sample.get('description', ''))
+        context += f"- {dialogue}\n"
+    return context
+
+if __name__ == "__main__":
+    # Test fetch
+    data = fetch_med_dialog_sample(2)
+    print(format_dialogue_context(data))
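The README and `.gitignore` both refer to a `data/cache/` directory, but `loader.py` re-streams the dataset on every start. One possible caching layer, a sketch with a hypothetical helper name, assuming the streamed entries are JSON-serializable:

```python
import json
import os

CACHE_DIR = "data/cache"  # already listed in .gitignore

def fetch_med_dialog_sample_cached(limit=5, cache_file="meddialog_sample.json"):
    """Fetch the sample once, then reuse the local copy on later runs."""
    path = os.path.join(CACHE_DIR, cache_file)
    if os.path.exists(path):
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    samples = fetch_med_dialog_sample(limit)
    os.makedirs(CACHE_DIR, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        json.dump(samples, f)
    return samples
```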
requirements.txt ADDED
@@ -0,0 +1,6 @@
+google-generativeai
+datasets
+pandas
+gradio
+python-dotenv
+huggingface_hub