# Medi-Agent/data/loader.py
# Portfolio User
# Initial commit: Medical AI Agent with Gemini and OpenMed datasets
# 0dcae91
import os
from itertools import islice

import pandas as pd
from datasets import load_dataset
def fetch_med_dialog_sample(limit=5):
    """
    Fetch a small sample from the MedDialog dataset to use as context.

    The "train" split of "OpenMed/MedDialog" is opened in streaming mode and
    at most ``limit`` entries are read from the start of the stream.

    Args:
        limit: Maximum number of entries to return (an integer). A value of
            zero or less returns an empty list without calling
            ``load_dataset`` at all.

    Returns:
        A list holding up to ``limit`` entries exactly as the streaming
        dataset yields them, or an empty list if loading or iterating the
        dataset raises.
    """
    print(f"Fetching {limit} dialogues from MedDialog...")
    try:
        if limit <= 0:
            # Nothing was requested, so do not open the dataset stream.
            return []

        # Streaming avoids materialising the whole split just to read a few rows.
        dataset = load_dataset("OpenMed/MedDialog", split="train", streaming=True)

        # islice stops after exactly `limit` items; an enumerate/break loop
        # would pull one extra record from the stream before stopping.
        return list(islice(dataset, limit))
    except Exception as e:
        # Deliberate best-effort boundary: any failure is reported on stdout
        # and the caller receives an empty sample instead of an exception.
        print(f"Error fetching dataset: {e}")
        return []
def format_dialogue_context(samples):
    """
    Render dialogue samples as a bulleted reference block of text.

    Each sample is queried with ``.get`` for an 'utterances' value, falling
    back to 'description', and finally to an empty string. The chosen value
    is interpolated as-is into a "- ..." line.

    Args:
        samples: Iterable of mapping-like objects supporting ``.get``.

    Returns:
        The fixed introductory sentence followed by one bullet line per
        sample; only the introduction when ``samples`` is empty.
    """
    intro = "Here are some examples of medical dialogues for reference:\n\n"
    bullet_lines = []
    for record in samples:
        # The real dataset schema is not confirmed here, so probe the two
        # likely field names, preferring 'utterances'.
        fallback_text = record.get('description', '')
        chosen_text = record.get('utterances', fallback_text)
        bullet_lines.append(f"- {chosen_text}\n")
    return intro + "".join(bullet_lines)
if __name__ == "__main__":
    # Manual smoke test: pull two dialogues and print the formatted context.
    preview_samples = fetch_med_dialog_sample(2)
    preview_text = format_dialogue_context(preview_samples)
    print(preview_text)