Spaces:

rafi11223
/

Medi-Agent

Sleeping

Medi-Agent / data /loader.py

Portfolio User

Initial commit: Medical AI Agent with Gemini and OpenMed datasets

0dcae91 6 days ago

1.16 kB

	from datasets import load_dataset
	import pandas as pd
	import os

	def fetch_med_dialog_sample(limit=5):
	    """Fetch the first ``limit`` records of the MedDialog training split.

	    The dataset is opened with ``streaming=True``, so records are pulled
	    lazily instead of downloading the whole split.

	    Args:
	        limit: Maximum number of records to return. Must be an integer;
	            zero or a negative value yields an empty list without opening
	            the dataset.

	    Returns:
	        A list with at most ``limit`` dataset records, in stream order.
	        An empty list is returned if anything goes wrong while loading or
	        streaming; the error is printed rather than raised (best-effort).
	    """
	    # Local import keeps this function self-contained.
	    from itertools import islice

	    print(f"Fetching {limit} dialogues from MedDialog...")
	    try:
	        # Nothing was requested: skip the remote call entirely.
	        if limit <= 0:
	            return []

	        dataset = load_dataset("OpenMed/MedDialog", split="train", streaming=True)
	        # islice stops after exactly `limit` records, so no extra record is
	        # pulled from the stream just to be thrown away.
	        return list(islice(dataset, limit))
	    except Exception as e:
	        # Deliberate best-effort: callers get an empty sample, not a crash.
	        print(f"Error fetching dataset: {e}")
	        return []

	def format_dialogue_context(samples):
	    """Render dataset samples as a bulleted reference block for a prompt.

	    For each sample the ``'utterances'`` field is used; if it is missing or
	    empty, ``'description'`` is used instead, and an empty string if neither
	    has content. When the chosen value is a list or tuple of turns, the
	    turns are joined with single spaces so the bullet shows plain text
	    rather than a Python list repr.

	    Args:
	        samples: Iterable of mapping-like records supporting ``.get``.

	    Returns:
	        A string starting with a fixed header line, followed by one
	        ``"- ..."`` line per sample. With no samples, only the header is
	        returned.
	    """
	    # NOTE(review): field names follow the original code's guess at the
	    # dataset schema ('utterances' / 'description') -- confirm against the
	    # actual MedDialog record layout.
	    bullet_lines = []
	    for sample in samples:
	        # `or` (not a .get default) so an empty/None 'utterances' still
	        # falls back to 'description'.
	        dialogue = sample.get('utterances') or sample.get('description') or ''
	        if isinstance(dialogue, (list, tuple)):
	            dialogue = " ".join(str(turn) for turn in dialogue)
	        bullet_lines.append(f"- {dialogue}\n")

	    header = "Here are some examples of medical dialogues for reference:\n\n"
	    return header + "".join(bullet_lines)

	if __name__ == "__main__":
	    # Manual smoke test: pull two dialogues and print the rendered context.
	    print(format_dialogue_context(fetch_med_dialog_sample(2)))