Spaces:
Sleeping
Sleeping
| from datasets import load_dataset | |
| import pandas as pd | |
| import os | |
def fetch_med_dialog_sample(limit=5):
    """
    Fetch a small sample from the MedDialog dataset to use as context.

    Args:
        limit: Maximum number of dataset entries to return.

    Returns:
        A list with at most ``limit`` entries taken from the start of the
        streamed ``train`` split. An empty list is returned when ``limit``
        is zero or negative, or when loading/iterating the dataset fails
        (the error is printed instead of being raised).
    """
    from itertools import islice

    print(f"Fetching {limit} dialogues from MedDialog...")
    try:
        if limit <= 0:
            # Nothing was requested, so do not open the remote stream at all.
            return []
        # streaming=True yields entries lazily instead of downloading the
        # whole split up front.
        dataset = load_dataset("OpenMed/MedDialog", split="train", streaming=True)
        # islice stops after exactly `limit` entries; a manual
        # enumerate/break loop would pull one extra entry before stopping.
        return list(islice(dataset, limit))
    except Exception as e:
        # Best-effort helper: callers receive an empty sample on any failure.
        print(f"Error fetching dataset: {e}")
        return []
def format_dialogue_context(samples):
    """
    Render dialogue samples as a bulleted reference block of text.

    Each sample is queried with ``.get``: the value under ``'utterances'``
    is used when that key is present, otherwise the value under
    ``'description'``, otherwise an empty string. The chosen value is
    interpolated as-is into a ``"- ..."`` line.

    Args:
        samples: Iterable of mapping-like objects supporting ``.get``.

    Returns:
        The fixed header followed by one bullet line per sample.
    """
    header = "Here are some examples of medical dialogues for reference:\n\n"
    # One bullet per sample; field lookup mirrors the fallback order above.
    bullet_lines = (
        f"- {record.get('utterances', record.get('description', ''))}\n"
        for record in samples
    )
    return header + "".join(bullet_lines)
if __name__ == "__main__":
    # Manual smoke check: pull two entries and print the formatted context.
    sample_dialogues = fetch_med_dialog_sample(2)
    print(format_dialogue_context(sample_dialogues))