#!/usr/bin/env python3
"""Inspect metadata columns of the function dataset CSV and problems JSONL.

The script prints three things, in order:

1. The metadata fields of one specific data row of the CSV (row 57 by
   default, counting data rows from 1; the header line is not counted).
2. The metadata of the first entry of the JSONL file.
3. How many CSV rows have a non-empty ``repo_name``, ``path`` and
   ``language``.
"""

import csv
import json
from typing import Optional, Tuple

CSV_PATH = 'function_dataset_v2.csv'
JSONL_PATH = 'programming_problems.jsonl'
TARGET_ROW = 57  # 1-indexed data row to display
REQUIRED_FIELDS = ('repo_name', 'path', 'language')


def read_csv_row(csv_path: str, row_number: int) -> Optional[dict]:
    """Return the ``row_number``-th data row (1-indexed) of the CSV.

    Returns ``None`` when the file has fewer than ``row_number`` data rows
    (or ``row_number`` is not positive) instead of silently doing nothing.
    """
    # newline='' lets the csv module handle newlines embedded in quoted
    # fields itself, as the csv documentation requires.
    with open(csv_path, 'r', newline='', encoding='utf-8') as f:
        for index, row in enumerate(csv.DictReader(f), start=1):
            if index == row_number:
                return row
    return None


def read_first_jsonl_entry(jsonl_path: str) -> Optional[dict]:
    """Parse and return the first line of a JSONL file.

    Returns ``None`` when the first line is empty or blank (e.g. an empty
    file), where ``json.loads`` would otherwise raise ``JSONDecodeError``.
    """
    with open(jsonl_path, 'r', encoding='utf-8') as f:
        first_line = f.readline()
    if not first_line.strip():
        return None
    return json.loads(first_line)


def count_complete_rows(csv_path: str) -> Tuple[int, int]:
    """Return ``(total, complete)`` data-row counts for the CSV.

    A row is complete when every field in ``REQUIRED_FIELDS`` is truthy.
    Raises ``KeyError`` if one of those columns is absent from the header.
    """
    total = 0
    complete = 0
    with open(csv_path, 'r', newline='', encoding='utf-8') as f:
        for row in csv.DictReader(f):
            total += 1
            if all(row[field] for field in REQUIRED_FIELDS):
                complete += 1
    return total, complete


def main(csv_path: str = CSV_PATH,
         jsonl_path: str = JSONL_PATH,
         row_number: int = TARGET_ROW) -> None:
    """Print the row, first-JSONL-entry and completeness reports."""
    print(f"Reading row {row_number} from CSV...")
    row = read_csv_row(csv_path, row_number)
    if row is None:
        print(f"Row {row_number} not found in CSV")
    else:
        print(f"Row {row_number}:")
        print(f" original_index: {row['original_index']}")
        print(f" repo_name: '{row['repo_name']}'")
        print(f" path: '{row['path']}'")
        print(f" language: '{row['language']}'")
        print(f" function_name: '{row['function_name']}'")

    # Also check a sample JSONL entry for comparison with the CSV.
    print("\n\nChecking first JSONL entry...")
    entry = read_first_jsonl_entry(jsonl_path)
    if entry is None:
        print("JSONL file has no first entry")
    else:
        metadata = entry['metadata']
        print(f"original_index: {metadata['original_index']}")
        print(f"function_name: {metadata['function_name']}")
        print(f"Current repo_name: '{metadata['repo_name']}'")
        print(f"Current path: '{metadata['path']}'")
        print(f"Current language: '{metadata['language']}'")

    print("\n\nCounting CSV rows with complete metadata...")
    total, complete = count_complete_rows(csv_path)
    print(f"Total CSV rows: {total}")
    print(f"Rows with complete metadata: {complete}")
    print(f"Rows with missing metadata: {total - complete}")


if __name__ == "__main__":
    main()